lib/logstorage: work-in-progress

This commit is contained in:
Aliaksandr Valialkin 2024-05-20 04:08:30 +02:00
parent 3661373cc2
commit ad505a7a9a
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
105 changed files with 7794 additions and 1945 deletions

View file

@ -20,7 +20,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logjson"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
@ -210,8 +209,8 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
return false, fmt.Errorf(`missing log message after the "create" or "index" command`)
}
line = sc.Bytes()
p := logjson.GetParser()
if err := p.ParseLogMessage(line); err != nil {
p := logstorage.GetJSONParser()
if err := p.ParseLogMessage(line, ""); err != nil {
return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
}
@ -224,7 +223,7 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
}
p.RenameField(msgField, "_msg")
processLogMessage(ts, p.Fields)
logjson.PutParser(p)
logstorage.PutJSONParser(p)
return true, nil
}
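
For context, here is a minimal standalone sketch (not part of this commit) of the pooled parser API that the ingestion handlers in this diff switch to. The sample log line and the `message` field name are made up for illustration; the empty second argument to `ParseLogMessage` mirrors the calls above.

```go
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

func main() {
	line := []byte(`{"time":"2024-05-20T04:08:30Z","level":"info","message":"hello"}`)

	// Acquire a parser from the pool and return it when done.
	p := logstorage.GetJSONParser()
	defer logstorage.PutJSONParser(p)

	// Parse a single JSON-encoded log entry; the second arg is left empty,
	// as in the handlers above.
	if err := p.ParseLogMessage(line, ""); err != nil {
		panic(err)
	}

	// Rename the app-specific message field to the canonical _msg field,
	// the same way readBulkLine does.
	p.RenameField("message", "_msg")

	for _, f := range p.Fields {
		fmt.Printf("%s=%q\n", f.Name, f.Value)
	}
}
```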

View file

@ -12,7 +12,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logjson"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
@ -105,8 +104,8 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
line = sc.Bytes()
}
p := logjson.GetParser()
if err := p.ParseLogMessage(line); err != nil {
p := logstorage.GetJSONParser()
if err := p.ParseLogMessage(line, ""); err != nil {
return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
}
ts, err := extractTimestampFromFields(timeField, p.Fields)
@ -118,7 +117,7 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
}
p.RenameField(msgField, "_msg")
processLogMessage(ts, p.Fields)
logjson.PutParser(p)
logstorage.PutJSONParser(p)
return true, nil
}

View file

@ -50,7 +50,8 @@ Below is an example output:
"u64_0": "4810489083243239145",
"float_0": "1.868",
"ip_0": "250.34.75.125",
"timestamp_0": "1799-03-16T01:34:18.311Z"
"timestamp_0": "1799-03-16T01:34:18.311Z",
"json_0": "{\"foo\":\"bar_3\",\"baz\":{\"a\":[\"x\",\"y\"]},\"f3\":NaN,\"f4\":32}"
}
{
"_time": "2024-05-08T14:34:00.854Z",
@ -70,7 +71,8 @@ Below is an example output:
"u64_0": "6593354256620219850",
"float_0": "1.085",
"ip_0": "253.151.88.158",
"timestamp_0": "2042-10-05T16:42:57.082Z"
"timestamp_0": "2042-10-05T16:42:57.082Z",
"json_0": "{\"foo\":\"bar_5\",\"baz\":{\"a\":[\"x\",\"y\"]},\"f3\":NaN,\"f4\":27}"
}
```

View file

@ -51,6 +51,8 @@ var (
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
timestampFieldsPerLog = flag.Int("timestampFieldsPerLog", 1, "The number of fields with ISO8601 timestamps per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
jsonFieldsPerLog = flag.Int("jsonFieldsPerLog", 1, "The number of JSON fields to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
statInterval = flag.Duration("statInterval", 10*time.Second, "The interval between publishing the stats")
)
@ -263,6 +265,9 @@ func generateLogsAtTimestamp(bw *bufio.Writer, workerID int, ts int64, firstStre
timestamp := toISO8601(int64(rand.Uint64()))
fmt.Fprintf(bw, `,"timestamp_%d":"%s"`, j, timestamp)
}
for j := 0; j < *jsonFieldsPerLog; j++ {
fmt.Fprintf(bw, `,"json_%d":"{\"foo\":\"bar_%d\",\"baz\":{\"a\":[\"x\",\"y\"]},\"f3\":NaN,\"f4\":%d}"`, j, rand.Intn(10), rand.Intn(100))
}
fmt.Fprintf(bw, "}\n")
logEntriesCount.Add(1)

View file

@ -0,0 +1,17 @@
{% stripspace %}
// FieldNamesResponse formats /select/logsql/field_names response
{% func FieldNamesResponse(names []string) %}
{
"names":[
{% if len(names) > 0 %}
{%q= names[0] %}
{% for _, v := range names[1:] %}
,{%q= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@ -0,0 +1,69 @@
// Code generated by qtc from "field_names_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
// FieldNamesResponse formats /select/logsql/field_names response
//line app/vlselect/logsql/field_names_response.qtpl:4
package logsql
//line app/vlselect/logsql/field_names_response.qtpl:4
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/field_names_response.qtpl:4
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/field_names_response.qtpl:4
func StreamFieldNamesResponse(qw422016 *qt422016.Writer, names []string) {
//line app/vlselect/logsql/field_names_response.qtpl:4
qw422016.N().S(`{"names":[`)
//line app/vlselect/logsql/field_names_response.qtpl:7
if len(names) > 0 {
//line app/vlselect/logsql/field_names_response.qtpl:8
qw422016.N().Q(names[0])
//line app/vlselect/logsql/field_names_response.qtpl:9
for _, v := range names[1:] {
//line app/vlselect/logsql/field_names_response.qtpl:9
qw422016.N().S(`,`)
//line app/vlselect/logsql/field_names_response.qtpl:10
qw422016.N().Q(v)
//line app/vlselect/logsql/field_names_response.qtpl:11
}
//line app/vlselect/logsql/field_names_response.qtpl:12
}
//line app/vlselect/logsql/field_names_response.qtpl:12
qw422016.N().S(`]}`)
//line app/vlselect/logsql/field_names_response.qtpl:15
}
//line app/vlselect/logsql/field_names_response.qtpl:15
func WriteFieldNamesResponse(qq422016 qtio422016.Writer, names []string) {
//line app/vlselect/logsql/field_names_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/field_names_response.qtpl:15
StreamFieldNamesResponse(qw422016, names)
//line app/vlselect/logsql/field_names_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/field_names_response.qtpl:15
}
//line app/vlselect/logsql/field_names_response.qtpl:15
func FieldNamesResponse(names []string) string {
//line app/vlselect/logsql/field_names_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/field_names_response.qtpl:15
WriteFieldNamesResponse(qb422016, names)
//line app/vlselect/logsql/field_names_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/field_names_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/field_names_response.qtpl:15
return qs422016
//line app/vlselect/logsql/field_names_response.qtpl:15
}

View file

@ -0,0 +1,17 @@
{% stripspace %}
// FieldValuesResponse formats /select/logsql/field_values response
{% func FieldValuesResponse(values []string) %}
{
"values":[
{% if len(values) > 0 %}
{%q= values[0] %}
{% for _, v := range values[1:] %}
,{%q= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@ -0,0 +1,69 @@
// Code generated by qtc from "field_values_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
// FieldValuesResponse formats /select/logsql/field_values response
//line app/vlselect/logsql/field_values_response.qtpl:4
package logsql
//line app/vlselect/logsql/field_values_response.qtpl:4
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/field_values_response.qtpl:4
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/field_values_response.qtpl:4
func StreamFieldValuesResponse(qw422016 *qt422016.Writer, values []string) {
//line app/vlselect/logsql/field_values_response.qtpl:4
qw422016.N().S(`{"values":[`)
//line app/vlselect/logsql/field_values_response.qtpl:7
if len(values) > 0 {
//line app/vlselect/logsql/field_values_response.qtpl:8
qw422016.N().Q(values[0])
//line app/vlselect/logsql/field_values_response.qtpl:9
for _, v := range values[1:] {
//line app/vlselect/logsql/field_values_response.qtpl:9
qw422016.N().S(`,`)
//line app/vlselect/logsql/field_values_response.qtpl:10
qw422016.N().Q(v)
//line app/vlselect/logsql/field_values_response.qtpl:11
}
//line app/vlselect/logsql/field_values_response.qtpl:12
}
//line app/vlselect/logsql/field_values_response.qtpl:12
qw422016.N().S(`]}`)
//line app/vlselect/logsql/field_values_response.qtpl:15
}
//line app/vlselect/logsql/field_values_response.qtpl:15
func WriteFieldValuesResponse(qq422016 qtio422016.Writer, values []string) {
//line app/vlselect/logsql/field_values_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/field_values_response.qtpl:15
StreamFieldValuesResponse(qw422016, values)
//line app/vlselect/logsql/field_values_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/field_values_response.qtpl:15
}
//line app/vlselect/logsql/field_values_response.qtpl:15
func FieldValuesResponse(values []string) string {
//line app/vlselect/logsql/field_values_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/field_values_response.qtpl:15
WriteFieldValuesResponse(qb422016, values)
//line app/vlselect/logsql/field_values_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/field_values_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/field_values_response.qtpl:15
return qs422016
//line app/vlselect/logsql/field_values_response.qtpl:15
}

View file

@ -0,0 +1,69 @@
{% import (
"slices"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
) %}
{% stripspace %}
// LabelsForHits formats labels for /select/logsql/hits response
{% func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) %}
{
{% if len(columns) > 0 %}
{%q= columns[0].Name %}:{%q= columns[0].Values[rowIdx] %}
{% for _, c := range columns[1:] %}
,{%q= c.Name %}:{%q= c.Values[rowIdx] %}
{% endfor %}
{% endif %}
}
{% endfunc %}
{% func HitsSeries(m map[string]*hitsSeries) %}
{
{% code
sortedKeys := make([]string, 0, len(m))
for k := range m {
sortedKeys = append(sortedKeys, k)
}
slices.Sort(sortedKeys)
%}
"hits":[
{% if len(sortedKeys) > 0 %}
{%= hitsSeriesLine(m, sortedKeys[0]) %}
{% for _, k := range sortedKeys[1:] %}
,{%= hitsSeriesLine(m, k) %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% func hitsSeriesLine(m map[string]*hitsSeries, k string) %}
{
{% code
hs := m[k]
hs.sort()
timestamps := hs.timestamps
values := hs.values
%}
"fields":{%s= k %},
"timestamps":[
{% if len(timestamps) > 0 %}
{%q= timestamps[0] %}
{% for _, ts := range timestamps[1:] %}
,{%q= ts %}
{% endfor %}
{% endif %}
],
"values":[
{% if len(values) > 0 %}
{%s= values[0] %}
{% for _, v := range values[1:] %}
,{%s= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@ -0,0 +1,219 @@
// Code generated by qtc from "hits_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vlselect/logsql/hits_response.qtpl:1
package logsql
//line app/vlselect/logsql/hits_response.qtpl:1
import (
"slices"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
// LabelsForHits formats labels for /select/logsql/hits response
//line app/vlselect/logsql/hits_response.qtpl:10
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/hits_response.qtpl:10
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/hits_response.qtpl:10
func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:10
qw422016.N().S(`{`)
//line app/vlselect/logsql/hits_response.qtpl:12
if len(columns) > 0 {
//line app/vlselect/logsql/hits_response.qtpl:13
qw422016.N().Q(columns[0].Name)
//line app/vlselect/logsql/hits_response.qtpl:13
qw422016.N().S(`:`)
//line app/vlselect/logsql/hits_response.qtpl:13
qw422016.N().Q(columns[0].Values[rowIdx])
//line app/vlselect/logsql/hits_response.qtpl:14
for _, c := range columns[1:] {
//line app/vlselect/logsql/hits_response.qtpl:14
qw422016.N().S(`,`)
//line app/vlselect/logsql/hits_response.qtpl:15
qw422016.N().Q(c.Name)
//line app/vlselect/logsql/hits_response.qtpl:15
qw422016.N().S(`:`)
//line app/vlselect/logsql/hits_response.qtpl:15
qw422016.N().Q(c.Values[rowIdx])
//line app/vlselect/logsql/hits_response.qtpl:16
}
//line app/vlselect/logsql/hits_response.qtpl:17
}
//line app/vlselect/logsql/hits_response.qtpl:17
qw422016.N().S(`}`)
//line app/vlselect/logsql/hits_response.qtpl:19
}
//line app/vlselect/logsql/hits_response.qtpl:19
func WriteLabelsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:19
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/hits_response.qtpl:19
StreamLabelsForHits(qw422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/hits_response.qtpl:19
}
//line app/vlselect/logsql/hits_response.qtpl:19
func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) string {
//line app/vlselect/logsql/hits_response.qtpl:19
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/hits_response.qtpl:19
WriteLabelsForHits(qb422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/hits_response.qtpl:19
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/hits_response.qtpl:19
return qs422016
//line app/vlselect/logsql/hits_response.qtpl:19
}
//line app/vlselect/logsql/hits_response.qtpl:21
func StreamHitsSeries(qw422016 *qt422016.Writer, m map[string]*hitsSeries) {
//line app/vlselect/logsql/hits_response.qtpl:21
qw422016.N().S(`{`)
//line app/vlselect/logsql/hits_response.qtpl:24
sortedKeys := make([]string, 0, len(m))
for k := range m {
sortedKeys = append(sortedKeys, k)
}
slices.Sort(sortedKeys)
//line app/vlselect/logsql/hits_response.qtpl:29
qw422016.N().S(`"hits":[`)
//line app/vlselect/logsql/hits_response.qtpl:31
if len(sortedKeys) > 0 {
//line app/vlselect/logsql/hits_response.qtpl:32
streamhitsSeriesLine(qw422016, m, sortedKeys[0])
//line app/vlselect/logsql/hits_response.qtpl:33
for _, k := range sortedKeys[1:] {
//line app/vlselect/logsql/hits_response.qtpl:33
qw422016.N().S(`,`)
//line app/vlselect/logsql/hits_response.qtpl:34
streamhitsSeriesLine(qw422016, m, k)
//line app/vlselect/logsql/hits_response.qtpl:35
}
//line app/vlselect/logsql/hits_response.qtpl:36
}
//line app/vlselect/logsql/hits_response.qtpl:36
qw422016.N().S(`]}`)
//line app/vlselect/logsql/hits_response.qtpl:39
}
//line app/vlselect/logsql/hits_response.qtpl:39
func WriteHitsSeries(qq422016 qtio422016.Writer, m map[string]*hitsSeries) {
//line app/vlselect/logsql/hits_response.qtpl:39
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/hits_response.qtpl:39
StreamHitsSeries(qw422016, m)
//line app/vlselect/logsql/hits_response.qtpl:39
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/hits_response.qtpl:39
}
//line app/vlselect/logsql/hits_response.qtpl:39
func HitsSeries(m map[string]*hitsSeries) string {
//line app/vlselect/logsql/hits_response.qtpl:39
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/hits_response.qtpl:39
WriteHitsSeries(qb422016, m)
//line app/vlselect/logsql/hits_response.qtpl:39
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/hits_response.qtpl:39
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/hits_response.qtpl:39
return qs422016
//line app/vlselect/logsql/hits_response.qtpl:39
}
//line app/vlselect/logsql/hits_response.qtpl:41
func streamhitsSeriesLine(qw422016 *qt422016.Writer, m map[string]*hitsSeries, k string) {
//line app/vlselect/logsql/hits_response.qtpl:41
qw422016.N().S(`{`)
//line app/vlselect/logsql/hits_response.qtpl:44
hs := m[k]
hs.sort()
timestamps := hs.timestamps
values := hs.values
//line app/vlselect/logsql/hits_response.qtpl:48
qw422016.N().S(`"fields":`)
//line app/vlselect/logsql/hits_response.qtpl:49
qw422016.N().S(k)
//line app/vlselect/logsql/hits_response.qtpl:49
qw422016.N().S(`,"timestamps":[`)
//line app/vlselect/logsql/hits_response.qtpl:51
if len(timestamps) > 0 {
//line app/vlselect/logsql/hits_response.qtpl:52
qw422016.N().Q(timestamps[0])
//line app/vlselect/logsql/hits_response.qtpl:53
for _, ts := range timestamps[1:] {
//line app/vlselect/logsql/hits_response.qtpl:53
qw422016.N().S(`,`)
//line app/vlselect/logsql/hits_response.qtpl:54
qw422016.N().Q(ts)
//line app/vlselect/logsql/hits_response.qtpl:55
}
//line app/vlselect/logsql/hits_response.qtpl:56
}
//line app/vlselect/logsql/hits_response.qtpl:56
qw422016.N().S(`],"values":[`)
//line app/vlselect/logsql/hits_response.qtpl:59
if len(values) > 0 {
//line app/vlselect/logsql/hits_response.qtpl:60
qw422016.N().S(values[0])
//line app/vlselect/logsql/hits_response.qtpl:61
for _, v := range values[1:] {
//line app/vlselect/logsql/hits_response.qtpl:61
qw422016.N().S(`,`)
//line app/vlselect/logsql/hits_response.qtpl:62
qw422016.N().S(v)
//line app/vlselect/logsql/hits_response.qtpl:63
}
//line app/vlselect/logsql/hits_response.qtpl:64
}
//line app/vlselect/logsql/hits_response.qtpl:64
qw422016.N().S(`]}`)
//line app/vlselect/logsql/hits_response.qtpl:67
}
//line app/vlselect/logsql/hits_response.qtpl:67
func writehitsSeriesLine(qq422016 qtio422016.Writer, m map[string]*hitsSeries, k string) {
//line app/vlselect/logsql/hits_response.qtpl:67
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/hits_response.qtpl:67
streamhitsSeriesLine(qw422016, m, k)
//line app/vlselect/logsql/hits_response.qtpl:67
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/hits_response.qtpl:67
}
//line app/vlselect/logsql/hits_response.qtpl:67
func hitsSeriesLine(m map[string]*hitsSeries, k string) string {
//line app/vlselect/logsql/hits_response.qtpl:67
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/hits_response.qtpl:67
writehitsSeriesLine(qb422016, m, k)
//line app/vlselect/logsql/hits_response.qtpl:67
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/hits_response.qtpl:67
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/hits_response.qtpl:67
return qs422016
//line app/vlselect/logsql/hits_response.qtpl:67
}

View file

@ -5,6 +5,10 @@ import (
"fmt"
"math"
"net/http"
"slices"
"sort"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
@ -15,44 +19,196 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
// ProcessHitsRequest handles /select/logsql/hits request.
//
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats
func ProcessHitsRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
// Obtain step
stepStr := r.FormValue("step")
if stepStr == "" {
stepStr = "1d"
}
step, err := promutils.ParseDuration(stepStr)
if err != nil {
httpserver.Errorf(w, r, "cannot parse 'step' arg: %s", err)
return
}
if step <= 0 {
httpserver.Errorf(w, r, "'step' must be bigger than zero")
}
// Obtain offset
offsetStr := r.FormValue("offset")
if offsetStr == "" {
offsetStr = "0s"
}
offset, err := promutils.ParseDuration(offsetStr)
if err != nil {
httpserver.Errorf(w, r, "cannot parse 'offset' arg: %s", err)
return
}
// Obtain field entries
fields := r.Form["field"]
// Prepare the query
q.AddCountByTimePipe(int64(step), int64(offset), fields)
q.Optimize()
var mLock sync.Mutex
m := make(map[string]*hitsSeries)
writeBlock := func(_ uint, timestamps []int64, columns []logstorage.BlockColumn) {
if len(columns) == 0 || len(columns[0].Values) == 0 {
return
}
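// Column layout produced by the count-by-time pipe added above: the first column holds the time bucket, the last column holds the hits count, and the columns in between hold the per-series `field` values.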
timestampValues := columns[0].Values
hitsValues := columns[len(columns)-1].Values
columns = columns[1 : len(columns)-1]
bb := blockResultPool.Get()
for i := range timestamps {
timestampStr := strings.Clone(timestampValues[i])
hitsStr := strings.Clone(hitsValues[i])
bb.Reset()
WriteLabelsForHits(bb, columns, i)
mLock.Lock()
hs, ok := m[string(bb.B)]
if !ok {
k := string(bb.B)
hs = &hitsSeries{}
m[k] = hs
}
hs.timestamps = append(hs.timestamps, timestampStr)
hs.values = append(hs.values, hitsStr)
mLock.Unlock()
}
blockResultPool.Put(bb)
}
// Execute the query
if err := vlstorage.RunQuery(ctx, tenantIDs, q, writeBlock); err != nil {
httpserver.Errorf(w, r, "cannot execute query [%s]: %s", q, err)
return
}
// Write response
w.Header().Set("Content-Type", "application/json")
WriteHitsSeries(w, m)
}
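// hitsSeries holds the timestamps and hit counts for a single set of `field` values returned by /select/logsql/hits; it implements sort.Interface, so the series can be sorted by timestamp before being written to the response.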
type hitsSeries struct {
timestamps []string
values []string
}
func (hs *hitsSeries) sort() {
sort.Sort(hs)
}
func (hs *hitsSeries) Len() int {
return len(hs.timestamps)
}
func (hs *hitsSeries) Swap(i, j int) {
hs.timestamps[i], hs.timestamps[j] = hs.timestamps[j], hs.timestamps[i]
hs.values[i], hs.values[j] = hs.values[j], hs.values[i]
}
func (hs *hitsSeries) Less(i, j int) bool {
return hs.timestamps[i] < hs.timestamps[j]
}
// ProcessFieldNamesRequest handles /select/logsql/field_names request.
//
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names
func ProcessFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
// Obtain field names for the given query
q.Optimize()
fieldNames, err := vlstorage.GetFieldNames(ctx, tenantIDs, q)
if err != nil {
httpserver.Errorf(w, r, "cannot obtain field names: %s", err)
return
}
slices.Sort(fieldNames)
// Write results
w.Header().Set("Content-Type", "application/json")
WriteFieldNamesResponse(w, fieldNames)
}
// ProcessFieldValuesRequest handles /select/logsql/field_values request.
//
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-field-values
func ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
// Parse fieldName query arg
fieldName := r.FormValue("field_name")
if fieldName == "" {
httpserver.Errorf(w, r, "missing 'field_name' query arg")
return
}
// Parse limit query arg
limit, err := httputils.GetInt(r, "limit")
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
if limit < 0 {
limit = 0
}
// Obtain unique values for the given field
q.Optimize()
values, err := vlstorage.GetFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
if err != nil {
httpserver.Errorf(w, r, "cannot obtain values for field %q: %s", fieldName, err)
return
}
if limit == 0 || len(values) < limit {
// Sort values only if their number is below the limit.
// Otherwise there is little sense in sorting, since the query may return
// different subset of values on every execution.
slices.Sort(values)
}
// Write results
w.Header().Set("Content-Type", "application/json")
WriteFieldValuesResponse(w, values)
}
// ProcessQueryRequest handles /select/logsql/query request.
//
// See https://docs.victoriametrics.com/victorialogs/querying/#http-api
func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
// Extract tenantID
tenantID, err := logstorage.GetTenantIDFromRequest(r)
q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
// Parse query
qStr := r.FormValue("query")
q, err := logstorage.ParseQuery(qStr)
if err != nil {
httpserver.Errorf(w, r, "cannot parse query [%s]: %s", qStr, err)
return
}
// Parse optional start and end args
start, okStart, err := getTimeNsec(r, "start")
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
end, okEnd, err := getTimeNsec(r, "end")
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
if okStart || okEnd {
if !okStart {
start = math.MinInt64
}
if !okEnd {
end = math.MaxInt64
}
q.AddTimeFilter(start, end)
}
// Parse limit query arg
limit, err := httputils.GetInt(r, "limit")
if err != nil {
@ -62,14 +218,11 @@ func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Req
if limit > 0 {
q.AddPipeLimit(uint64(limit))
}
q.Optimize()
tenantIDs := []logstorage.TenantID{tenantID}
bw := getBufferedWriter(w)
writeBlock := func(_ uint, timestamps []int64, columns []logstorage.BlockColumn) {
if len(columns) == 0 {
if len(columns) == 0 || len(columns[0].Values) == 0 {
return
}
@ -81,20 +234,57 @@ func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Req
blockResultPool.Put(bb)
}
w.Header().Set("Content-Type", "application/stream+json; charset=utf-8")
w.Header().Set("Content-Type", "application/stream+json")
q.Optimize()
err = vlstorage.RunQuery(ctx, tenantIDs, q, writeBlock)
bw.FlushIgnoreErrors()
putBufferedWriter(bw)
if err != nil {
httpserver.Errorf(w, r, "cannot execute query [%s]: %s", qStr, err)
httpserver.Errorf(w, r, "cannot execute query [%s]: %s", q, err)
}
}
var blockResultPool bytesutil.ByteBufferPool
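// parseCommonArgs extracts the tenant IDs, the LogsQL query and the optional start/end time filter shared by all /select/logsql/* handlers.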
func parseCommonArgs(r *http.Request) (*logstorage.Query, []logstorage.TenantID, error) {
// Extract tenantID
tenantID, err := logstorage.GetTenantIDFromRequest(r)
if err != nil {
return nil, nil, fmt.Errorf("cannot obtain tenanID: %w", err)
}
tenantIDs := []logstorage.TenantID{tenantID}
// Parse query
qStr := r.FormValue("query")
q, err := logstorage.ParseQuery(qStr)
if err != nil {
return nil, nil, fmt.Errorf("cannot parse query [%s]: %s", qStr, err)
}
// Parse optional start and end args
start, okStart, err := getTimeNsec(r, "start")
if err != nil {
return nil, nil, err
}
end, okEnd, err := getTimeNsec(r, "end")
if err != nil {
return nil, nil, err
}
if okStart || okEnd {
if !okStart {
start = math.MinInt64
}
if !okEnd {
end = math.MaxInt64
}
q.AddTimeFilter(start, end)
}
return q, tenantIDs, nil
}
func getTimeNsec(r *http.Request, argName string) (int64, bool, error) {
s := r.FormValue(argName)
if s == "" {

View file

@ -140,12 +140,27 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
}
switch {
case path == "/logsql/query":
switch path {
case "/logsql/query":
logsqlQueryRequests.Inc()
httpserver.EnableCORS(w, r)
logsql.ProcessQueryRequest(ctx, w, r)
return true
case "/logsql/field_values":
logsqlFieldValuesRequests.Inc()
httpserver.EnableCORS(w, r)
logsql.ProcessFieldValuesRequest(ctx, w, r)
return true
case "/logsql/field_names":
logsqlFieldNamesRequests.Inc()
httpserver.EnableCORS(w, r)
logsql.ProcessFieldNamesRequest(ctx, w, r)
return true
case "/logsql/hits":
logsqlHitsRequests.Inc()
httpserver.EnableCORS(w, r)
logsql.ProcessHitsRequest(ctx, w, r)
return true
default:
return false
}
@ -166,4 +181,7 @@ func getMaxQueryDuration(r *http.Request) time.Duration {
var (
logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
logsqlFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_values"}`)
logsqlFieldNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_names"}`)
logsqlHitsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/hits"}`)
)
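
To illustrate the endpoints wired up above, here is a minimal sketch (not part of the commit) that queries `/select/logsql/hits` over HTTP. The listen address `localhost:9428` and the `host` field are assumptions for the example; the form args (`query`, `step`, `field`) are the ones parsed by `ProcessHitsRequest` in this diff.

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	// Assumed VictoriaLogs address for this example.
	const addr = "http://localhost:9428"

	args := url.Values{}
	args.Set("query", "_time:1d error") // LogsQL query selecting the logs to count
	args.Set("step", "1h")              // time bucket size
	args.Add("field", "host")           // group hits by the host field

	resp, err := http.PostForm(addr+"/select/logsql/hits", args)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	// Expected shape: {"hits":[{"fields":{...},"timestamps":[...],"values":[...]}, ...]}
	fmt.Println(string(body))
}
```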

View file

@ -107,10 +107,22 @@ func MustAddRows(lr *logstorage.LogRows) {
}
// RunQuery runs the given q and calls writeBlock for the returned data blocks
func RunQuery(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, writeBlock func(workerID uint, timestamps []int64, columns []logstorage.BlockColumn)) error {
func RunQuery(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, writeBlock logstorage.WriteBlockFunc) error {
return strg.RunQuery(ctx, tenantIDs, q, writeBlock)
}
// GetFieldNames executes q and returns field names seen in results.
func GetFieldNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]string, error) {
return strg.GetFieldNames(ctx, tenantIDs, q)
}
// GetFieldValues executes q and returns unique values for the fieldName seen in results.
//
// If limit > 0, then up to limit unique values are returned.
func GetFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string, limit uint64) ([]string, error) {
return strg.GetFieldValues(ctx, tenantIDs, q, fieldName, limit)
}
func writeStorageMetrics(w io.Writer, strg *logstorage.Storage) {
var ss logstorage.StorageStats
strg.UpdateStats(&ss)

View file

@ -19,6 +19,25 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
## tip
## [v0.8.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.8.0-victorialogs)
Released at 2024-05-20
* FEATURE: add ability to extract JSON fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe).
* FEATURE: add ability to extract [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe).
* FEATURE: add ability to extract arbitrary text from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into the output fields. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe).
* FEATURE: add ability to put arbitrary [queries](https://docs.victoriametrics.com/victorialogs/logsql/#query-syntax) inside [`in()` filter](https://docs.victoriametrics.com/victorialogs/logsql/#multi-exact-filter).
* FEATURE: add support for post-filtering of query results with [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe).
* FEATURE: allow applying individual [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) per each [stats function](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats-with-additional-filters).
* FEATURE: allow passing string values to [`min`](https://docs.victoriametrics.com/victorialogs/logsql/#min-stats) and [`max`](https://docs.victoriametrics.com/victorialogs/logsql/#max-stats) functions. Previously only numeric values could be passed to them.
* FEATURE: speed up [`sort ... limit N` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe) for typical cases.
* FEATURE: allow using more convenient syntax for [`range` filters](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter) if upper or lower bound isn't needed. For example, it is possible to write `response_size:>=10KiB` instead of `response_size:range[10KiB, inf)`, or `temperature:<42` instead of `temperature:range(-inf, 42)`.
* FEATURE: add `/select/logsql/hits` HTTP endpoint for returning the number of matching logs per the given time bucket over the selected time range. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats) for details.
* FEATURE: add `/select/logsql/field_names` HTTP endpoint for returning [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names) for details.
* FEATURE: add `/select/logsql/field_values` HTTP endpoint for returning unique values for the given [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) obtained from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-values) for details.
* BUGFIX: properly take into account `offset` in the [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe) when it already has `limit`. For example, `_time:5m | sort by (foo) offset 20 limit 10`.
## [v0.7.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.7.0-victorialogs)
Released at 2024-05-15

View file

@ -184,7 +184,7 @@ For example, the following query selects all the logs for the last 5 minutes by
_time:5m
```
Additionally to filters, LogsQL query may contain arbitrary mix of optional actions for processing the selected logs. These actions are delimited by `|` and are known as `pipes`.
Additionally to filters, LogsQL query may contain arbitrary mix of optional actions for processing the selected logs. These actions are delimited by `|` and are known as [`pipes`](#pipes).
For example, the following query uses [`stats` pipe](#stats-pipe) for returning the number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with the `error` [word](#word) for the last 5 minutes:
@ -213,7 +213,6 @@ single quotes `'` and backticks:
If in doubt, it is recommended to quote field names and filter args.
The list of LogsQL filters:
- [Time filter](#time-filter) - matches logs with [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) in the given time range
@ -653,16 +652,16 @@ log.level:in("error", "fatal")
It works very fast for long lists passed to `in()`.
The future VictoriaLogs versions will allow passing arbitrary [queries](#query-syntax) into `in()` filter.
For example, the following query selects all the logs for the last hour for users, who visited pages with `admin` [word](#word) in the `path`
It is possible to pass arbitrary [query](#query-syntax) inside `in(...)` filter in order to match against the results of this query.
The query inside `in(...)` must end with [`fields`](#fields-pipe) pipe containing a single field name, so VictoriaLogs could
fetch results from this field. For example, the following query selects all the logs for the last 5 minutes for users,
who visited pages with `admin` [word](#word) in the `path` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
during the last day:
```logsql
_time:1h AND user_id:in(_time:1d AND path:admin | fields user_id)
_time:5m AND user_id:in(_time:1d AND path:admin | fields user_id)
```
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
See also:
- [Exact filter](#exact-filter)
@ -825,6 +824,18 @@ for searching for log entries with request durations exceeding 4.2 seconds:
request.duration:range(4.2, Inf)
```
This query can be shortened to:
```logsql
request.duration:>4.2
```
The following query returns logs with request durations smaller or equal to 1.5 seconds:
```logsql
request.duration:<=1.5
```
The lower and the upper bounds of the range are excluded by default. If they must be included, then substitute the corresponding
parentheses with square brackets. For example:
@ -832,17 +843,13 @@ parentheses with square brackets. For example:
- `range(1, 10]` includes `10` in the matching range
- `range[1, 10]` includes `1` and `10` in the matching range
The range boundaries can be expressed in the following forms:
- Hexadecimal form. For example, `range(0xff, 0xABCD)`.
- Binary form. For example, `range(0b100110, 0b11111101)`
- Integer form with `_` delimiters for better readability. For example, `range(1_000, 2_345_678)`.
The range boundaries can contain any [supported numeric values](#numeric-values).
Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds`
[log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text.
Extract the numeric value from the message with `parse(_msg, "the request took <request_duration> seconds")` [transformation](#transformations)
and then apply the `range()` [post-filter](#post-filters) to the extracted `request_duration` field.
and then apply the `range()` [filter pipe](#filter-pipe) to the extracted `request_duration` field.
Performance tips:
@ -884,7 +891,7 @@ user.ip:ipv4_range("1.2.3.4")
Note that the `ipv4_range()` doesn't match a string with IPv4 address if this string contains other text. For example, `ipv4_range("127.0.0.0/24")`
doesn't match `request from 127.0.0.1: done` [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
since the `127.0.0.1` ip is surrounded by other text. Extract the IP from the message with `parse(_msg, "request from <ip>: done")` [transformation](#transformations)
and then apply the `ipv4_range()` [post-filter](#post-filters) to the extracted `ip` field.
and then apply the `ipv4_range()` [filter pipe](#filter-pipe) to the extracted `ip` field.
Hints:
@ -1045,13 +1052,18 @@ LogsQL supports the following pipes:
- [`copy`](#copy-pipe) copies [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`delete`](#delete-pipe) deletes [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`extract`](#extract-pipe) extracts the specified text into the given log fields.
- [`field_names`](#field_names-pipe) returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`fields`](#fields-pipe) selects the given set of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`filter`](#filter-pipe) applies additional [filters](#filters) to results.
- [`limit`](#limit-pipe) limits the number of selected logs.
- [`offset`](#offset-pipe) skips the given number of selected logs.
- [`rename`](#rename-pipe) renames [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`sort`](#sort-pipe) sorts logs by the given [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`stats`](#stats-pipe) calculates various stats over the selected logs.
- [`uniq`](#uniq-pipe) returns unique log entries.
- [`unpack_json`](#unpack_json-pipe) unpacks JSON fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`unpack_logfmt`](#unpack_logfmt-pipe) unpacks [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
### copy pipe
@ -1096,6 +1108,97 @@ See also:
- [`rename` pipe](#rename-pipe)
- [`fields` pipe](#fields-pipe)
### extract pipe
`| extract from field_name "pattern"` [pipe](#pipes) allows extracting additional fields specified in the `pattern` from the given
`field_name` [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). Existing log fields remain unchanged
after the `| extract ...` pipe.
`| extract ...` pipe can be useful for extracting additional fields needed for further data processing with other pipes such as [`stats` pipe](#stats-pipe) or [`sort` pipe](#sort-pipe).
For example, the following query selects logs with the `error` [word](#word) for the last day,
extracts ip address from [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) into `ip` field and then calculates top 10 ip addresses
with the biggest number of logs:
```logsql
_time:1d error | extract from _msg "ip=<ip> " | stats by (ip) count() logs | sort by (logs) desc limit 10
```
It is expected that the `_msg` field contains an `ip=...` substring ending with a space. For example, `error from ip=1.2.3.4, user_id=42`.
If the `| extract ...` pipe is applied to [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field), then the `from _msg` part can be omitted.
For example, the following query is equivalent to the previous one:
```logsql
_time:1d error | extract "ip=<ip> " | stats by (ip) count() logs | sort by (logs) desc limit 10
```
See also:
- [format for extract pipe pattern](#format-for-extract-pipe-pattern)
- [`unpack_json` pipe](#unpack_json-pipe)
- [`unpack_logfmt` pipe](#unpack_logfmt-pipe)
#### Format for extract pipe pattern
The `pattern` part from [`| extract from src_field "pattern"` pipe](#extract-pipe) may contain arbitrary text, which is matched as is against the `src_field` value.
Additionally to arbitrary text, the `pattern` may contain placeholders in the form `<...>`, which match any strings, including empty strings.
Placeholders may be named, such as `<ip>`, or anonymous, such as `<_>`. Named placeholders extract the matching text into
the corresponding [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
Anonymous placeholders are useful for skipping arbitrary text during pattern matching.
For example, if [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) contains the following text:
```
1.2.3.4 GET /foo/bar?baz 404 "Mozilla foo bar baz" some tail here
```
Then the following `| extract ...` [pipe](#pipes) can be used for extracting `ip`, `path` and `user_agent` fields from it:
```
| extract '<ip> <_> <path> <_> "<user_agent>"'
```
Note that the user-agent part of the log message is enclosed in double quotes, so it may contain special chars, including escaped double quotes such as `\"`.
This may break proper matching of the quoted string.
VictoriaLogs automatically detects the whole quoted string and decodes it when the text matched by a placeholder starts with a double quote or a backtick.
So it is better to use the following `pattern` for proper matching of quoted strings:
```
| extract "<ip> <_> <path> <_> <user_agent>"
```
Note that the `user_agent` placeholder now matches the surrounding double quotes, but VictoriaLogs automatically unquotes the matched string before storing it in the `user_agent` field.
This property is useful for extracting JSON strings. For example, the following `pattern` properly extracts the `message` JSON string into the `msg` field:
```
| extract '"message":<msg>'
```
If some special chars such as `<` must be matched by the `pattern`, then they can be [html-escaped](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references).
For example, the following `pattern` properly matches `a < 123.456` text:
```
| extract "<left> &lt; <right>"
```
### field_names pipe
Sometimes it may be needed to get all the field names for the selected results. This may be done with `| field_names ...` [pipe](#pipes).
For example, the following query returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
from the logs over the last 5 minutes:
```logsql
_time:5m | field_names as names
```
Field names are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
See also:
- [`uniq` pipe](#uniq-pipe)
### fields pipe
By default all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) are returned in the response.
@ -1112,6 +1215,23 @@ See also:
- [`rename` pipe](#rename-pipe)
- [`delete` pipe](#delete-pipe)
### filter pipe
Sometimes it is needed to apply additional filters on the calculated results. This can be done with `| filter ...` [pipe](#pipes).
The `filter` pipe can contain arbitrary [filters](#filters).
For example, the following query returns `host` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) values
if the number of log messages with the `error` [word](#word) for them over the last hour exceeds `1_000`:
```logsql
_time:1h error | stats by (host) count() logs_count | filter logs_count:> 1_000
```
See also:
- [`stats` pipe](#stats-pipe)
- [`sort` pipe](#sort-pipe)
### limit pipe
If only a subset of selected logs must be processed, then `| limit N` [pipe](#pipes) can be used, where `N` can contain any [supported integer numeric value](#numeric-values).
@ -1200,6 +1320,12 @@ The reverse order can be applied globally via `desc` keyword after `by(...)` cla
_time:5m | sort by (foo, bar) desc
```
The `by` keyword can be skipped in `sort ...` pipe. For example, the following query is equivalent to the previous one:
```logsql
_time:5m | sort (foo, bar) desc
```
Sorting of big number of logs can consume a lot of CPU time and memory. Sometimes it is enough to return the first `N` entries with the biggest
or the smallest values. This can be done by adding `limit N` to the end of `sort ...` pipe.
Such a query consumes lower amounts of memory when sorting big number of logs, since it keeps in memory only `N` log entries.
@ -1232,35 +1358,6 @@ See also:
- [`limit` pipe](#limit-pipe)
- [`offset` pipe](#offset-pipe)
### uniq pipe
`| uniq ...` pipe allows returning only unique results over the selected logs. For example, the following LogsQL query
returns unique values for `ip` [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes:
```logsql
_time:5m | uniq by (ip)
```
It is possible to specify multiple fields inside `by(...)` clause. In this case all the unique sets for the given fields
are returned. For example, the following query returns all the unique `(host, path)` pairs for the logs over the last 5 minutes:
```logsql
_time:5m | uniq by (host, path)
```
Unique entries are stored in memory during query execution. Big number of unique selected entries may require a lot of memory.
Sometimes it is enough to return up to `N` unique entries. This can be done by adding `limit N` after `by (...)` clause.
This allows limiting memory usage. For example, the following query returns up to 100 unique `(host, path)` pairs for the logs over the last 5 minutes:
```logsql
_time:5m | uniq by (host, path) limit 100
```
See also:
- [`uniq_values` stats function](#uniq_values-stats)
### stats pipe
`| stats ...` pipe allows calculating various stats over the selected logs. For example, the following LogsQL query
@ -1293,6 +1390,13 @@ _time:5m | stats count() logs_total, count_uniq(_stream) streams_total
See also:
- [stats by fields](#stats-by-fields)
- [stats by time buckets](#stats-by-time-buckets)
- [stats by time buckets with timezone offset](#stats-by-time-buckets-with-timezone-offset)
- [stats by field buckets](#stats-by-field-buckets)
- [stats by IPv4 buckets](#stats-by-ipv4-buckets)
- [stats with additional filters](#stats-with-additional-filters)
- [stats pipe functions](#stats-pipe-functions)
- [`sort` pipe](#sort-pipe)
@ -1316,6 +1420,12 @@ grouped by `(host, path)` fields:
_time:5m | stats by (host, path) count() logs_total, count_uniq(ip) ips_total
```
The `by` keyword can be skipped in `stats ...` pipe. For example, the following query is equivalent to the previous one:
```logsql
_time:5m | stats (host, path) count() logs_total, count_uniq(ip) ips_total
```
#### Stats by time buckets
The following syntax can be used for calculating stats grouped by time buckets:
@ -1384,6 +1494,139 @@ extracted from the `ip` [log field](https://docs.victoriametrics.com/victorialog
_time:5m | stats by (ip:/24) count() requests_per_subnet
```
#### Stats with additional filters
Sometimes it is needed to calculate stats on different subsets of matching logs. This can be done by inserting `if (<any_filters>)` condition
between [stats function](#stats-pipe-functions) and `result_name`, where `<any_filters>` can contain arbitrary [filters](#filters).
For example, the following query calculates individually the number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with `GET`, `POST` and `PUT` [words](#word), additionally to the total number of logs over the last 5 minutes:
```logsql
_time:5m | stats
count() if (GET) gets,
count() if (POST) posts,
count() if (PUT) puts,
count() total
```
### uniq pipe
`| uniq ...` pipe allows returning only unique results over the selected logs. For example, the following LogsQL query
returns unique values for `ip` [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes:
```logsql
_time:5m | uniq by (ip)
```
It is possible to specify multiple fields inside `by(...)` clause. In this case all the unique sets for the given fields
are returned. For example, the following query returns all the unique `(host, path)` pairs for the logs over the last 5 minutes:
```logsql
_time:5m | uniq by (host, path)
```
The unique entries are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
Unique entries are stored in memory during query execution. Big number of unique selected entries may require a lot of memory.
Sometimes it is enough to return up to `N` unique entries. This can be done by adding `limit N` after `by (...)` clause.
This allows limiting memory usage. For example, the following query returns up to 100 unique `(host, path)` pairs for the logs over the last 5 minutes:
```logsql
_time:5m | uniq by (host, path) limit 100
```
The `by` keyword can be skipped in `uniq ...` pipe. For example, the following query is equivalent to the previous one:
```logsql
_time:5m | uniq (host, path) limit 100
```
See also:
- [`uniq_values` stats function](#uniq_values-stats)
### unpack_json pipe
`| unpack_json from field_name` pipe unpacks `{"k1":"v1", ..., "kN":"vN"}` JSON from the given `field_name` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
into `k1`, ... `kN` field names with the corresponding `v1`, ..., `vN` values. It overrides existing fields with names from the `k1`, ..., `kN` list. Other fields remain untouched.
Nested JSON is unpacked according to the rules defined [here](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
For example, the following query unpacks JSON fields from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) across logs for the last 5 minutes:
```logsql
_time:5m | unpack_json from _msg
```
The `from _msg` part can be omitted when JSON fields are unpacked from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
The following query is equivalent to the previous one:
```logsql
_time:5m | unpack_json
```
If you want to make sure that the unpacked JSON fields do not clash with the existing fields, then specify common prefix for all the fields extracted from JSON,
by adding `result_prefix "prefix_name"` to `unpack_json`. For example, the following query adds `foo_` prefix for all the unpacked fields
from [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
```logsql
_time:5m | unpack_json result_prefix "foo_"
```
Performance tip: if you need to extract a single field from long JSON, it is faster to use the [`extract` pipe](#extract-pipe). For example, the following query extracts the `"ip"` field from JSON
stored in [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
```
_time:5m | extract '"ip":<field_value>'
```
See also:
- [`unpack_logfmt` pipe](#unpack_logfmt-pipe)
- [`extract` pipe](#extract-pipe)
### unpack_logfmt pipe
`| unpack_logfmt from field_name` pipe unpacks `k1=v1 ... kN=vN` [logfmt](https://brandur.org/logfmt) fields
from the given `field_name` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into `k1`, ... `kN` field names
with the corresponding `v1`, ..., `vN` values. It overrides existing fields with names from the `k1`, ..., `kN` list. Other fields remain untouched.
For example, the following query unpacks [logfmt](https://brandur.org/logfmt) fields from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
across logs for the last 5 minutes:
```logsql
_time:5m | unpack_logfmt from _msg
```
The `from _msg` part can be omitted when [logfmt](https://brandur.org/logfmt) fields are unpacked from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
The following query is equivalent to the previous one:
```logsql
_time:5m | unpack_logfmt
```
If you want to make sure that the unpacked [logfmt](https://brandur.org/logfmt) fields do not clash with the existing fields, then specify common prefix for all the fields extracted from logfmt,
by adding `result_prefix "prefix_name"` to `unpack_logfmt`. For example, the following query adds `foo_` prefix for all the unpacked fields
from [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
```logsql
_time:5m | unpack_logfmt result_prefix "foo_"
```
Performance tip: if you need to extract a single field from a long [logfmt](https://brandur.org/logfmt) line, it is faster to use the [`extract` pipe](#extract-pipe).
For example, the following query extracts `"ip"` field from [logfmt](https://brandur.org/logfmt) line stored
in [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
```
_time:5m | extract ' ip=<field_value>'
```
See also:
- [`unpack_json` pipe](#unpack_json-pipe)
- [`extract` pipe](#extract-pipe)
## stats pipe functions
LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
@ -1505,9 +1748,8 @@ See also:
### max stats
`max(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the maximum value across
`max(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the maximum value across
all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
Non-numeric values are ignored.
For example, the following query returns the maximum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes:
@ -1543,9 +1785,8 @@ See also:
### min stats
`min(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the minimum value across
`min(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the minimum value across
all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
Non-numeric values are ignored.
For example, the following query returns the minimum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes:
@ -1678,48 +1919,37 @@ See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) fo
## Transformations
It is possible to perform various transformations on the [selected log entries](#filters) at client side
with `jq`, `awk`, `cut`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
LogsQL supports the following transformations on the log entries selected with [filters](#filters):
LogsQL will support the following transformations for the [selected](#filters) log entries:
- Extracting arbitrary text from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided pattern.
See [these docs](#extract-pipe) for details.
- Unpacking JSON fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). See [these docs](#unpack_json-pipe).
- Unpacking [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). See [these docs](#unpack_logfmt-pipe).
LogsQL will support the following transformations in the future:
- Extracting the specified fields from text [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided pattern.
- Extracting the specified fields from JSON strings stored inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- Extracting the specified fields from [logfmt](https://brandur.org/logfmt) strings stored
inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- Creating a new field from existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
according to the provided format.
- Creating a new field according to math calculations over existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- Parsing duration strings into floating-point seconds for further [stats calculations](#stats-pipe).
- Creating a boolean field with the result of arbitrary [post-filters](#post-filters) applied to the current fields.
- Creating an integer field with the length of the given field value. This can be useful for [stats calculations](#stats-pipe).
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
It is also possible to perform various transformations on the [selected log entries](#filters) at client side
with `jq`, `awk`, `cut`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
## Post-filters
It is possible to perform post-filtering on the [selected log entries](#filters) at client side with `grep` or similar Unix commands
Post-filtering of query results can be performed at any step by using [`filter` pipe](#filter-pipe).
It is also possible to perform post-filtering of the [selected log entries](#filters) at client side with `grep` and similar Unix commands
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
LogsQL will support post-filtering on the original [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and fields created by various [transformations](#transformations). The following post-filters will be supported:
- Full-text [filtering](#filters).
- [Logical filtering](#logical-filter).
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
## Stats
Stats over the selected logs can be calculated via [`stats` pipe](#stats-pipe).
LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and fields created by [transformations](#transformations):
It will be possible to specify an optional condition [filter](#post-filters) when calculating the stats.
For example, `sum(response_size) if (is_admin:true)` calculates the total response size for admins only.
It is possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
It is also possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
## Sorting

View file

@ -35,15 +35,13 @@ The following functionality is planned in the future versions of VictoriaLogs:
- Syslog
- Journald (systemd)
- Add missing functionality to [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html):
- [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context).
- [Transformation functions](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#transformations).
- [Post-filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#post-filters).
- The ability to use subqueries inside [in()](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#multi-exact-filter) function.
- [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context).
- Live tailing for [LogsQL filters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#filters) aka `tail -f`.
- Web UI with the following abilities:
- Explore the ingested logs ([partially done](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui)).
- Build graphs over time for the ingested logs.
- Integration with Grafana.
- Build graphs over time for the ingested logs via [hits HTTP API](https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats).
- Integration with Grafana ([partially done](https://github.com/VictoriaMetrics/victorialogs-datasource)).
- Ability to make instant snapshots and backups in the way [similar to VictoriaMetrics](https://docs.victoriametrics.com/#how-to-work-with-snapshots).
- Cluster version of VictoriaLogs.
- Ability to store data to object storage (such as S3, GCS, Minio).

View file

@ -88,6 +88,188 @@ curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID:
The number of requests to `/select/logsql/query` can be [monitored](https://docs.victoriametrics.com/VictoriaLogs/#monitoring)
with `vl_http_requests_total{path="/select/logsql/query"}` metric.
### Querying hits stats
VictoriaLogs provides `/select/logsql/hits?query=<query>&start=<start>&end=<end>&step=<step>` HTTP endpoint, which returns the number
of matching log entries for the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]`
time range grouped by `<step>` buckets. The returned results are sorted by time.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it defaults to the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it defaults to the maximum timestamp across logs stored in VictoriaLogs.
The `<step>` arg can contain values in [the format specified here](https://docs.victoriametrics.com/victorialogs/logsql/#stats-by-time-buckets).
If `<step>` is missing, then it defaults to `1d` (one day).
For example, the following command returns per-hour number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) over logs for the last 3 hours:
```sh
curl http://localhost:9428/select/logsql/hits -d 'query=error' -d 'start=3h' -d 'step=1h'
```
Below is an example JSON output returned from this endpoint:
```json
{
"hits": [
{
"fields": {},
"timestamps": [
"2024-01-01T00:00:00Z",
"2024-01-01T01:00:00Z",
"2024-01-01T02:00:00Z"
],
"values": [
410339,
450311,
899506
]
}
]
}
```
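
The same endpoint can be queried programmatically. Below is a minimal Go sketch, which assumes a VictoriaLogs instance at `localhost:9428` and relies on the response shape shown above; the `hitsResponse` type is defined here for illustration only and isn't part of any published client package:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// hitsResponse mirrors the JSON returned by /select/logsql/hits in the example above.
type hitsResponse struct {
	Hits []struct {
		Fields     map[string]string `json:"fields"`
		Timestamps []string          `json:"timestamps"`
		Values     []float64         `json:"values"`
	} `json:"hits"`
}

func main() {
	// Request per-hour hits for logs containing the "error" word over the last 3 hours.
	params := url.Values{}
	params.Set("query", "error")
	params.Set("start", "3h")
	params.Set("step", "1h")

	resp, err := http.PostForm("http://localhost:9428/select/logsql/hits", params)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var hr hitsResponse
	if err := json.NewDecoder(resp.Body).Decode(&hr); err != nil {
		panic(err)
	}
	for _, h := range hr.Hits {
		for i, ts := range h.Timestamps {
			fmt.Printf("%s %v -> %v\n", ts, h.Fields, h.Values[i])
		}
	}
}
```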
Additionally, the `offset=<offset>` arg can be passed to `/select/logsql/hits` in order to group buckets according to the given timezone offset.
The `<offset>` can contain values in [the format specified here](https://docs.victoriametrics.com/victorialogs/logsql/#duration-values).
For example, the following command returns per-day number of logs with `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
over the last week in New York time zone (`-4h`):
```sh
curl http://localhost:9428/select/logsql/hits -d 'query=error' -d 'start=1w' -d 'step=1d' -d 'offset=-4h'
```
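
If the offset must be derived from a timezone name instead of being hardcoded, it can be computed with the standard Go `time` package. This is just a sketch; the `America/New_York` zone is an assumed example, and the resulting string (for example `-4h0m0s`) is then passed as the `offset` arg:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Resolve the current UTC offset for the desired timezone.
	loc, err := time.LoadLocation("America/New_York")
	if err != nil {
		panic(err)
	}
	_, offsetSec := time.Now().In(loc).Zone()

	// Format the offset as a duration string, e.g. "-4h0m0s" during daylight saving time.
	offset := (time.Duration(offsetSec) * time.Second).String()
	fmt.Println("offset =", offset)
}
```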
Additionally, any number of `field=<field_name>` args can be passed to `/select/logsql/hits` for grouping hits buckets by the mentioned `<field_name>` fields.
For example, the following query groups hits by `level` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) additionally to the provided `step`:
```sh
curl http://localhost:9428/select/logsql/hits -d 'query=*' -d 'start=3h' -d 'step=1h' -d 'field=level'
```
The grouped fields are put inside the `"fields"` object:
```json
{
"hits": [
{
"fields": {
"level": "error"
},
"timestamps": [
"2024-01-01T00:00:00Z",
"2024-01-01T01:00:00Z",
"2024-01-01T02:00:00Z"
],
"values": [
25,
20,
15
]
},
{
"fields": {
"level": "info"
},
"timestamps": [
"2024-01-01T00:00:00Z",
"2024-01-01T01:00:00Z",
"2024-01-01T02:00:00Z"
],
"values": [
25625,
35043,
25230
]
}
]
}
```
See also:
- [Querying field names](#querying-field-names)
- [Querying field values](#querying-field-values)
- [HTTP API](#http-api)
### Querying field names
VictoriaLogs provides `/select/logsql/field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns field names
from the results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it defaults to the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it defaults to the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns field names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
for the last 5 minutes:
```sh
curl http://localhost:9428/select/logsql/field_names -d 'query=error' -d 'start=5m'
```
Below is an example JSON output returned from this endpoint:
```json
{
"names": [
"_msg",
"_stream",
"_time",
"host",
"level",
"location"
]
}
```
See also:
- [Querying field values](#querying-field-values)
- [Querying hits stats](#querying-hits-stats)
- [HTTP API](#http-api)
### Querying field values
VictoriaLogs provides `/select/logsql/field_values?query=<query>&field_name=<fieldName>&start=<start>&end=<end>` HTTP endpoint, which returns
unique values for the given `<fieldName>` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
from the results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it defaults to the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it defaults to the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns the unique values for the `host` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) for the last 5 minutes:
```sh
curl http://localhost:9428/select/logsql/field_values -d 'query=error' -d 'field_name=host' -d 'start=5m'
```
Below is an example JSON output returned from this endpoint:
```json
{
"values": [
"host_0",
"host_1",
"host_10",
"host_100",
"host_1000"
]
}
```
The `/select/logsql/field_values` endpoint supports an optional `limit=N` query arg, which allows limiting the number of returned values to `N`.
The endpoint returns an arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for paginating over a big number of field values.
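
Both the `field_names` and the `field_values` endpoints can be called from code in the same way as with `curl` above. The following Go sketch fetches up to 10 unique `host` values for logs with the `error` word over the last 5 minutes; the `valuesResponse` type simply mirrors the example output above and is an assumption for illustration:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// valuesResponse mirrors the JSON returned by /select/logsql/field_values in the example above.
type valuesResponse struct {
	Values []string `json:"values"`
}

func main() {
	params := url.Values{}
	params.Set("query", "error")
	params.Set("field_name", "host")
	params.Set("start", "5m")
	params.Set("limit", "10") // return at most 10 arbitrary values

	resp, err := http.PostForm("http://localhost:9428/select/logsql/field_values", params)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var vr valuesResponse
	if err := json.NewDecoder(resp.Body).Decode(&vr); err != nil {
		panic(err)
	}
	fmt.Println(vr.Values)
}
```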
See also:
- [Querying field names](#querying-field-names)
- [Querying hits stats](#querying-hits-stats)
- [HTTP API](#http-api)
## Web UI
VictoriaLogs provides a simple Web UI for logs [querying](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) and exploration

View file

@ -1,71 +0,0 @@
package logjson
import (
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
func TestParserFailure(t *testing.T) {
f := func(data string) {
t.Helper()
p := GetParser()
err := p.ParseLogMessage([]byte(data))
if err == nil {
t.Fatalf("expecting non-nil error")
}
PutParser(p)
}
f("")
f("{foo")
f("[1,2,3]")
f(`{"foo",}`)
}
func TestParserSuccess(t *testing.T) {
f := func(data string, fieldsExpected []logstorage.Field) {
t.Helper()
p := GetParser()
err := p.ParseLogMessage([]byte(data))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(p.Fields, fieldsExpected) {
t.Fatalf("unexpected fields;\ngot\n%s\nwant\n%s", p.Fields, fieldsExpected)
}
PutParser(p)
}
f("{}", nil)
f(`{"foo":"bar"}`, []logstorage.Field{
{
Name: "foo",
Value: "bar",
},
})
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, []logstorage.Field{
{
Name: "foo.bar",
Value: "baz",
},
{
Name: "a",
Value: "1",
},
{
Name: "b",
Value: "true",
},
{
Name: "c",
Value: "[1,2]",
},
{
Name: "d",
Value: "false",
},
})
}

View file

@ -4,6 +4,7 @@ import (
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
)
func getArena() *arena {
@ -29,8 +30,12 @@ func (a *arena) reset() {
a.b = a.b[:0]
}
func (a *arena) preallocate(n int) {
a.b = slicesutil.ExtendCapacity(a.b, n)
}
func (a *arena) sizeBytes() int {
return len(a.b)
return cap(a.b)
}
func (a *arena) copyBytes(b []byte) []byte {
@ -41,9 +46,8 @@ func (a *arena) copyBytes(b []byte) []byte {
ab := a.b
abLen := len(ab)
ab = append(ab, b...)
result := ab[abLen:]
a.b = ab
return result
return ab[abLen:]
}
func (a *arena) copyBytesToString(b []byte) string {

View file

@ -11,8 +11,8 @@ func TestArena(t *testing.T) {
for i := 0; i < 10; i++ {
a := getArena()
if n := a.sizeBytes(); n != 0 {
t.Fatalf("unexpected non-zero size of empty arena: %d", n)
if n := len(a.b); n != 0 {
t.Fatalf("unexpected non-zero length of empty arena: %d", n)
}
// add values to arena
@ -35,9 +35,12 @@ func TestArena(t *testing.T) {
}
}
if n := a.sizeBytes(); n != valuesLen {
if n := len(a.b); n != valuesLen {
t.Fatalf("unexpected arena size; got %d; want %d", n, valuesLen)
}
if n := a.sizeBytes(); n < valuesLen {
t.Fatalf("unexpected arena capacity; got %d; want at least %d", n, valuesLen)
}
// Try allocating slices with different lengths
bs := make([]string, 100)
@ -47,9 +50,12 @@ func TestArena(t *testing.T) {
t.Fatalf("unexpected len(b); got %d; want %d", len(b), j)
}
valuesLen += j
if n := a.sizeBytes(); n != valuesLen {
if n := len(a.b); n != valuesLen {
t.Fatalf("unexpected arena size; got %d; want %d", n, valuesLen)
}
if n := a.sizeBytes(); n < valuesLen {
t.Fatalf("unexpected arena capacity; got %d; want at least %d", n, valuesLen)
}
for k := range b {
b[k] = byte(k)
}

View file

@ -45,6 +45,8 @@ func (bm *bitmap) copyFrom(src *bitmap) {
}
func (bm *bitmap) init(bitsLen int) {
bm.reset()
a := bm.a
wordsLen := (bitsLen + 63) / 64
a = slicesutil.SetLength(a, wordsLen)
@ -126,6 +128,7 @@ func (bm *bitmap) forEachSetBit(f func(idx int) bool) {
if word == 0 {
continue
}
wordNew := word
for j := 0; j < 64; j++ {
mask := uint64(1) << j
if (word & mask) == 0 {
@ -136,9 +139,42 @@ func (bm *bitmap) forEachSetBit(f func(idx int) bool) {
break
}
if !f(idx) {
a[i] &= ^mask
wordNew &= ^mask
}
}
if word != wordNew {
a[i] = wordNew
}
}
}
// forEachSetBitReadonly calls f for each set bit in bm without modifying bm, unlike forEachSetBit.
func (bm *bitmap) forEachSetBitReadonly(f func(idx int)) {
if bm.areAllBitsSet() {
n := bm.bitsLen
for i := 0; i < n; i++ {
f(i)
}
return
}
a := bm.a
bitsLen := bm.bitsLen
for i, word := range a {
if word == 0 {
continue
}
for j := 0; j < 64; j++ {
mask := uint64(1) << j
if (word & mask) == 0 {
continue
}
idx := i*64 + j
if idx >= bitsLen {
break
}
f(idx)
}
}
}

View file

@ -32,7 +32,7 @@ func TestBitmap(t *testing.T) {
// Make sure that all the bits are set.
nextIdx := 0
bm.forEachSetBit(func(idx int) bool {
bm.forEachSetBitReadonly(func(idx int) {
if idx >= i {
t.Fatalf("index must be smaller than %d", i)
}
@ -40,7 +40,6 @@ func TestBitmap(t *testing.T) {
t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx)
}
nextIdx++
return true
})
if !bm.areAllBitsSet() {
@ -66,12 +65,11 @@ func TestBitmap(t *testing.T) {
}
nextIdx = 1
bm.forEachSetBit(func(idx int) bool {
bm.forEachSetBitReadonly(func(idx int) {
if idx != nextIdx {
t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx)
}
nextIdx += 2
return true
})
// Clear all the bits
@ -93,9 +91,8 @@ func TestBitmap(t *testing.T) {
}
bitsCount := 0
bm.forEachSetBit(func(_ int) bool {
bm.forEachSetBitReadonly(func(_ int) {
bitsCount++
return true
})
if bitsCount != 0 {
t.Fatalf("unexpected non-zero number of set bits remained: %d", bitsCount)

View file

@ -0,0 +1,131 @@
package logstorage
import (
"testing"
)
func BenchmarkBitmapForEachSetBitReadonly(b *testing.B) {
const bitsLen = 64 * 1024
b.Run("no-zero-bits", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
benchmarkBitmapForEachSetBitReadonly(b, bm)
putBitmap(bm)
})
b.Run("half-zero-bits", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx%2 == 0
})
benchmarkBitmapForEachSetBitReadonly(b, bm)
putBitmap(bm)
})
b.Run("one-set-bit", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx == bitsLen/2
})
benchmarkBitmapForEachSetBitReadonly(b, bm)
putBitmap(bm)
})
}
func BenchmarkBitmapForEachSetBit(b *testing.B) {
const bitsLen = 64 * 1024
b.Run("no-zero-bits-noclear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
benchmarkBitmapForEachSetBit(b, bm, false)
putBitmap(bm)
})
b.Run("no-zero-bits-clear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
benchmarkBitmapForEachSetBit(b, bm, true)
putBitmap(bm)
})
b.Run("half-zero-bits-noclear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx%2 == 0
})
benchmarkBitmapForEachSetBit(b, bm, false)
putBitmap(bm)
})
b.Run("half-zero-bits-clear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx%2 == 0
})
benchmarkBitmapForEachSetBit(b, bm, true)
putBitmap(bm)
})
b.Run("one-set-bit-noclear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx == bitsLen/2
})
benchmarkBitmapForEachSetBit(b, bm, false)
putBitmap(bm)
})
b.Run("one-set-bit-clear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx == bitsLen/2
})
benchmarkBitmapForEachSetBit(b, bm, true)
putBitmap(bm)
})
}
func benchmarkBitmapForEachSetBitReadonly(b *testing.B, bm *bitmap) {
b.SetBytes(int64(bm.bitsLen))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
bmLocal := getBitmap(bm.bitsLen)
n := 0
for pb.Next() {
bmLocal.copyFrom(bm)
bmLocal.forEachSetBitReadonly(func(idx int) {
n++
})
}
putBitmap(bmLocal)
GlobalSink.Add(uint64(n))
})
}
func benchmarkBitmapForEachSetBit(b *testing.B, bm *bitmap, isClearBits bool) {
b.SetBytes(int64(bm.bitsLen))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
bmLocal := getBitmap(bm.bitsLen)
n := 0
for pb.Next() {
bmLocal.copyFrom(bm)
bmLocal.forEachSetBit(func(idx int) bool {
n++
return !isClearBits
})
if isClearBits {
if !bmLocal.isZero() {
panic("BUG: bitmap must have no set bits")
}
} else {
if bmLocal.isZero() {
panic("BUG: bitmap must have some set bits")
}
}
}
putBitmap(bmLocal)
GlobalSink.Add(uint64(n))
})
}

File diff suppressed because it is too large

View file

@ -146,7 +146,7 @@ func (bs *blockSearch) partPath() string {
return bs.bsw.p.path
}
func (bs *blockSearch) search(bsw *blockSearchWork) {
func (bs *blockSearch) search(bsw *blockSearchWork, bm *bitmap) {
bs.reset()
bs.bsw = bsw
@ -154,23 +154,22 @@ func (bs *blockSearch) search(bsw *blockSearchWork) {
bs.csh.initFromBlockHeader(&bs.a, bsw.p, &bsw.bh)
// search rows matching the given filter
bm := getBitmap(int(bsw.bh.rowsCount))
defer putBitmap(bm)
bm.init(int(bsw.bh.rowsCount))
bm.setBits()
bs.bsw.so.filter.apply(bs, bm)
bs.bsw.so.filter.applyToBlockSearch(bs, bm)
bs.br.mustInit(bs, bm)
if bm.isZero() {
// The filter doesn't match any logs in the current block.
return
}
bs.br.mustInit(bs, bm)
// fetch the requested columns to bs.br.
if bs.bsw.so.needAllColumns {
bs.br.fetchAllColumns(bs, bm)
bs.br.initAllColumns(bs, bm)
} else {
bs.br.fetchRequestedColumns(bs, bm)
bs.br.initRequestedColumns(bs, bm)
}
}

View file

@ -37,7 +37,22 @@ func (fs fieldsSet) getAll() []string {
return a
}
func (fs fieldsSet) addFields(fields []string) {
for _, f := range fields {
fs.add(f)
}
}
func (fs fieldsSet) removeFields(fields []string) {
for _, f := range fields {
fs.remove(f)
}
}
func (fs fieldsSet) contains(field string) bool {
if field == "" {
field = "_msg"
}
_, ok := fs[field]
if !ok {
_, ok = fs["*"]
@ -45,28 +60,19 @@ func (fs fieldsSet) contains(field string) bool {
return ok
}
func (fs fieldsSet) removeAll(fields []string) {
for _, f := range fields {
fs.remove(f)
}
}
func (fs fieldsSet) remove(field string) {
if field == "*" {
fs.reset()
return
}
if !fs.contains("*") {
if field == "" {
field = "_msg"
}
delete(fs, field)
}
}
func (fs fieldsSet) addAll(fields []string) {
for _, f := range fields {
fs.add(f)
}
}
func (fs fieldsSet) add(field string) {
if fs.contains("*") {
return
@ -76,5 +82,8 @@ func (fs fieldsSet) add(field string) {
fs["*"] = struct{}{}
return
}
if field == "" {
field = "_msg"
}
fs[field] = struct{}{}
}

View file

@ -17,9 +17,10 @@ func TestFieldsSet(t *testing.T) {
}
fs.add("foo")
fs.add("bar")
fs.add("")
s := fs.String()
if s != "[bar,foo]" {
t.Fatalf("unexpected String() result; got %s; want %s", s, "[bar,foo]")
if s != "[_msg,bar,foo]" {
t.Fatalf("unexpected String() result; got %s; want %s", s, "[_msg,bar,foo]")
}
if !fs.contains("foo") {
t.Fatalf("fs must contain foo")
@ -27,6 +28,12 @@ func TestFieldsSet(t *testing.T) {
if !fs.contains("bar") {
t.Fatalf("fs must contain bar")
}
if !fs.contains("") {
t.Fatalf("fs must contain _msg")
}
if !fs.contains("_msg") {
t.Fatalf("fs must contain _msg")
}
if fs.contains("baz") {
t.Fatalf("fs musn't contain baz")
}
@ -41,6 +48,13 @@ func TestFieldsSet(t *testing.T) {
if fs.contains("bar") {
t.Fatalf("fs mustn't contain bar")
}
fs.remove("")
if fs.contains("") {
t.Fatalf("fs mustn't contain _msg")
}
if fs.contains("_msg") {
t.Fatalf("fs mustn't contain _msg")
}
// verify *
fs.add("*")
@ -59,25 +73,25 @@ func TestFieldsSet(t *testing.T) {
t.Fatalf("fs must be empty")
}
// verify addAll, getAll, removeAll
fs.addAll([]string{"foo", "bar"})
if !fs.contains("foo") || !fs.contains("bar") {
t.Fatalf("fs must contain foo and bar")
// verify addFields, removeFields, getAll
fs.addFields([]string{"foo", "bar", "_msg"})
if !fs.contains("foo") || !fs.contains("bar") || !fs.contains("_msg") {
t.Fatalf("fs must contain foo, bar and _msg")
}
a := fs.getAll()
if !reflect.DeepEqual(a, []string{"bar", "foo"}) {
t.Fatalf("unexpected result from getAll(); got %q; want %q", a, []string{"bar", "foo"})
if !reflect.DeepEqual(a, []string{"_msg", "bar", "foo"}) {
t.Fatalf("unexpected result from getAll(); got %q; want %q", a, []string{"_msg", "bar", "foo"})
}
fs.removeAll([]string{"bar", "baz"})
if fs.contains("bar") || fs.contains("baz") {
t.Fatalf("fs mustn't contain bar and baz")
fs.removeFields([]string{"bar", "baz", "_msg"})
if fs.contains("bar") || fs.contains("baz") || fs.contains("_msg") {
t.Fatalf("fs mustn't contain bar, baz and _msg")
}
if !fs.contains("foo") {
t.Fatalf("fs must contain foo")
}
// verify clone
fs.addAll([]string{"foo", "bar", "baz"})
fs.addFields([]string{"foo", "bar", "baz"})
fsStr := fs.String()
fsCopy := fs.clone()
fsCopyStr := fsCopy.String()

View file

@ -5,6 +5,104 @@ type filter interface {
// String returns string representation of the filter
String() string
// apply must update bm according to the filter applied to the given bs block
apply(bs *blockSearch, bm *bitmap)
// updateNeededFields must update neededFields with fields needed for the filter
updateNeededFields(neededFields fieldsSet)
// applyToBlockSearch must update bm according to the filter applied to the given bs block
applyToBlockSearch(bs *blockSearch, bm *bitmap)
// applyToBlockResult must update bm according to the filter applied to the given br block
applyToBlockResult(br *blockResult, bm *bitmap)
}
// visitFilter sequentially calls visitFunc for filters inside f.
//
// It stops calling visitFunc on the remaining filters as soon as visitFunc returns true.
// It returns the result of the last visitFunc call.
func visitFilter(f filter, visitFunc func(f filter) bool) bool {
switch t := f.(type) {
case *filterAnd:
return visitFilters(t.filters, visitFunc)
case *filterOr:
return visitFilters(t.filters, visitFunc)
case *filterNot:
return visitFilter(t.f, visitFunc)
default:
return visitFunc(f)
}
}
// visitFilters calls visitFunc per each filter in filters.
//
// It stops calling visitFunc on the remaining filters as soon as visitFunc returns true.
// It returns the result of the last visitFunc call.
func visitFilters(filters []filter, visitFunc func(f filter) bool) bool {
for _, f := range filters {
if visitFilter(f, visitFunc) {
return true
}
}
return false
}
// copyFilter recursively copies the filters inside f with the help of copyFunc if visitFunc returns true for them.
//
// It doesn't copy other filters by returning them as is.
func copyFilter(f filter, visitFunc func(f filter) bool, copyFunc func(f filter) (filter, error)) (filter, error) {
switch t := f.(type) {
case *filterAnd:
filters, err := copyFilters(t.filters, visitFunc, copyFunc)
if err != nil {
return nil, err
}
fa := &filterAnd{
filters: filters,
}
return fa, nil
case *filterOr:
filters, err := copyFilters(t.filters, visitFunc, copyFunc)
if err != nil {
return nil, err
}
fo := &filterOr{
filters: filters,
}
return fo, nil
case *filterNot:
f, err := copyFilter(t.f, visitFunc, copyFunc)
if err != nil {
return nil, err
}
fn := &filterNot{
f: f,
}
return fn, nil
default:
if !visitFunc(t) {
// Nothing to copy
return t, nil
}
return copyFunc(t)
}
}
// copyFilters recursively copies filters with the help of copyFunc if visitFunc returns true for them.
//
// It doesn't copy other filters by returning them as is.
func copyFilters(filters []filter, visitFunc func(f filter) bool, copyFunc func(f filter) (filter, error)) ([]filter, error) {
if !visitFilters(filters, visitFunc) {
// Nothing to copy
return filters, nil
}
// Copy filters.
filtersNew := make([]filter, len(filters))
for i, f := range filters {
fNew, err := copyFilter(f, visitFunc, copyFunc)
if err != nil {
return nil, err
}
filtersNew[i] = fNew
}
return filtersNew, nil
}

View file

@ -31,7 +31,24 @@ func (fa *filterAnd) String() string {
return strings.Join(a, " ")
}
func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) {
func (fa *filterAnd) updateNeededFields(neededFields fieldsSet) {
for _, f := range fa.filters {
f.updateNeededFields(neededFields)
}
}
func (fa *filterAnd) applyToBlockResult(br *blockResult, bm *bitmap) {
for _, f := range fa.filters {
f.applyToBlockResult(br, bm)
if bm.isZero() {
// Shortcut - there is no need in applying the remaining filters,
// since the result will be zero anyway.
return
}
}
}
func (fa *filterAnd) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
if !fa.matchMessageBloomFilter(bs) {
// Fast path - fa doesn't match _msg bloom filter.
bm.resetBits()
@ -40,7 +57,7 @@ func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) {
// Slow path - verify every filter separately.
for _, f := range fa.filters {
f.apply(bs, bm)
f.applyToBlockSearch(bs, bm)
if bm.isZero() {
// Shortcut - there is no need in applying the remaining filters,
// since the result will be zero anyway.

View file

@ -29,6 +29,10 @@ func (fp *filterAnyCasePhrase) String() string {
return fmt.Sprintf("%si(%s)", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.phrase))
}
func (fp *filterAnyCasePhrase) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterAnyCasePhrase) getTokens() []string {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokens
@ -47,7 +51,12 @@ func (fp *filterAnyCasePhrase) initPhraseLowercase() {
fp.phraseLowercase = strings.ToLower(fp.phrase)
}
func (fp *filterAnyCasePhrase) apply(bs *blockSearch, bm *bitmap) {
func (fp *filterAnyCasePhrase) applyToBlockResult(br *blockResult, bm *bitmap) {
phraseLowercase := fp.getPhraseLowercase()
applyToBlockResultGeneric(br, bm, fp.fieldName, phraseLowercase, matchAnyCasePhrase)
}
func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fp.fieldName
phraseLowercase := fp.getPhraseLowercase()
@ -100,10 +109,12 @@ func (fp *filterAnyCasePhrase) apply(bs *blockSearch, bm *bitmap) {
func matchValuesDictByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phraseLowercase string) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchAnyCasePhrase(v, phraseLowercase) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)

View file

@ -33,6 +33,10 @@ func (fp *filterAnyCasePrefix) String() string {
return fmt.Sprintf("%si(%s*)", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix))
}
func (fp *filterAnyCasePrefix) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterAnyCasePrefix) getTokens() []string {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokens
@ -51,7 +55,12 @@ func (fp *filterAnyCasePrefix) initPrefixLowercase() {
fp.prefixLowercase = strings.ToLower(fp.prefix)
}
func (fp *filterAnyCasePrefix) apply(bs *blockSearch, bm *bitmap) {
func (fp *filterAnyCasePrefix) applyToBlockResult(br *blockResult, bm *bitmap) {
prefixLowercase := fp.getPrefixLowercase()
applyToBlockResultGeneric(br, bm, fp.fieldName, prefixLowercase, matchAnyCasePrefix)
}
func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fp.fieldName
prefixLowercase := fp.getPrefixLowercase()
@ -101,10 +110,12 @@ func (fp *filterAnyCasePrefix) apply(bs *blockSearch, bm *bitmap) {
func matchValuesDictByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefixLowercase string) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchAnyCasePrefix(v, prefixLowercase) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)

View file

@ -24,6 +24,10 @@ func (fe *filterExact) String() string {
return fmt.Sprintf("%sexact(%s)", quoteFieldNameIfNeeded(fe.fieldName), quoteTokenIfNeeded(fe.value))
}
func (fe *filterExact) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fe.fieldName)
}
func (fe *filterExact) getTokens() []string {
fe.tokensOnce.Do(fe.initTokens)
return fe.tokens
@ -33,7 +37,133 @@ func (fe *filterExact) initTokens() {
fe.tokens = tokenizeStrings(nil, []string{fe.value})
}
func (fe *filterExact) apply(bs *blockSearch, bm *bitmap) {
func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
value := fe.value
c := br.getColumnByName(fe.fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if v != value {
bm.resetBits()
}
return
}
if c.isTime {
matchColumnByExactValue(br, bm, c, value)
return
}
switch c.valueType {
case valueTypeString:
matchColumnByExactValue(br, bm, c, value)
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if v == value {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
n, ok := tryParseUint64(value)
if !ok || n >= (1<<8) {
bm.resetBits()
return
}
nNeeded := uint8(n)
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := unmarshalUint8(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeUint16:
n, ok := tryParseUint64(value)
if !ok || n >= (1<<16) {
bm.resetBits()
return
}
nNeeded := uint16(n)
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := unmarshalUint16(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeUint32:
n, ok := tryParseUint64(value)
if !ok || n >= (1<<32) {
bm.resetBits()
return
}
nNeeded := uint32(n)
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := unmarshalUint32(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeUint64:
nNeeded, ok := tryParseUint64(value)
if !ok {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := unmarshalUint64(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeFloat64:
fNeeded, ok := tryParseFloat64(value)
if !ok {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
f := unmarshalFloat64(valuesEncoded[idx])
return f == fNeeded
})
case valueTypeIPv4:
ipNeeded, ok := tryParseIPv4(value)
if !ok {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
ip := unmarshalIPv4(valuesEncoded[idx])
return ip == ipNeeded
})
case valueTypeTimestampISO8601:
timestampNeeded, ok := tryParseTimestampISO8601(value)
if !ok {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
timestamp := unmarshalTimestampISO8601(valuesEncoded[idx])
return timestamp == timestampNeeded
})
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func matchColumnByExactValue(br *blockResult, bm *bitmap, c *blockResultColumn, value string) {
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
return values[idx] == value
})
}
func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fe.fieldName
value := fe.value
@ -121,10 +251,12 @@ func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, val
func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if v == value {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)

View file

@ -23,6 +23,10 @@ func (fep *filterExactPrefix) String() string {
return fmt.Sprintf("%sexact(%s*)", quoteFieldNameIfNeeded(fep.fieldName), quoteTokenIfNeeded(fep.prefix))
}
func (fep *filterExactPrefix) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fep.fieldName)
}
func (fep *filterExactPrefix) getTokens() []string {
fep.tokensOnce.Do(fep.initTokens)
return fep.tokens
@ -32,7 +36,11 @@ func (fep *filterExactPrefix) initTokens() {
fep.tokens = getTokensSkipLast(fep.prefix)
}
func (fep *filterExactPrefix) apply(bs *blockSearch, bm *bitmap) {
func (fep *filterExactPrefix) applyToBlockResult(br *blockResult, bm *bitmap) {
applyToBlockResultGeneric(br, bm, fep.fieldName, fep.prefix, matchExactPrefix)
}
func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fep.fieldName
prefix := fep.prefix
@ -91,7 +99,7 @@ func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *b
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v)
s := toTimestampISO8601String(bs, bb, v)
return matchExactPrefix(s, prefix)
})
bbPool.Put(bb)
@ -108,7 +116,7 @@ func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefi
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v)
s := toIPv4String(bs, bb, v)
return matchExactPrefix(s, prefix)
})
bbPool.Put(bb)
@ -126,7 +134,7 @@ func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pr
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v)
s := toFloat64String(bs, bb, v)
return matchExactPrefix(s, prefix)
})
bbPool.Put(bb)
@ -134,10 +142,12 @@ func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pr
func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchExactPrefix(v, prefix) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)

View file

@ -18,6 +18,15 @@ type filterIn struct {
fieldName string
values []string
// needExecuteQuery is set to true if q must be executed for populating values before filter execution.
needExecuteQuery bool
// If q is non-nil, then values must be populated from q before filter execution.
q *Query
// qFieldName must be set to field name for obtaining values from if q is non-nil.
qFieldName string
tokenSetsOnce sync.Once
tokenSets [][]string
@ -47,12 +56,22 @@ type filterIn struct {
}
func (fi *filterIn) String() string {
args := ""
if fi.q != nil {
args = fi.q.String()
} else {
values := fi.values
a := make([]string, len(values))
for i, value := range values {
a[i] = quoteTokenIfNeeded(value)
}
return fmt.Sprintf("%sin(%s)", quoteFieldNameIfNeeded(fi.fieldName), strings.Join(a, ","))
args = strings.Join(a, ",")
}
return fmt.Sprintf("%sin(%s)", quoteFieldNameIfNeeded(fi.fieldName), args)
}
func (fi *filterIn) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fi.fieldName)
}
func (fi *filterIn) getTokenSets() [][]string {
@ -249,7 +268,95 @@ func (fi *filterIn) initTimestampISO8601Values() {
fi.timestampISO8601Values = m
}
func (fi *filterIn) apply(bs *blockSearch, bm *bitmap) {
func (fi *filterIn) applyToBlockResult(br *blockResult, bm *bitmap) {
if len(fi.values) == 0 {
bm.resetBits()
return
}
c := br.getColumnByName(fi.fieldName)
if c.isConst {
stringValues := fi.getStringValues()
v := c.valuesEncoded[0]
if _, ok := stringValues[v]; !ok {
bm.resetBits()
}
return
}
if c.isTime {
fi.matchColumnByStringValues(br, bm, c)
return
}
switch c.valueType {
case valueTypeString:
fi.matchColumnByStringValues(br, bm, c)
case valueTypeDict:
stringValues := fi.getStringValues()
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if _, ok := stringValues[v]; ok {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
binValues := fi.getUint8Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeUint16:
binValues := fi.getUint16Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeUint32:
binValues := fi.getUint32Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeUint64:
binValues := fi.getUint64Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeFloat64:
binValues := fi.getFloat64Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeIPv4:
binValues := fi.getIPv4Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeTimestampISO8601:
binValues := fi.getTimestampISO8601Values()
matchColumnByBinValues(br, bm, c, binValues)
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func (fi *filterIn) matchColumnByStringValues(br *blockResult, bm *bitmap, c *blockResultColumn) {
stringValues := fi.getStringValues()
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
_, ok := stringValues[v]
return ok
})
}
func matchColumnByBinValues(br *blockResult, bm *bitmap, c *blockResultColumn, binValues map[string]struct{}) {
if len(binValues) == 0 {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
_, ok := binValues[v]
return ok
})
}
func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fi.fieldName
if len(fi.values) == 0 {
@ -314,6 +421,10 @@ func (fi *filterIn) apply(bs *blockSearch, bm *bitmap) {
}
func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, tokenSets [][]string) {
if len(values) == 0 {
bm.resetBits()
return
}
if !matchBloomFilterAnyTokenSet(bs, ch, tokenSets) {
bm.resetBits()
return
@ -344,10 +455,12 @@ func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, tokenSets []
func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if _, ok := values[v]; ok {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)

View file

@ -3,8 +3,6 @@ package logstorage
import (
"fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@ -18,12 +16,83 @@ type filterIPv4Range struct {
}
func (fr *filterIPv4Range) String() string {
minValue := string(encoding.MarshalUint32(nil, fr.minValue))
maxValue := string(encoding.MarshalUint32(nil, fr.maxValue))
return fmt.Sprintf("%sipv4_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), toIPv4String(nil, minValue), toIPv4String(nil, maxValue))
minValue := marshalIPv4String(nil, fr.minValue)
maxValue := marshalIPv4String(nil, fr.maxValue)
return fmt.Sprintf("%sipv4_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), minValue, maxValue)
}
func (fr *filterIPv4Range) apply(bs *blockSearch, bm *bitmap) {
func (fr *filterIPv4Range) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterIPv4Range) applyToBlockResult(br *blockResult, bm *bitmap) {
minValue := fr.minValue
maxValue := fr.maxValue
if minValue > maxValue {
bm.resetBits()
return
}
c := br.getColumnByName(fr.fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if !matchIPv4Range(v, minValue, maxValue) {
bm.resetBits()
}
return
}
if c.isTime {
bm.resetBits()
return
}
switch c.valueType {
case valueTypeString:
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return matchIPv4Range(v, minValue, maxValue)
})
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if matchIPv4Range(v, minValue, maxValue) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
bm.resetBits()
case valueTypeUint16:
bm.resetBits()
case valueTypeUint32:
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
ip := unmarshalIPv4(valuesEncoded[idx])
return ip >= minValue && ip <= maxValue
})
case valueTypeTimestampISO8601:
bm.resetBits()
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func (fr *filterIPv4Range) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName
minValue := fr.minValue
maxValue := fr.maxValue
@ -75,10 +144,12 @@ func (fr *filterIPv4Range) apply(bs *blockSearch, bm *bitmap) {
func matchValuesDictByIPv4Range(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue uint32) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchIPv4Range(v, minValue, maxValue) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)
@ -108,8 +179,7 @@ func matchIPv4ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, m
if len(v) != 4 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
}
b := bytesutil.ToUnsafeBytes(v)
n := encoding.UnmarshalUint32(b)
n := unmarshalIPv4(v)
return n >= minValue && n <= maxValue
})
}

View file

@ -3,7 +3,6 @@ package logstorage
import (
"unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@ -22,7 +21,101 @@ func (fr *filterLenRange) String() string {
return quoteFieldNameIfNeeded(fr.fieldName) + "len_range" + fr.stringRepr
}
func (fr *filterLenRange) apply(bs *blockSearch, bm *bitmap) {
func (fr *filterLenRange) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterLenRange) applyToBlockResult(br *blockResult, bm *bitmap) {
minLen := fr.minLen
maxLen := fr.maxLen
if minLen > maxLen {
bm.resetBits()
return
}
c := br.getColumnByName(fr.fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if !matchLenRange(v, minLen, maxLen) {
bm.resetBits()
}
return
}
if c.isTime {
matchColumnByLenRange(br, bm, c, minLen, maxLen)
return
}
switch c.valueType {
case valueTypeString:
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if matchLenRange(v, minLen, maxLen) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
if minLen > 3 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeUint16:
if minLen > 5 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeUint32:
if minLen > 10 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeUint64:
if minLen > 20 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeFloat64:
if minLen > 24 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeIPv4:
if minLen > uint64(len("255.255.255.255")) || maxLen < uint64(len("0.0.0.0")) {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeTimestampISO8601:
matchTimestampISO8601ByLenRange(bm, minLen, maxLen)
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func matchColumnByLenRange(br *blockResult, bm *bitmap, c *blockResultColumn, minLen, maxLen uint64) {
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return matchLenRange(v, minLen, maxLen)
})
}
func (fr *filterLenRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName
minLen := fr.minLen
maxLen := fr.maxLen
@ -89,7 +182,7 @@ func matchIPv4ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen,
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v)
s := toIPv4String(bs, bb, v)
return matchLenRange(s, minLen, maxLen)
})
bbPool.Put(bb)
@ -103,7 +196,7 @@ func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLe
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v)
s := toFloat64String(bs, bb, v)
return matchLenRange(s, minLen, maxLen)
})
bbPool.Put(bb)
@ -111,10 +204,12 @@ func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLe
func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchLenRange(v, minLen, maxLen) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)
@ -127,6 +222,10 @@ func matchStringByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
}
func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
if minLen > 3 || maxLen == 0 {
bm.resetBits()
return
}
if !matchMinMaxValueLen(ch, minLen, maxLen) {
bm.resetBits()
return
@ -141,6 +240,10 @@ func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen,
}
func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
if minLen > 5 || maxLen == 0 {
bm.resetBits()
return
}
if !matchMinMaxValueLen(ch, minLen, maxLen) {
bm.resetBits()
return
@ -155,6 +258,10 @@ func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
}
func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
if minLen > 10 || maxLen == 0 {
bm.resetBits()
return
}
if !matchMinMaxValueLen(ch, minLen, maxLen) {
bm.resetBits()
return
@ -169,6 +276,10 @@ func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
}
func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
if minLen > 20 || maxLen == 0 {
bm.resetBits()
return
}
if !matchMinMaxValueLen(ch, minLen, maxLen) {
bm.resetBits()
return
@ -191,12 +302,10 @@ func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool {
bb := bbPool.Get()
defer bbPool.Put(bb)
bb.B = marshalUint64(bb.B[:0], ch.minValue)
s := bytesutil.ToUnsafeString(bb.B)
if maxLen < uint64(len(s)) {
bb.B = marshalUint64String(bb.B[:0], ch.minValue)
if maxLen < uint64(len(bb.B)) {
return false
}
bb.B = marshalUint64(bb.B[:0], ch.maxValue)
s = bytesutil.ToUnsafeString(bb.B)
return minLen <= uint64(len(s))
bb.B = marshalUint64String(bb.B[:0], ch.maxValue)
return minLen <= uint64(len(bb.B))
}

View file

@ -8,6 +8,14 @@ func (fn *filterNoop) String() string {
return ""
}
func (fn *filterNoop) apply(_ *blockSearch, _ *bitmap) {
func (fn *filterNoop) updateNeededFields(_ fieldsSet) {
// nothing to do
}
func (fn *filterNoop) applyToBlockResult(_ *blockResult, _ *bitmap) {
// nothing to do
}
func (fn *filterNoop) applyToBlockSearch(_ *blockSearch, _ *bitmap) {
// nothing to do
}

View file

@ -16,12 +16,26 @@ func (fn *filterNot) String() string {
return "!" + s
}
func (fn *filterNot) apply(bs *blockSearch, bm *bitmap) {
func (fn *filterNot) updateNeededFields(neededFields fieldsSet) {
fn.f.updateNeededFields(neededFields)
}
func (fn *filterNot) applyToBlockResult(br *blockResult, bm *bitmap) {
// Minimize the number of rows to check by applying the filter
// only to the rows that match bm, i.e. the rows that may change the result in bm.
bmTmp := getBitmap(bm.bitsLen)
bmTmp.copyFrom(bm)
fn.f.apply(bs, bmTmp)
fn.f.applyToBlockResult(br, bmTmp)
bm.andNot(bmTmp)
putBitmap(bmTmp)
}
func (fn *filterNot) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
// Minimize the number of rows to check by applying the filter
// only to the rows that match bm, i.e. the rows that may change the result in bm.
bmTmp := getBitmap(bm.bitsLen)
bmTmp.copyFrom(bm)
fn.f.applyToBlockSearch(bs, bmTmp)
bm.andNot(bmTmp)
putBitmap(bmTmp)
}

View file

@ -21,7 +21,13 @@ func (fo *filterOr) String() string {
return strings.Join(a, " or ")
}
func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) {
func (fo *filterOr) updateNeededFields(neededFields fieldsSet) {
for _, f := range fo.filters {
f.updateNeededFields(neededFields)
}
}
func (fo *filterOr) applyToBlockResult(br *blockResult, bm *bitmap) {
bmResult := getBitmap(bm.bitsLen)
bmTmp := getBitmap(bm.bitsLen)
for _, f := range fo.filters {
@ -36,7 +42,30 @@ func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) {
// since the result already matches all the values from the block.
break
}
f.apply(bs, bmTmp)
f.applyToBlockResult(br, bmTmp)
bmResult.or(bmTmp)
}
putBitmap(bmTmp)
bm.copyFrom(bmResult)
putBitmap(bmResult)
}
func (fo *filterOr) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
bmResult := getBitmap(bm.bitsLen)
bmTmp := getBitmap(bm.bitsLen)
for _, f := range fo.filters {
// Minimize the number of rows to check by applying the filter only to the rows
// that may change the output bm:
// - bm matches them, i.e. the caller wants to get them
// - bmResult doesn't match them, i.e. all the previous OR filters didn't match them
bmTmp.copyFrom(bm)
bmTmp.andNot(bmResult)
if bmTmp.isZero() {
// Shortcut - there is no need in applying the remaining filters,
// since the result already matches all the values from the block.
break
}
f.applyToBlockSearch(bs, bmTmp)
bmResult.or(bmTmp)
}
putBitmap(bmTmp)

View file

@ -32,6 +32,10 @@ func (fp *filterPhrase) String() string {
return quoteFieldNameIfNeeded(fp.fieldName) + quoteTokenIfNeeded(fp.phrase)
}
func (fp *filterPhrase) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterPhrase) getTokens() []string {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokens
@ -41,7 +45,11 @@ func (fp *filterPhrase) initTokens() {
fp.tokens = tokenizeStrings(nil, []string{fp.phrase})
}
func (fp *filterPhrase) apply(bs *blockSearch, bm *bitmap) {
func (fp *filterPhrase) applyToBlockResult(br *blockResult, bm *bitmap) {
applyToBlockResultGeneric(br, bm, fp.fieldName, fp.phrase, matchPhrase)
}
func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fp.fieldName
phrase := fp.phrase
@ -107,7 +115,7 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v)
s := toTimestampISO8601String(bs, bb, v)
return matchPhrase(s, phrase)
})
bbPool.Put(bb)
@ -131,7 +139,7 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v)
s := toIPv4String(bs, bb, v)
return matchPhrase(s, phrase)
})
bbPool.Put(bb)
@ -160,7 +168,7 @@ func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v)
s := toFloat64String(bs, bb, v)
return matchPhrase(s, phrase)
})
bbPool.Put(bb)
@ -168,10 +176,12 @@ func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchPhrase(v, phrase) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)
@ -249,7 +259,7 @@ func getPhrasePos(s, phrase string) int {
}
func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encodedValues []byte) {
if len(encodedValues) == 0 {
if bytes.IndexByte(encodedValues, 1) < 0 {
// Fast path - the phrase is missing in the valuesDict
bm.resetBits()
return
@ -259,8 +269,11 @@ func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encod
if len(v) != 1 {
logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(v))
}
n := bytes.IndexByte(encodedValues, v[0])
return n >= 0
idx := v[0]
if int(idx) >= len(encodedValues) {
logger.Panicf("FATAL: %s: too big index for dict value; got %d; must be smaller than %d", bs.partPath(), idx, len(encodedValues))
}
return encodedValues[idx] == 1
})
}
@ -294,26 +307,107 @@ func isMsgFieldName(fieldName string) bool {
return fieldName == "" || fieldName == "_msg"
}
func toFloat64StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
func toFloat64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v))
}
bb.B = toFloat64String(bb.B[:0], v)
f := unmarshalFloat64(v)
bb.B = marshalFloat64String(bb.B[:0], f)
return bytesutil.ToUnsafeString(bb.B)
}
func toIPv4StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
func toIPv4String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
if len(v) != 4 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
}
bb.B = toIPv4String(bb.B[:0], v)
ip := unmarshalIPv4(v)
bb.B = marshalIPv4String(bb.B[:0], ip)
return bytesutil.ToUnsafeString(bb.B)
}
func toTimestampISO8601StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
func toTimestampISO8601String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of ISO8601 timestamp: got %d; want 8", bs.partPath(), len(v))
}
bb.B = toTimestampISO8601String(bb.B[:0], v)
timestamp := unmarshalTimestampISO8601(v)
bb.B = marshalTimestampISO8601String(bb.B[:0], timestamp)
return bytesutil.ToUnsafeString(bb.B)
}
func applyToBlockResultGeneric(br *blockResult, bm *bitmap, fieldName, phrase string, matchFunc func(v, phrase string) bool) {
c := br.getColumnByName(fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if !matchFunc(v, phrase) {
bm.resetBits()
}
return
}
if c.isTime {
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
return
}
switch c.valueType {
case valueTypeString:
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if matchFunc(v, phrase) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
n, ok := tryParseUint64(phrase)
if !ok || n >= (1<<8) {
bm.resetBits()
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeUint16:
n, ok := tryParseUint64(phrase)
if !ok || n >= (1<<16) {
bm.resetBits()
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeUint32:
n, ok := tryParseUint64(phrase)
if !ok || n >= (1<<32) {
bm.resetBits()
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeUint64:
_, ok := tryParseUint64(phrase)
if !ok {
bm.resetBits()
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeFloat64:
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeIPv4:
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeTimestampISO8601:
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func matchColumnByPhraseGeneric(br *blockResult, bm *bitmap, c *blockResultColumn, phrase string, matchFunc func(v, phrase string) bool) {
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
return matchFunc(values[idx], phrase)
})
}
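For reference, applyToBlockResultGeneric above is the shared fast path for the string-oriented filters in this package: it handles const, time, dict and typed columns once and delegates the per-value comparison to matchFunc. A minimal, hypothetical sketch of how a new filter could plug into it (filterContains does not exist in the package; the standard "strings" import is assumed):

// Hypothetical substring filter, shown only to illustrate the matchFunc contract.
type filterContains struct {
	fieldName string
	substr    string
}

func (fc *filterContains) applyToBlockResult(br *blockResult, bm *bitmap) {
	// matchFunc runs for every value under the set bits in bm;
	// applyToBlockResultGeneric clears the bits of non-matching rows.
	applyToBlockResultGeneric(br, bm, fc.fieldName, fc.substr, func(v, phrase string) bool {
		return strings.Contains(v, phrase)
	})
}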

View file

@ -7,7 +7,6 @@ import (
"unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@ -31,6 +30,10 @@ func (fp *filterPrefix) String() string {
return fmt.Sprintf("%s%s*", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix))
}
func (fp *filterPrefix) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterPrefix) getTokens() []string {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokens
@ -40,7 +43,11 @@ func (fp *filterPrefix) initTokens() {
fp.tokens = getTokensSkipLast(fp.prefix)
}
func (fp *filterPrefix) apply(bs *blockSearch, bm *bitmap) {
func (fp *filterPrefix) applyToBlockResult(bs *blockResult, bm *bitmap) {
applyToBlockResultGeneric(bs, bm, fp.fieldName, fp.prefix, matchPrefix)
}
func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fp.fieldName
prefix := fp.prefix
@ -102,7 +109,7 @@ func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v)
s := toTimestampISO8601String(bs, bb, v)
return matchPrefix(s, prefix)
})
bbPool.Put(bb)
@ -123,7 +130,7 @@ func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix str
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v)
s := toIPv4String(bs, bb, v)
return matchPrefix(s, prefix)
})
bbPool.Put(bb)
@ -151,7 +158,7 @@ func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v)
s := toFloat64String(bs, bb, v)
return matchPrefix(s, prefix)
})
bbPool.Put(bb)
@ -159,10 +166,12 @@ func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix
func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchPrefix(v, prefix) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)
@ -321,8 +330,8 @@ func toUint8String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
if len(v) != 1 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v))
}
n := uint64(v[0])
bb.B = marshalUint64(bb.B[:0], n)
n := unmarshalUint8(v)
bb.B = marshalUint8String(bb.B[:0], n)
return bytesutil.ToUnsafeString(bb.B)
}
@ -330,9 +339,8 @@ func toUint16String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
if len(v) != 2 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v))
}
b := bytesutil.ToUnsafeBytes(v)
n := uint64(encoding.UnmarshalUint16(b))
bb.B = marshalUint64(bb.B[:0], n)
n := unmarshalUint16(v)
bb.B = marshalUint16String(bb.B[:0], n)
return bytesutil.ToUnsafeString(bb.B)
}
@ -340,9 +348,8 @@ func toUint32String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
if len(v) != 4 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint32 number: got %d; want 4", bs.partPath(), len(v))
}
b := bytesutil.ToUnsafeBytes(v)
n := uint64(encoding.UnmarshalUint32(b))
bb.B = marshalUint64(bb.B[:0], n)
n := unmarshalUint32(v)
bb.B = marshalUint32String(bb.B[:0], n)
return bytesutil.ToUnsafeString(bb.B)
}
@ -350,8 +357,7 @@ func toUint64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint64 number: got %d; want 8", bs.partPath(), len(v))
}
b := bytesutil.ToUnsafeBytes(v)
n := encoding.UnmarshalUint64(b)
bb.B = marshalUint64(bb.B[:0], n)
n := unmarshalUint64(v)
bb.B = marshalUint64String(bb.B[:0], n)
return bytesutil.ToUnsafeString(bb.B)
}

View file

@ -3,8 +3,6 @@ package logstorage
import (
"math"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@ -13,6 +11,7 @@ import (
// Example LogsQL: `fieldName:range(minValue, maxValue]`
type filterRange struct {
fieldName string
minValue float64
maxValue float64
@ -20,10 +19,124 @@ type filterRange struct {
}
func (fr *filterRange) String() string {
return quoteFieldNameIfNeeded(fr.fieldName) + "range" + fr.stringRepr
return quoteFieldNameIfNeeded(fr.fieldName) + fr.stringRepr
}
func (fr *filterRange) apply(bs *blockSearch, bm *bitmap) {
func (fr *filterRange) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterRange) applyToBlockResult(br *blockResult, bm *bitmap) {
minValue := fr.minValue
maxValue := fr.maxValue
if minValue > maxValue {
bm.resetBits()
return
}
c := br.getColumnByName(fr.fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if !matchRange(v, minValue, maxValue) {
bm.resetBits()
}
return
}
if c.isTime {
bm.resetBits()
return
}
switch c.valueType {
case valueTypeString:
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return matchRange(v, minValue, maxValue)
})
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if matchRange(v, minValue, maxValue) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
n := uint64(unmarshalUint8(v))
return n >= minValueUint && n <= maxValueUint
})
case valueTypeUint16:
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
n := uint64(unmarshalUint16(v))
return n >= minValueUint && n <= maxValueUint
})
case valueTypeUint32:
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
n := uint64(unmarshalUint32(v))
return n >= minValueUint && n <= maxValueUint
})
case valueTypeUint64:
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
n := unmarshalUint64(v)
return n >= minValueUint && n <= maxValueUint
})
case valueTypeFloat64:
if minValue > math.Float64frombits(c.maxValue) || maxValue < math.Float64frombits(c.minValue) {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
f := unmarshalFloat64(v)
return f >= minValue && f <= maxValue
})
case valueTypeTimestampISO8601:
bm.resetBits()
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func (fr *filterRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName
minValue := fr.minValue
maxValue := fr.maxValue
@ -83,19 +196,19 @@ func matchFloat64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue
if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v))
}
b := bytesutil.ToUnsafeBytes(v)
n := encoding.UnmarshalUint64(b)
f := math.Float64frombits(n)
f := unmarshalFloat64(v)
return f >= minValue && f <= maxValue
})
}
func matchValuesDictByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchRange(v, minValue, maxValue) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)
@ -118,7 +231,7 @@ func matchUint8ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
if len(v) != 1 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v))
}
n := uint64(v[0])
n := uint64(unmarshalUint8(v))
return n >= minValueUint && n <= maxValueUint
})
bbPool.Put(bb)
@ -135,8 +248,7 @@ func matchUint16ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
if len(v) != 2 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v))
}
b := bytesutil.ToUnsafeBytes(v)
n := uint64(encoding.UnmarshalUint16(b))
n := uint64(unmarshalUint16(v))
return n >= minValueUint && n <= maxValueUint
})
bbPool.Put(bb)
@ -153,8 +265,7 @@ func matchUint32ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
if len(v) != 4 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 4", bs.partPath(), len(v))
}
b := bytesutil.ToUnsafeBytes(v)
n := uint64(encoding.UnmarshalUint32(b))
n := uint64(unmarshalUint32(v))
return n >= minValueUint && n <= maxValueUint
})
bbPool.Put(bb)
@ -171,8 +282,7 @@ func matchUint64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 8", bs.partPath(), len(v))
}
b := bytesutil.ToUnsafeBytes(v)
n := encoding.UnmarshalUint64(b)
n := unmarshalUint64(v)
return n >= minValueUint && n <= maxValueUint
})
bbPool.Put(bb)

View file

@ -19,7 +19,18 @@ func (fr *filterRegexp) String() string {
return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String())
}
func (fr *filterRegexp) apply(bs *blockSearch, bm *bitmap) {
func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterRegexp) applyToBlockResult(br *blockResult, bm *bitmap) {
re := fr.re
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
return re.MatchString(v)
})
}
func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName
re := fr.re
@ -69,7 +80,7 @@ func (fr *filterRegexp) apply(bs *blockSearch, bm *bitmap) {
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v)
s := toTimestampISO8601String(bs, bb, v)
return re.MatchString(s)
})
bbPool.Put(bb)
@ -78,7 +89,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v)
s := toIPv4String(bs, bb, v)
return re.MatchString(s)
})
bbPool.Put(bb)
@ -87,7 +98,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v)
s := toFloat64String(bs, bb, v)
return re.MatchString(s)
})
bbPool.Put(bb)
@ -95,10 +106,12 @@ func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *reg
func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if re.MatchString(v) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)

View file

@ -31,6 +31,10 @@ func (fs *filterSequence) String() string {
return fmt.Sprintf("%sseq(%s)", quoteFieldNameIfNeeded(fs.fieldName), strings.Join(a, ","))
}
func (fs *filterSequence) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fs.fieldName)
}
func (fs *filterSequence) getTokens() []string {
fs.tokensOnce.Do(fs.initTokens)
return fs.tokens
@ -58,7 +62,18 @@ func (fs *filterSequence) initNonEmptyPhrases() {
fs.nonEmptyPhrases = result
}
func (fs *filterSequence) apply(bs *blockSearch, bm *bitmap) {
func (fs *filterSequence) applyToBlockResult(br *blockResult, bm *bitmap) {
phrases := fs.getNonEmptyPhrases()
if len(phrases) == 0 {
return
}
applyToBlockResultGeneric(br, bm, fs.fieldName, "", func(v, _ string) bool {
return matchSequence(v, phrases)
})
}
func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fs.fieldName
phrases := fs.getNonEmptyPhrases()
@ -124,7 +139,7 @@ func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitm
// Slow path - phrases contain incomplete timestamp. Search over string representation of the timestamp.
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v)
s := toTimestampISO8601String(bs, bb, v)
return matchSequence(s, phrases)
})
bbPool.Put(bb)
@ -145,7 +160,7 @@ func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases,
// the ip to string before searching for prefix there.
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v)
s := toIPv4String(bs, bb, v)
return matchSequence(s, phrases)
})
bbPool.Put(bb)
@ -163,7 +178,7 @@ func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phras
// of floating-point numbers :(
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v)
s := toFloat64String(bs, bb, v)
return matchSequence(s, phrases)
})
bbPool.Put(bb)
@ -171,10 +186,12 @@ func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phras
func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchSequence(v, phrases) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)

View file

@ -2,6 +2,8 @@ package logstorage
import (
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// filterStream is the filter for `_stream:{...}`
@ -27,6 +29,10 @@ func (fs *filterStream) String() string {
return "_stream:" + s
}
func (fs *filterStream) updateNeededFields(neededFields fieldsSet) {
neededFields.add("_stream")
}
func (fs *filterStream) getStreamIDs() map[streamID]struct{} {
fs.streamIDsOnce.Do(fs.initStreamIDs)
return fs.streamIDs
@ -41,7 +47,66 @@ func (fs *filterStream) initStreamIDs() {
fs.streamIDs = m
}
func (fs *filterStream) apply(bs *blockSearch, bm *bitmap) {
func (fs *filterStream) applyToBlockResult(br *blockResult, bm *bitmap) {
if fs.f.isEmpty() {
return
}
c := br.getColumnByName("_stream")
if c.isConst {
v := c.valuesEncoded[0]
if !fs.f.matchStreamName(v) {
bm.resetBits()
}
return
}
if c.isTime {
bm.resetBits()
return
}
switch c.valueType {
case valueTypeString:
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return fs.f.matchStreamName(v)
})
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if fs.f.matchStreamName(v) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
bm.resetBits()
case valueTypeUint16:
bm.resetBits()
case valueTypeUint32:
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:
bm.resetBits()
case valueTypeTimestampISO8601:
bm.resetBits()
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func (fs *filterStream) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
if fs.f.isEmpty() {
return
}

View file

@ -22,7 +22,25 @@ func (fr *filterStringRange) String() string {
return fmt.Sprintf("%sstring_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), quoteTokenIfNeeded(fr.minValue), quoteTokenIfNeeded(fr.maxValue))
}
func (fr *filterStringRange) apply(bs *blockSearch, bm *bitmap) {
func (fr *filterStringRange) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterStringRange) applyToBlockResult(br *blockResult, bm *bitmap) {
minValue := fr.minValue
maxValue := fr.maxValue
if minValue > maxValue {
bm.resetBits()
return
}
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
return matchStringRange(v, minValue, maxValue)
})
}
func (fr *filterStringRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName
minValue := fr.minValue
maxValue := fr.maxValue
@ -81,7 +99,7 @@ func matchTimestampISO8601ByStringRange(bs *blockSearch, ch *columnHeader, bm *b
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v)
s := toTimestampISO8601String(bs, bb, v)
return matchStringRange(s, minValue, maxValue)
})
bbPool.Put(bb)
@ -95,7 +113,7 @@ func matchIPv4ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minVa
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v)
s := toIPv4String(bs, bb, v)
return matchStringRange(s, minValue, maxValue)
})
bbPool.Put(bb)
@ -109,7 +127,7 @@ func matchFloat64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, mi
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v)
s := toFloat64String(bs, bb, v)
return matchStringRange(s, minValue, maxValue)
})
bbPool.Put(bb)
@ -117,10 +135,12 @@ func matchFloat64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, mi
func matchValuesDictByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue string) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
for _, v := range ch.valuesDict.values {
c := byte(0)
if matchStringRange(v, minValue, maxValue) {
bb.B = append(bb.B, byte(i))
c = 1
}
bb.B = append(bb.B, c)
}
matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb)

View file

@ -197,11 +197,6 @@ func testFilterMatchForStorage(t *testing.T, s *Storage, tenantID TenantID, f fi
}
workersCount := 3
s.search(workersCount, so, nil, func(_ uint, br *blockResult) {
// Verify tenantID
if !br.streamID.tenantID.equal(&tenantID) {
t.Fatalf("unexpected tenantID in blockResult; got %s; want %s", &br.streamID.tenantID, &tenantID)
}
// Verify columns
cs := br.getColumns()
if len(cs) != 1 {

View file

@ -1,8 +1,12 @@
package logstorage
import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// filterTime filters by time.
//
// It is expressed as `_time:(start, end]` in LogsQL.
// It is expressed as `_time:[start, end]` in LogsQL.
type filterTime struct {
// mintimestamp is the minimum timestamp in nanoseconds to find
minTimestamp int64
@ -18,7 +22,95 @@ func (ft *filterTime) String() string {
return "_time:" + ft.stringRepr
}
func (ft *filterTime) apply(bs *blockSearch, bm *bitmap) {
func (ft *filterTime) updateNeededFields(neededFields fieldsSet) {
neededFields.add("_time")
}
func (ft *filterTime) applyToBlockResult(br *blockResult, bm *bitmap) {
minTimestamp := ft.minTimestamp
maxTimestamp := ft.maxTimestamp
if minTimestamp > maxTimestamp {
bm.resetBits()
return
}
c := br.getColumnByName("_time")
if c.isConst {
v := c.valuesEncoded[0]
if !ft.matchTimestampString(v) {
bm.resetBits()
}
return
}
if c.isTime {
timestamps := br.timestamps
bm.forEachSetBit(func(idx int) bool {
timestamp := timestamps[idx]
return ft.matchTimestampValue(timestamp)
})
return
}
switch c.valueType {
case valueTypeString:
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return ft.matchTimestampString(v)
})
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if ft.matchTimestampString(v) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
bm.resetBits()
case valueTypeUint16:
bm.resetBits()
case valueTypeUint32:
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:
bm.resetBits()
case valueTypeTimestampISO8601:
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
timestamp := unmarshalTimestampISO8601(v)
return ft.matchTimestampValue(timestamp)
})
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func (ft *filterTime) matchTimestampString(v string) bool {
timestamp, ok := tryParseTimestampRFC3339Nano(v)
if !ok {
return false
}
return ft.matchTimestampValue(timestamp)
}
func (ft *filterTime) matchTimestampValue(timestamp int64) bool {
return timestamp >= ft.minTimestamp && timestamp <= ft.maxTimestamp
}
func (ft *filterTime) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
minTimestamp := ft.minTimestamp
maxTimestamp := ft.maxTimestamp

View file

@ -268,7 +268,7 @@ func (is *indexSearch) getStreamIDsForTagFilter(tenantID TenantID, tf *streamTag
}
return ids
case "=~":
re := tf.getRegexp()
re := tf.regexp
if re.MatchString("") {
// (field=~"|re") => (field="" or field=~"re")
ids := is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName)
@ -280,7 +280,7 @@ func (is *indexSearch) getStreamIDsForTagFilter(tenantID TenantID, tf *streamTag
}
return is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
case "!~":
re := tf.getRegexp()
re := tf.regexp
if re.MatchString("") {
// (field!~"|re") => (field!="" and not field=~"re")
ids := is.getStreamIDsForTagName(tenantID, tf.tagName)

View file

@ -50,7 +50,7 @@ func TestStorageSearchStreamIDs(t *testing.T) {
f := func(filterStream string, expectedStreamIDs []streamID) {
t.Helper()
sf := mustNewStreamFilter(filterStream)
sf := mustNewTestStreamFilter(filterStream)
if expectedStreamIDs == nil {
expectedStreamIDs = []streamID{}
}
@ -68,7 +68,7 @@ func TestStorageSearchStreamIDs(t *testing.T) {
AccountID: 1,
ProjectID: 2,
}
sf := mustNewStreamFilter(`{job="job-0",instance="instance-0"}`)
sf := mustNewTestStreamFilter(`{job="job-0",instance="instance-0"}`)
for i := 0; i < 3; i++ {
streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf)
if len(streamIDs) > 0 {

View file

@ -1,4 +1,4 @@
package logjson
package logstorage
import (
"fmt"
@ -6,21 +6,20 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/valyala/fastjson"
)
// Parser parses a single JSON log message into Fields.
// JSONParser parses a single JSON log message into Fields.
//
// See https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model
//
// Use GetJSONParser() for obtaining the parser.
type Parser struct {
type JSONParser struct {
// Fields contains the parsed JSON line after ParseLogMessage() call
//
// The Fields are valid until the next call to ParseLogMessage()
// or until the parser is returned to the pool with PutJSONParser() call.
Fields []logstorage.Field
Fields []Field
// p is used for fast JSON parsing
p fastjson.Parser
@ -33,59 +32,79 @@ type Parser struct {
prefixBuf []byte
}
func (p *Parser) reset() {
fields := p.Fields
for i := range fields {
lf := &fields[i]
lf.Name = ""
lf.Value = ""
}
p.Fields = fields[:0]
func (p *JSONParser) reset() {
p.resetNobuf()
p.buf = p.buf[:0]
}
func (p *JSONParser) resetNobuf() {
clear(p.Fields)
p.Fields = p.Fields[:0]
p.prefixBuf = p.prefixBuf[:0]
}
// GetParser returns Parser ready to parse JSON lines.
// GetJSONParser returns JSONParser ready to parse JSON lines.
//
// Return the parser to the pool when it is no longer needed by calling PutParser().
func GetParser() *Parser {
// Return the parser to the pool when it is no longer needed by calling PutJSONParser().
func GetJSONParser() *JSONParser {
v := parserPool.Get()
if v == nil {
return &Parser{}
return &JSONParser{}
}
return v.(*Parser)
return v.(*JSONParser)
}
// PutParser returns the parser to the pool.
// PutJSONParser returns the parser to the pool.
//
// The parser cannot be used after returning to the pool.
func PutParser(p *Parser) {
func PutJSONParser(p *JSONParser) {
p.reset()
parserPool.Put(p)
}
var parserPool sync.Pool
// ParseLogMessageNoResetBuf parses the given JSON log message msg into p.Fields.
//
// It adds the given prefix to all the parsed field names.
//
// The p.Fields remains valid until the next call to PutJSONParser().
func (p *JSONParser) ParseLogMessageNoResetBuf(msg, prefix string) error {
return p.parseLogMessage(msg, prefix, false)
}
// ParseLogMessage parses the given JSON log message msg into p.Fields.
//
// The p.Fields remains valid until the next call to ParseLogMessage() or PutParser().
func (p *Parser) ParseLogMessage(msg []byte) error {
s := bytesutil.ToUnsafeString(msg)
v, err := p.p.Parse(s)
// It adds the given prefix to all the parsed field names.
//
// The p.Fields remains valid until the next call to ParseLogMessage() or PutJSONParser().
func (p *JSONParser) ParseLogMessage(msg []byte, prefix string) error {
msgStr := bytesutil.ToUnsafeString(msg)
return p.parseLogMessage(msgStr, prefix, true)
}
func (p *JSONParser) parseLogMessage(msg, prefix string, resetBuf bool) error {
v, err := p.p.Parse(msg)
if err != nil {
return fmt.Errorf("cannot parse json: %w", err)
}
if t := v.Type(); t != fastjson.TypeObject {
return fmt.Errorf("expecting json dictionary; got %s", t)
}
if resetBuf {
p.reset()
} else {
p.resetNobuf()
}
p.prefixBuf = append(p.prefixBuf[:0], prefix...)
p.Fields, p.buf, p.prefixBuf = appendLogFields(p.Fields, p.buf, p.prefixBuf, v)
return nil
}
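The renamed parser keeps the same pooling contract as before. A short usage sketch based on the exported API above (the prefix value and field names are illustrative; the fmt and logstorage imports are assumed):

func parseEntry(line []byte) error {
	p := logstorage.GetJSONParser()
	defer logstorage.PutJSONParser(p)

	// The second argument is prepended to every parsed field name,
	// so "level" becomes "attr.level" here.
	if err := p.ParseLogMessage(line, "attr."); err != nil {
		return fmt.Errorf("cannot parse json-encoded log entry: %w", err)
	}
	// p.Fields stays valid until the next ParseLogMessage() or PutJSONParser() call.
	for _, f := range p.Fields {
		fmt.Printf("%s=%q\n", f.Name, f.Value)
	}
	return nil
}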
// RenameField renames field with the oldName to newName in p.Fields
func (p *Parser) RenameField(oldName, newName string) {
func (p *JSONParser) RenameField(oldName, newName string) {
if oldName == "" {
return
}
@ -99,7 +118,7 @@ func (p *Parser) RenameField(oldName, newName string) {
}
}
func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]logstorage.Field, []byte, []byte) {
func appendLogFields(dst []Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]Field, []byte, []byte) {
o := v.GetObject()
o.Visit(func(k []byte, v *fastjson.Value) {
t := v.Type()
@ -133,13 +152,13 @@ func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjs
return dst, dstBuf, prefixBuf
}
func appendLogField(dst []logstorage.Field, dstBuf, prefixBuf, k, value []byte) ([]logstorage.Field, []byte) {
func appendLogField(dst []Field, dstBuf, prefixBuf, k, value []byte) ([]Field, []byte) {
dstBufLen := len(dstBuf)
dstBuf = append(dstBuf, prefixBuf...)
dstBuf = append(dstBuf, k...)
name := dstBuf[dstBufLen:]
dst = append(dst, logstorage.Field{
dst = append(dst, Field{
Name: bytesutil.ToUnsafeString(name),
Value: bytesutil.ToUnsafeString(value),
})

View file

@ -0,0 +1,97 @@
package logstorage
import (
"reflect"
"testing"
)
func TestJSONParserFailure(t *testing.T) {
f := func(data string) {
t.Helper()
p := GetJSONParser()
err := p.ParseLogMessage([]byte(data), "")
if err == nil {
t.Fatalf("expecting non-nil error")
}
PutJSONParser(p)
}
f("")
f("{foo")
f("[1,2,3]")
f(`{"foo",}`)
}
func TestJSONParserSuccess(t *testing.T) {
f := func(data, prefix string, fieldsExpected []Field) {
t.Helper()
p := GetJSONParser()
err := p.ParseLogMessage([]byte(data), prefix)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(p.Fields, fieldsExpected) {
t.Fatalf("unexpected fields;\ngot\n%s\nwant\n%s", p.Fields, fieldsExpected)
}
PutJSONParser(p)
}
f("{}", "", nil)
f(`{"foo":"bar"}`, "", []Field{
{
Name: "foo",
Value: "bar",
},
})
f(`{"foo":"bar"}`, "prefix_", []Field{
{
Name: "prefix_foo",
Value: "bar",
},
})
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, "", []Field{
{
Name: "foo.bar",
Value: "baz",
},
{
Name: "a",
Value: "1",
},
{
Name: "b",
Value: "true",
},
{
Name: "c",
Value: "[1,2]",
},
{
Name: "d",
Value: "false",
},
})
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, "prefix_", []Field{
{
Name: "prefix_foo.bar",
Value: "baz",
},
{
Name: "prefix_a",
Value: "1",
},
{
Name: "prefix_b",
Value: "true",
},
{
Name: "prefix_c",
Value: "[1,2]",
},
{
Name: "prefix_d",
Value: "false",
},
})
}

View file

@ -10,8 +10,8 @@ import (
"unicode"
"unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)
type lexer struct {
@ -39,6 +39,20 @@ type lexer struct {
currentTimestamp int64
}
type lexerState struct {
lex lexer
}
func (lex *lexer) backupState() *lexerState {
return &lexerState{
lex: *lex,
}
}
func (lex *lexer) restoreState(ls *lexerState) {
*lex = ls.lex
}
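backupState and restoreState give the LogsQL parser cheap backtracking: snapshot the lexer, try one grammar alternative, and rewind if it fails; parseFilterIn below uses exactly this pattern for in(...). A hedged sketch of the pattern (tryParseAlternativeA and parseAlternativeB are placeholders, not real functions in this package):

func parseEither(lex *lexer) (filter, error) {
	state := lex.backupState()
	if f, err := tryParseAlternativeA(lex); err == nil {
		return f, nil
	}
	// The first alternative consumed tokens before failing - rewind and retry.
	lex.restoreState(state)
	return parseAlternativeB(lex)
}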
// newLexer returns new lexer for the given s.
//
// The lex.token points to the first token in s.
@ -206,10 +220,45 @@ func (q *Query) String() string {
return s
}
// AddCountByTimePipe adds '| stats by (_time:step offset off, field1, ..., fieldN) count() hits' to the end of q.
func (q *Query) AddCountByTimePipe(step, off int64, fields []string) {
{
// add 'stats by (_time:step offset off, fields) count() hits'
stepStr := string(marshalDuration(nil, step))
offsetStr := string(marshalDuration(nil, off))
byFieldsStr := "_time:" + stepStr + " offset " + offsetStr
for _, f := range fields {
byFieldsStr += ", " + quoteTokenIfNeeded(f)
}
s := fmt.Sprintf("stats by (%s) count() hits", byFieldsStr)
lex := newLexer(s)
ps, err := parsePipeStats(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing %q: %s", s, err)
}
q.pipes = append(q.pipes, ps)
}
{
// Add 'sort by (_time, fields)' in order to get consistent order of the results.
sortFieldsStr := "_time"
for _, f := range fields {
sortFieldsStr += ", " + quoteTokenIfNeeded(f)
}
s := fmt.Sprintf("sort by (%s)", sortFieldsStr)
lex := newLexer(s)
ps, err := parsePipeSort(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing %q: %s", s, err)
}
q.pipes = append(q.pipes, ps)
}
}
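AddCountByTimePipe is the programmatic counterpart of typing the two pipes by hand, presumably intended for hits-over-time style queries. A hedged usage sketch (the query text, step value and field list are illustrative; the time and logstorage imports are assumed):

func addHitsPipes(qs string) (string, error) {
	q, err := logstorage.ParseQuery(qs)
	if err != nil {
		return "", err
	}
	// Appends 'stats by (_time:<step> offset <off>, host) count() hits'
	// followed by 'sort by (_time, host)' for a deterministic result order.
	q.AddCountByTimePipe(int64(time.Hour), 0, []string{"host"})
	return q.String(), nil
}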
// AddTimeFilter adds global filter _time:[start ... end] to q.
func (q *Query) AddTimeFilter(start, end int64) {
startStr := marshalTimestampRFC3339Nano(nil, start)
endStr := marshalTimestampRFC3339Nano(nil, end)
startStr := marshalTimestampRFC3339NanoString(nil, start)
endStr := marshalTimestampRFC3339NanoString(nil, end)
ft := &filterTime{
minTimestamp: start,
maxTimestamp: end,
@ -234,7 +283,7 @@ func (q *Query) AddTimeFilter(start, end int64) {
// See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe
func (q *Query) AddPipeLimit(n uint64) {
q.pipes = append(q.pipes, &pipeLimit{
n: n,
limit: n,
})
}
@ -242,6 +291,68 @@ func (q *Query) AddPipeLimit(n uint64) {
func (q *Query) Optimize() {
q.pipes = optimizeSortOffsetPipes(q.pipes)
q.pipes = optimizeSortLimitPipes(q.pipes)
q.pipes = optimizeUniqLimitPipes(q.pipes)
q.pipes = optimizeFilterPipes(q.pipes)
// Merge `q | filter ...` into q.
if len(q.pipes) > 0 {
pf, ok := q.pipes[0].(*pipeFilter)
if ok {
q.f = mergeFiltersAnd(q.f, pf.f)
q.pipes = append(q.pipes[:0], q.pipes[1:]...)
}
}
// Optimize `q | field_names ...` by marking pipeFieldNames as first pipe.
if len(q.pipes) > 0 {
pf, ok := q.pipes[0].(*pipeFieldNames)
if ok {
pf.isFirstPipe = true
}
}
// Substitute the '*' filterPrefix with filterNoop in order to avoid reading _msg data.
q.f = removeStarFilters(q.f)
// Call Optimize for queries from 'in(query)' filters.
optimizeFilterIn(q.f)
for _, p := range q.pipes {
switch t := p.(type) {
case *pipeStats:
for _, f := range t.funcs {
if f.iff != nil {
optimizeFilterIn(f.iff)
}
}
}
}
}
func removeStarFilters(f filter) filter {
visitFunc := func(f filter) bool {
fp, ok := f.(*filterPrefix)
return ok && isMsgFieldName(fp.fieldName) && fp.prefix == ""
}
copyFunc := func(_ filter) (filter, error) {
fn := &filterNoop{}
return fn, nil
}
f, err := copyFilter(f, visitFunc, copyFunc)
if err != nil {
logger.Fatalf("BUG: unexpected error: %s", err)
}
return f
}
func optimizeFilterIn(f filter) {
visitFunc := func(f filter) bool {
fi, ok := f.(*filterIn)
if ok && fi.q != nil {
fi.q.Optimize()
}
return false
}
_ = visitFilter(f, visitFunc)
}
func optimizeSortOffsetPipes(pipes []pipe) []pipe {
@ -259,7 +370,7 @@ func optimizeSortOffsetPipes(pipes []pipe) []pipe {
continue
}
if ps.offset == 0 && ps.limit == 0 {
ps.offset = po.n
ps.offset = po.offset
}
pipes = append(pipes[:i], pipes[i+1:]...)
}
@ -280,14 +391,78 @@ func optimizeSortLimitPipes(pipes []pipe) []pipe {
i++
continue
}
if ps.limit == 0 || pl.n < ps.limit {
ps.limit = pl.n
if ps.limit == 0 || pl.limit < ps.limit {
ps.limit = pl.limit
}
pipes = append(pipes[:i], pipes[i+1:]...)
}
return pipes
}
func optimizeUniqLimitPipes(pipes []pipe) []pipe {
// Merge 'uniq ... | limit ...' into 'uniq ... limit ...'
i := 1
for i < len(pipes) {
pl, ok := pipes[i].(*pipeLimit)
if !ok {
i++
continue
}
pu, ok := pipes[i-1].(*pipeUniq)
if !ok {
i++
continue
}
if pu.limit == 0 || pl.limit < pu.limit {
pu.limit = pl.limit
}
pipes = append(pipes[:i], pipes[i+1:]...)
}
return pipes
}
func optimizeFilterPipes(pipes []pipe) []pipe {
// Merge multiple `| filter ...` pipes into a single `filter ...` pipe
i := 1
for i < len(pipes) {
pf1, ok := pipes[i-1].(*pipeFilter)
if !ok {
i++
continue
}
pf2, ok := pipes[i].(*pipeFilter)
if !ok {
i++
continue
}
pf1.f = mergeFiltersAnd(pf1.f, pf2.f)
pipes = append(pipes[:i], pipes[i+1:]...)
}
return pipes
}
func mergeFiltersAnd(f1, f2 filter) filter {
fa1, ok := f1.(*filterAnd)
if ok {
fa1.filters = append(fa1.filters, f2)
return fa1
}
fa2, ok := f2.(*filterAnd)
if ok {
filters := make([]filter, len(fa2.filters)+1)
filters[0] = f1
copy(filters[1:], fa2.filters)
fa2.filters = filters
return fa2
}
return &filterAnd{
filters: []filter{f1, f2},
}
}
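Optimize() folds redundant pipes before execution: adjacent filter pipes are merged by optimizeFilterPipes above, and a leading filter pipe is then pushed into the top-level query filter by the block at the start of Optimize(). A hedged sketch of the observable effect (the exact String() output is not asserted; fmt and logstorage imports are assumed):

q, err := logstorage.ParseQuery(`foo | filter bar | filter baz`)
if err != nil {
	// handle the parse error
}
q.Optimize()
// After Optimize() the two filter pipes are merged into a single AND filter
// and folded into the top-level filter, so the query executes roughly as
// `foo bar baz` with no filter pipes left.
fmt.Println(q.String())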
func (q *Query) getNeededColumns() ([]string, []string) {
neededFields := newFieldsSet()
neededFields.add("*")
@ -304,7 +479,17 @@ func (q *Query) getNeededColumns() ([]string, []string) {
// ParseQuery parses s.
func ParseQuery(s string) (*Query, error) {
lex := newLexer(s)
q, err := parseQuery(lex)
if err != nil {
return nil, err
}
if !lex.isEnd() {
return nil, fmt.Errorf("unexpected unparsed tail after [%s]; context: [%s]; tail: [%s]", q, lex.context(), lex.s)
}
return q, nil
}
func parseQuery(lex *lexer) (*Query, error) {
f, err := parseFilter(lex)
if err != nil {
return nil, fmt.Errorf("%w; context: [%s]", err, lex.context())
@ -319,10 +504,6 @@ func ParseQuery(s string) (*Query, error) {
}
q.pipes = pipes
if !lex.isEnd() {
return nil, fmt.Errorf("unexpected unparsed tail; context: [%s]; tail: [%s]", lex.context(), lex.s)
}
return q, nil
}
@ -407,6 +588,10 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
return nil, fmt.Errorf("missing whitespace before the search word %q", lex.prevToken)
}
return parseParensFilter(lex, fieldName)
case lex.isKeyword(">"):
return parseFilterGT(lex, fieldName)
case lex.isKeyword("<"):
return parseFilterLT(lex, fieldName)
case lex.isKeyword("not", "!"):
return parseFilterNot(lex, fieldName)
case lex.isKeyword("exact"):
@ -432,19 +617,27 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
case lex.isKeyword(",", ")", "[", "]"):
return nil, fmt.Errorf("unexpected token %q", lex.token)
}
phrase := getCompoundPhrase(lex, fieldName != "")
phrase, err := getCompoundPhrase(lex, fieldName != "")
if err != nil {
return nil, err
}
return parseFilterForPhrase(lex, phrase, fieldName)
}
func getCompoundPhrase(lex *lexer, allowColon bool) string {
func getCompoundPhrase(lex *lexer, allowColon bool) (string, error) {
stopTokens := []string{"*", ",", "(", ")", "[", "]", "|", ""}
if lex.isKeyword(stopTokens...) {
return "", fmt.Errorf("compound phrase cannot start with '%s'", lex.token)
}
phrase := lex.token
rawPhrase := lex.rawToken
lex.nextToken()
suffix := getCompoundSuffix(lex, allowColon)
if suffix == "" {
return phrase
return phrase, nil
}
return rawPhrase + suffix
return rawPhrase + suffix, nil
}
func getCompoundSuffix(lex *lexer, allowColon bool) string {
@ -460,19 +653,24 @@ func getCompoundSuffix(lex *lexer, allowColon bool) string {
return s
}
func getCompoundToken(lex *lexer) string {
func getCompoundToken(lex *lexer) (string, error) {
stopTokens := []string{",", "(", ")", "[", "]", "|", ""}
if lex.isKeyword(stopTokens...) {
return "", fmt.Errorf("compound token cannot start with '%s'", lex.token)
}
s := lex.token
rawS := lex.rawToken
lex.nextToken()
suffix := ""
for !lex.isSkippedSpace && !lex.isKeyword(",", "(", ")", "[", "]", "|", "") {
for !lex.isSkippedSpace && !lex.isKeyword(stopTokens...) {
s += lex.token
lex.nextToken()
}
if suffix == "" {
return s
return s, nil
}
return rawS + suffix
return rawS + suffix, nil
}
func getCompoundFuncArg(lex *lexer) string {
@ -483,7 +681,7 @@ func getCompoundFuncArg(lex *lexer) string {
rawArg := lex.rawToken
lex.nextToken()
suffix := ""
for !lex.isSkippedSpace && !lex.isKeyword("*", ",", ")", "") {
for !lex.isSkippedSpace && !lex.isKeyword("*", ",", "(", ")", "|", "") {
suffix += lex.rawToken
lex.nextToken()
}
@ -704,13 +902,72 @@ func tryParseIPv4CIDR(s string) (uint32, uint32, bool) {
}
func parseFilterIn(lex *lexer, fieldName string) (filter, error) {
return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
f := &filterIn{
if !lex.isKeyword("in") {
return nil, fmt.Errorf("expecting 'in' keyword")
}
// Try parsing in(arg1, ..., argN) at first
lexState := lex.backupState()
fi, err := parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
fi := &filterIn{
fieldName: fieldName,
values: args,
}
return f, nil
return fi, nil
})
if err == nil {
return fi, nil
}
// Parse in(query | fields someField) then
lex.restoreState(lexState)
lex.nextToken()
if !lex.isKeyword("(") {
return nil, fmt.Errorf("missing '(' after 'in'")
}
lex.nextToken()
q, err := parseQuery(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse query inside 'in(...)': %w", err)
}
if !lex.isKeyword(")") {
return nil, fmt.Errorf("missing ')' after 'in(%s)'", q)
}
lex.nextToken()
qFieldName, err := getFieldNameFromPipes(q.pipes)
if err != nil {
return nil, fmt.Errorf("cannot determine field name for values in 'in(%s)': %w", q, err)
}
fi = &filterIn{
fieldName: fieldName,
needExecuteQuery: true,
q: q,
qFieldName: qFieldName,
}
return fi, nil
}
func getFieldNameFromPipes(pipes []pipe) (string, error) {
if len(pipes) == 0 {
return "", fmt.Errorf("missing 'fields' or 'uniq' pipes at the end of query")
}
switch t := pipes[len(pipes)-1].(type) {
case *pipeFields:
if t.containsStar || len(t.fields) != 1 {
return "", fmt.Errorf("'%s' pipe must contain only a single non-star field name", t)
}
return t.fields[0], nil
case *pipeUniq:
if len(t.byFields) != 1 {
return "", fmt.Errorf("'%s' pipe must contain only a single non-star field name", t)
}
return t.byFields[0], nil
default:
return "", fmt.Errorf("missing 'fields' or 'uniq' pipe at the end of query")
}
}
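getFieldNameFromPipes enforces that an in(<query>) subquery ends with a pipe producing exactly one column, since that column supplies the in() values at execution time. A small parse-level sketch (the query text is illustrative; the logstorage import is assumed):

// Accepted: the subquery ends with 'fields ip', a single non-star column.
q, err := logstorage.ParseQuery(`user_ip:in(error _time:5m | fields ip)`)
if err != nil {
	// handle the parse error
}
_ = q

// Rejected at parse time: 'limit 10' is neither a 'fields' nor a 'uniq' pipe.
// _, err = logstorage.ParseQuery(`user_ip:in(error | limit 10)`)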
func parseFilterSequence(lex *lexer, fieldName string) (filter, error) {
@ -755,6 +1012,70 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
})
}
func parseFilterGT(lex *lexer, fieldName string) (filter, error) {
if fieldName == "" {
return nil, fmt.Errorf("'>' and '>=' must be prefixed with the field name")
}
lex.nextToken()
includeMinValue := false
op := ">"
if lex.isKeyword("=") {
lex.nextToken()
includeMinValue = true
op = ">="
}
minValue, fStr, err := parseFloat64(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse number after '%s': %w", op, err)
}
if !includeMinValue {
minValue = nextafter(minValue, inf)
}
fr := &filterRange{
fieldName: fieldName,
minValue: minValue,
maxValue: inf,
stringRepr: op + fStr,
}
return fr, nil
}
func parseFilterLT(lex *lexer, fieldName string) (filter, error) {
if fieldName == "" {
return nil, fmt.Errorf("'<' and '<=' must be prefixed with the field name")
}
lex.nextToken()
includeMaxValue := false
op := "<"
if lex.isKeyword("=") {
lex.nextToken()
includeMaxValue = true
op = "<="
}
maxValue, fStr, err := parseFloat64(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse number after '%s': %w", op, err)
}
if !includeMaxValue {
maxValue = nextafter(maxValue, -inf)
}
fr := &filterRange{
fieldName: fieldName,
minValue: -inf,
maxValue: maxValue,
stringRepr: op + fStr,
}
return fr, nil
}
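Both helpers turn a strict comparison into a closed range by nudging the boundary with nextafter (defined near the end of this file, where it also leaves infinities untouched): foo:>10.43 gets the smallest float64 above 10.43 as minValue, while foo:>=10.43 keeps 10.43 itself. A standalone illustration of the trick:

package main

import (
	"fmt"
	"math"
)

func main() {
	v := 10.43
	// Smallest float64 strictly greater than v; used as minValue for 'foo:>10.43'.
	gt := math.Nextafter(v, math.Inf(1))
	// For 'foo:>=10.43' the boundary stays at v itself.
	fmt.Println(gt > v) // true: gt is one ulp (about 1.8e-15 here) above v
}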
func parseFilterRange(lex *lexer, fieldName string) (filter, error) {
funcName := lex.token
lex.nextToken()
@ -802,19 +1123,19 @@ func parseFilterRange(lex *lexer, fieldName string) (filter, error) {
}
lex.nextToken()
stringRepr := ""
stringRepr := "range"
if includeMinValue {
stringRepr += "["
} else {
stringRepr += "("
minValue = math.Nextafter(minValue, inf)
minValue = nextafter(minValue, inf)
}
stringRepr += minValueStr + ", " + maxValueStr
if includeMaxValue {
stringRepr += "]"
} else {
stringRepr += ")"
maxValue = math.Nextafter(maxValue, -inf)
maxValue = nextafter(maxValue, -inf)
}
fr := &filterRange{
@ -828,7 +1149,10 @@ func parseFilterRange(lex *lexer, fieldName string) (filter, error) {
}
func parseFloat64(lex *lexer) (float64, string, error) {
s := getCompoundToken(lex)
s, err := getCompoundToken(lex)
if err != nil {
return 0, "", fmt.Errorf("cannot parse float64: %w", err)
}
f, err := strconv.ParseFloat(s, 64)
if err == nil {
return f, s, nil
@ -868,6 +1192,9 @@ func parseFuncArgs(lex *lexer, fieldName string, callback func(args []string) (f
if lex.isKeyword(",") {
return nil, fmt.Errorf("unexpected ',' - missing arg in %s()", funcName)
}
if lex.isKeyword("(") {
return nil, fmt.Errorf("unexpected '(' - missing arg in %s()", funcName)
}
arg := getCompoundFuncArg(lex)
args = append(args, arg)
if lex.isKeyword(")") {
@ -912,13 +1239,14 @@ func parseFilterTimeWithOffset(lex *lexer) (*filterTime, error) {
if !lex.isKeyword("offset") {
return ft, nil
}
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing offset for _time filter %s", ft)
lex.nextToken()
s, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse offset in _time filter: %w", err)
}
s := getCompoundToken(lex)
d, ok := tryParseDuration(s)
if !ok {
return nil, fmt.Errorf("cannot parse offset %q for _time filter %s: %w", s, ft, err)
return nil, fmt.Errorf("cannot parse offset %q for _time filter %s", s, ft)
}
offset := int64(d)
ft.minTimestamp -= offset
@ -935,7 +1263,10 @@ func parseFilterTime(lex *lexer) (*filterTime, error) {
case lex.isKeyword("("):
startTimeInclude = false
default:
s := getCompoundToken(lex)
s, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse _time filter: %w", err)
}
sLower := strings.ToLower(s)
if sLower == "now" || startsWithYear(s) {
// Parse '_time:YYYY-MM-DD', which transforms to '_time:[YYYY-MM-DD, YYYY-MM-DD+1)'
@ -1076,113 +1407,21 @@ func stripTimezoneSuffix(s string) string {
}
func parseFilterStream(lex *lexer) (*filterStream, error) {
if !lex.isKeyword("{") {
return nil, fmt.Errorf("unexpected token %q instead of '{' in _stream filter", lex.token)
}
if !lex.mustNextToken() {
return nil, fmt.Errorf("incomplete _stream filter after '{'")
}
var filters []*andStreamFilter
for {
f, err := parseAndStreamFilter(lex)
sf, err := parseStreamFilter(lex)
if err != nil {
return nil, err
}
filters = append(filters, f)
switch {
case lex.isKeyword("}"):
lex.nextToken()
fs := &filterStream{
f: &StreamFilter{
orFilters: filters,
},
f: sf,
}
return fs, nil
case lex.isKeyword("or"):
if !lex.mustNextToken() {
return nil, fmt.Errorf("incomplete _stream filter after 'or'")
}
if lex.isKeyword("}") {
return nil, fmt.Errorf("unexpected '}' after 'or' in _stream filter")
}
default:
return nil, fmt.Errorf("unexpected token in _stream filter: %q; want '}' or 'or'", lex.token)
}
}
}
func newStreamFilter(s string) (*StreamFilter, error) {
lex := newLexer(s)
fs, err := parseFilterStream(lex)
if err != nil {
return nil, err
}
return fs.f, nil
}
func parseAndStreamFilter(lex *lexer) (*andStreamFilter, error) {
var filters []*streamTagFilter
for {
if lex.isKeyword("}") {
asf := &andStreamFilter{
tagFilters: filters,
}
return asf, nil
}
f, err := parseStreamTagFilter(lex)
if err != nil {
return nil, err
}
filters = append(filters, f)
switch {
case lex.isKeyword("or", "}"):
asf := &andStreamFilter{
tagFilters: filters,
}
return asf, nil
case lex.isKeyword(","):
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing stream filter after ','")
}
default:
return nil, fmt.Errorf("unexpected token %q in _stream filter; want 'or', 'and', '}' or ','", lex.token)
}
}
}
func parseStreamTagFilter(lex *lexer) (*streamTagFilter, error) {
tagName := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing operation in _stream filter for %q field", tagName)
}
if !lex.isKeyword("=", "!=", "=~", "!~") {
return nil, fmt.Errorf("unsupported operation %q in _steam filter for %q field; supported operations: =, !=, =~, !~", lex.token, tagName)
}
op := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing _stream filter value for %q field", tagName)
}
value := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing token after %q%s%q filter", tagName, op, value)
}
stf := &streamTagFilter{
tagName: tagName,
op: op,
value: value,
}
if op == "=~" || op == "!~" {
re, err := regexutil.NewPromRegex(value)
if err != nil {
return nil, fmt.Errorf("invalid regexp %q for stream filter: %w", value, err)
}
stf.regexp = re
}
return stf, nil
}
func parseTime(lex *lexer) (int64, string, error) {
s := getCompoundToken(lex)
s, err := getCompoundToken(lex)
if err != nil {
return 0, "", err
}
t, err := promutils.ParseTimeAt(s, float64(lex.currentTimestamp)/1e9)
if err != nil {
return 0, "", err
@ -1312,3 +1551,10 @@ func parseInt(s string) (int64, error) {
}
return nn, nil
}
func nextafter(f, xInf float64) float64 {
if math.IsInf(f, 0) {
return f
}
return math.Nextafter(f, xInf)
}

View file

@ -1,7 +1,6 @@
package logstorage
import (
"math"
"reflect"
"strings"
"testing"
@ -34,51 +33,6 @@ func TestLexer(t *testing.T) {
[]string{"_stream", ":", "{", "foo", "=", "bar", ",", "a", "=~", "baz", ",", "b", "!=", "cd", ",", "d,}a", "!~", "abc", "}"})
}
func TestNewStreamFilterSuccess(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
sf, err := newStreamFilter(s)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
result := sf.String()
if result != resultExpected {
t.Fatalf("unexpected StreamFilter; got %s; want %s", result, resultExpected)
}
}
f("{}", "{}")
f(`{foo="bar"}`, `{foo="bar"}`)
f(`{ "foo" =~ "bar.+" , baz!="a" or x="y"}`, `{foo=~"bar.+",baz!="a" or x="y"}`)
f(`{"a b"='c}"d' OR de="aaa"}`, `{"a b"="c}\"d" or de="aaa"}`)
f(`{a="b", c="d" or x="y"}`, `{a="b",c="d" or x="y"}`)
}
func TestNewStreamFilterFailure(t *testing.T) {
f := func(s string) {
t.Helper()
sf, err := newStreamFilter(s)
if err == nil {
t.Fatalf("expecting non-nil error")
}
if sf != nil {
t.Fatalf("expecting nil sf; got %v", sf)
}
}
f("")
f("}")
f("{")
f("{foo")
f("{foo}")
f("{'foo")
f("{foo=")
f("{foo or bar}")
f("{foo=bar")
f("{foo=bar baz}")
f("{foo='bar' baz='x'}")
}
func TestParseTimeDuration(t *testing.T) {
f := func(s string, durationExpected time.Duration) {
t.Helper()
@ -323,6 +277,10 @@ func TestParseFilterIn(t *testing.T) {
f(`:in("foo bar,baz")`, ``, []string{"foo bar,baz"})
f(`ip:in(1.2.3.4, 5.6.7.8, 9.10.11.12)`, `ip`, []string{"1.2.3.4", "5.6.7.8", "9.10.11.12"})
f(`foo-bar:in(foo,bar-baz.aa"bb","c,)d")`, `foo-bar`, []string{"foo", `bar-baz.aa"bb"`, "c,)d"})
// verify `in(query)` - it shouldn't set values
f(`in(x|fields foo)`, ``, nil)
f(`a:in(* | fields bar)`, `a`, nil)
}
func TestParseFilterIPv4Range(t *testing.T) {
@ -537,15 +495,25 @@ func TestParseRangeFilter(t *testing.T) {
f(`range:range["-1.234e5", "-2e-5"]`, `range`, -1.234e5, -2e-5)
f(`_msg:range[1, 2]`, `_msg`, 1, 2)
f(`:range(1, 2)`, ``, math.Nextafter(1, inf), math.Nextafter(2, -inf))
f(`range[1, 2)`, ``, 1, math.Nextafter(2, -inf))
f(`range("1", 2]`, ``, math.Nextafter(1, inf), 2)
f(`:range(1, 2)`, ``, nextafter(1, inf), nextafter(2, -inf))
f(`range[1, 2)`, ``, 1, nextafter(2, -inf))
f(`range("1", 2]`, ``, nextafter(1, inf), 2)
f(`response_size:range[1KB, 10MiB]`, `response_size`, 1_000, 10*(1<<20))
f(`response_size:range[1G, 10Ti]`, `response_size`, 1_000_000_000, 10*(1<<40))
f(`response_size:range[10, inf]`, `response_size`, 10, inf)
f(`duration:range[100ns, 1y2w2.5m3s5ms]`, `duration`, 100, 1*nsecsPerYear+2*nsecsPerWeek+2.5*nsecsPerMinute+3*nsecsPerSecond+5*nsecsPerMillisecond)
f(`foo:>10.43`, `foo`, nextafter(10.43, inf), inf)
f(`foo: > -10.43`, `foo`, nextafter(-10.43, inf), inf)
f(`foo:>=10.43`, `foo`, 10.43, inf)
f(`foo: >= -10.43`, `foo`, -10.43, inf)
f(`foo:<10.43`, `foo`, -inf, nextafter(10.43, -inf))
f(`foo: < -10.43`, `foo`, -inf, nextafter(-10.43, -inf))
f(`foo:<=10.43`, `foo`, -inf, 10.43)
f(`foo: <= 10.43`, `foo`, -inf, 10.43)
}
func TestParseQuerySuccess(t *testing.T) {
@ -723,8 +691,8 @@ func TestParseQuerySuccess(t *testing.T) {
f("exact(foo*)", `exact(foo*)`)
f("exact('foo bar),|baz')", `exact("foo bar),|baz")`)
f("exact('foo bar),|baz'*)", `exact("foo bar),|baz"*)`)
f(`exact(foo|b:ar)`, `exact("foo|b:ar")`)
f(`foo:exact(foo|b:ar*)`, `foo:exact("foo|b:ar"*)`)
f(`exact(foo/b:ar)`, `exact("foo/b:ar")`)
f(`foo:exact(foo/b:ar*)`, `foo:exact("foo/b:ar"*)`)
// i filter
f("i(foo)", `i(foo)`)
@ -732,14 +700,21 @@ func TestParseQuerySuccess(t *testing.T) {
f("i(`foo`* )", `i(foo*)`)
f("i(' foo ) bar')", `i(" foo ) bar")`)
f("i('foo bar'*)", `i("foo bar"*)`)
f(`foo:i(foo:bar-baz|aa+bb)`, `foo:i("foo:bar-baz|aa+bb")`)
f(`foo:i(foo:bar-baz/aa+bb)`, `foo:i("foo:bar-baz/aa+bb")`)
// in filter
// in filter with values
f(`in()`, `in()`)
f(`in(foo)`, `in(foo)`)
f(`in(foo, bar)`, `in(foo,bar)`)
f(`in("foo bar", baz)`, `in("foo bar",baz)`)
f(`foo:in(foo-bar|baz)`, `foo:in("foo-bar|baz")`)
f(`foo:in(foo-bar/baz)`, `foo:in("foo-bar/baz")`)
// in filter with query
f(`in(err|fields x)`, `in(err | fields x)`)
f(`ip:in(foo and user:in(admin, moderator)|fields ip)`, `ip:in(foo user:in(admin,moderator) | fields ip)`)
f(`x:in(_time:5m y:in(*|fields z) | stats by (q) count() rows|fields q)`, `x:in(_time:5m y:in(* | fields z) | stats by (q) count(*) as rows | fields q)`)
f(`in(bar:in(1,2,3) | uniq (x)) | stats count() rows`, `in(bar:in(1,2,3) | uniq by (x)) | stats count(*) as rows`)
f(`in((1) | fields z) | stats count() rows`, `in(1 | fields z) | stats count(*) as rows`)
// ipv4_range filter
f(`ipv4_range(1.2.3.4, "5.6.7.8")`, `ipv4_range(1.2.3.4, 5.6.7.8)`)
@ -768,11 +743,18 @@ func TestParseQuerySuccess(t *testing.T) {
f(`range(0x1ff, inf)`, `range(0x1ff, inf)`)
f(`range(-INF,+inF)`, `range(-INF, +inF)`)
f(`range(1.5K, 22.5GiB)`, `range(1.5K, 22.5GiB)`)
f(`foo:range(5,inf)`, `foo:range(5, inf)`)
// >, >=, < and <= filter
f(`foo: > 10.5M`, `foo:>10.5M`)
f(`foo: >= 10.5M`, `foo:>=10.5M`)
f(`foo: < 10.5M`, `foo:<10.5M`)
f(`foo: <= 10.5M`, `foo:<=10.5M`)
// re filter
f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`)
f("re(foo)", `re("foo")`)
f(`foo:re(foo-bar|baz.)`, `foo:re("foo-bar|baz.")`)
f(`foo:re(foo-bar/baz.)`, `foo:re("foo-bar/baz.")`)
// seq filter
f(`seq()`, `seq()`)
@ -829,6 +811,10 @@ func TestParseQuerySuccess(t *testing.T) {
// multiple fields pipes
f(`foo | fields bar | fields baz, abc`, `foo | fields bar | fields baz, abc`)
// field_names pipe
f(`foo | field_names as x`, `foo | field_names as x`)
f(`foo | field_names y`, `foo | field_names as y`)
// copy and cp pipe
f(`* | copy foo as bar`, `* | copy foo as bar`)
f(`* | cp foo bar`, `* | copy foo as bar`)
@ -966,6 +952,16 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | stats by (_time:week) count() foo`, `* | stats by (_time:week) count(*) as foo`)
f(`* | stats by (_time:month) count() foo`, `* | stats by (_time:month) count(*) as foo`)
f(`* | stats by (_time:year offset 6.5h) count() foo`, `* | stats by (_time:year offset 6.5h) count(*) as foo`)
f(`* | stats (_time:year offset 6.5h) count() foo`, `* | stats by (_time:year offset 6.5h) count(*) as foo`)
// stats pipe with per-func filters
f(`* | stats count() if (foo bar) rows`, `* | stats count(*) if (foo bar) as rows`)
f(`* | stats by (_time:1d offset -2h, f2)
count() if (is_admin:true or _msg:"foo bar"*) as foo,
sum(duration) if (host:in('foo.com', 'bar.com') and path:/foobar) as bar`,
`* | stats by (_time:1d offset -2h, f2) count(*) if (is_admin:true or "foo bar"*) as foo, sum(duration) if (host:in(foo.com,bar.com) path:"/foobar") as bar`)
f(`* | stats count(x) if (error ip:in(_time:1d | fields ip)) rows`, `* | stats count(x) if (error ip:in(_time:1d | fields ip)) as rows`)
f(`* | stats count() if () rows`, `* | stats count(*) if () as rows`)
// sort pipe
f(`* | sort`, `* | sort`)
@ -983,6 +979,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | sort by (foo desc, bar) desc limit 10`, `* | sort by (foo desc, bar) desc limit 10`)
f(`* | sort by (foo desc, bar) desc OFFSET 30 limit 10`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
f(`* | sort by (foo desc, bar) desc limit 10 OFFSET 30`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
f(`* | sort (foo desc, bar) desc limit 10 OFFSET 30`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
// uniq pipe
f(`* | uniq`, `* | uniq`)
@ -991,8 +988,32 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | uniq by(foo,*,bar)`, `* | uniq`)
f(`* | uniq by(f1,f2)`, `* | uniq by (f1, f2)`)
f(`* | uniq by(f1,f2) limit 10`, `* | uniq by (f1, f2) limit 10`)
f(`* | uniq (f1,f2) limit 10`, `* | uniq by (f1, f2) limit 10`)
f(`* | uniq limit 10`, `* | uniq limit 10`)
// filter pipe
f(`* | filter error ip:12.3.4.5 or warn`, `* | filter error ip:12.3.4.5 or warn`)
f(`foo | stats by (host) count() logs | filter logs:>50 | sort by (logs desc) | limit 10`, `foo | stats by (host) count(*) as logs | filter logs:>50 | sort by (logs desc) | limit 10`)
// extract pipe
f(`* | extract "foo<bar>baz"`, `* | extract "foo<bar>baz"`)
f(`* | extract from _msg "foo<bar>baz"`, `* | extract "foo<bar>baz"`)
f(`* | extract from '' 'foo<bar>baz'`, `* | extract "foo<bar>baz"`)
f("* | extract from x `foo<bar>baz`", `* | extract from x "foo<bar>baz"`)
f("* | extract from x foo<bar>baz", `* | extract from x "foo<bar>baz"`)
// unpack_json pipe
f(`* | unpack_json`, `* | unpack_json`)
f(`* | unpack_json result_prefix y`, `* | unpack_json result_prefix y`)
f(`* | unpack_json from x`, `* | unpack_json from x`)
f(`* | unpack_json from x result_prefix y`, `* | unpack_json from x result_prefix y`)
// unpack_logfmt pipe
f(`* | unpack_logfmt`, `* | unpack_logfmt`)
f(`* | unpack_logfmt result_prefix y`, `* | unpack_logfmt result_prefix y`)
f(`* | unpack_logfmt from x`, `* | unpack_logfmt from x`)
f(`* | unpack_logfmt from x result_prefix y`, `* | unpack_logfmt from x result_prefix y`)
// multiple different pipes
f(`* | fields foo, bar | limit 100 | stats by(foo,bar) count(baz) as qwert`, `* | fields foo, bar | limit 100 | stats by (foo, bar) count(baz) as qwert`)
f(`* | skip 100 | head 20 | skip 10`, `* | offset 100 | limit 20 | offset 10`)
@ -1130,6 +1151,10 @@ func TestParseQueryFailure(t *testing.T) {
f(`in(foo, "bar baz"*, abc)`)
f(`in(foo bar)`)
f(`in(foo, bar`)
f(`in(foo|bar)`)
f(`in(|foo`)
f(`in(x | limit 10)`)
f(`in(x | fields a,b)`)
// invalid ipv4_range
f(`ipv4_range(`)
@ -1208,6 +1233,18 @@ func TestParseQueryFailure(t *testing.T) {
f(`foo | fields bar,`)
f(`foo | fields bar,,`)
// invalid field_names
f(`foo | field_names`)
f(`foo | field_names |`)
f(`foo | field_names (`)
f(`foo | field_names )`)
f(`foo | field_names ,`)
f(`foo | field_names ()`)
f(`foo | field_names (x)`)
f(`foo | field_names (x,y)`)
f(`foo | field_names x y`)
f(`foo | field_names x, y`)
// invalid copy and cp pipe
f(`foo | copy`)
f(`foo | cp`)
@ -1359,6 +1396,39 @@ func TestParseQueryFailure(t *testing.T) {
f(`foo | uniq by(a) bar`)
f(`foo | uniq by(a) limit -10`)
f(`foo | uniq by(a) limit foo`)
// invalid filter pipe
f(`foo | filter`)
f(`foo | filter | sort by (x)`)
f(`foo | filter (`)
f(`foo | filter )`)
// invalid extract pipe
f(`foo | extract`)
f(`foo | extract bar`)
f(`foo | extract "xy"`)
f(`foo | extract "<>"`)
f(`foo | extract "foo<>foo"`)
f(`foo | extract "foo<>foo<_>bar<*>asdf"`)
f(`foo | extract from`)
f(`foo | extract from x`)
f(`foo | extract from x "abc"`)
f(`foo | extract from x "<abc`)
f(`foo | extract from x "<abc>" de`)
// invalid unpack_json pipe
f(`foo | unpack_json bar`)
f(`foo | unpack_json from`)
f(`foo | unpack_json result_prefix`)
f(`foo | unpack_json result_prefix x from y`)
f(`foo | unpack_json from x result_prefix`)
// invalid unpack_logfmt pipe
f(`foo | unpack_logfmt bar`)
f(`foo | unpack_logfmt from`)
f(`foo | unpack_logfmt result_prefix`)
f(`foo | unpack_logfmt result_prefix x from y`)
f(`foo | unpack_logfmt from x result_prefix`)
}
func TestQueryGetNeededColumns(t *testing.T) {
@ -1367,8 +1437,9 @@ func TestQueryGetNeededColumns(t *testing.T) {
q, err := ParseQuery(s)
if err != nil {
t.Fatalf("cannot parse query %s: %s", s, err)
t.Fatalf("cannot parse query [%s]: %s", s, err)
}
q.Optimize()
needed, unneeded := q.getNeededColumns()
neededColumns := strings.Join(needed, ",")
@ -1454,11 +1525,10 @@ func TestQueryGetNeededColumns(t *testing.T) {
f(`* | sort by (f1) | sort by (f2,f3 desc) desc | fields f4 | rm f1,f2,f5`, `f1,f2,f3,f4`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2`, `f1,f2,f3,f4`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f5,f6`, ``, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f5,f6`, `f1`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f1,f5`, `f1`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields r1`, `f1,f2`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields r2,r3`, `f1,f3,f4`, ``)
f(`_time:5m | stats by(_time:day) count() r1 | stats values(_time) r2`, `_time`, ``)
f(`* | stats count(f1) r1 | stats count() r1`, ``, ``)
f(`* | stats count(f1) r1 | stats count() r2`, ``, ``)
f(`* | stats count(f1) r1 | stats count(r1) r2`, `f1`, ``)
@ -1470,12 +1540,60 @@ func TestQueryGetNeededColumns(t *testing.T) {
f(`* | stats by(f3,f4) count(f1,f2) r1 | stats count(f2) r1, count(r1) r2 | fields r2`, `f1,f2,f3,f4`, ``)
f(`* | stats by(f3,f4) count(f1,f2) r1 | stats count(f3) r1, count(r1) r2 | fields r1`, `f3,f4`, ``)
f(`_time:5m | stats by(_time:day) count() r1 | stats values(_time) r2`, `_time`, ``)
f(`_time:1y | stats (_time:1w) count() r1 | stats count() r2`, `_time`, ``)
f(`* | uniq`, `*`, ``)
f(`* | uniq by (f1,f2)`, `f1,f2`, ``)
f(`* | uniq by (f1,f2) | fields f1,f3`, `f1,f2`, ``)
f(`* | uniq by (f1,f2) | rm f1,f3`, `f1,f2`, ``)
f(`* | uniq by (f1,f2) | fields f3`, `f1,f2`, ``)
f(`* | filter foo f1:bar`, `*`, ``)
f(`* | filter foo f1:bar | fields f2`, `f2`, ``)
f(`* | limit 10 | filter foo f1:bar | fields f2`, `_msg,f1,f2`, ``)
f(`* | filter foo f1:bar | fields f1`, `f1`, ``)
f(`* | filter foo f1:bar | rm f1`, `*`, `f1`)
f(`* | limit 10 | filter foo f1:bar | rm f1`, `*`, ``)
f(`* | filter foo f1:bar | rm f2`, `*`, `f2`)
f(`* | limit 10 | filter foo f1:bar | rm f2`, `*`, `f2`)
f(`* | fields x | filter foo f1:bar | rm f2`, `x`, ``)
f(`* | fields x,f1 | filter foo f1:bar | rm f2`, `f1,x`, ``)
f(`* | rm x,f1 | filter foo f1:bar`, `*`, `f1,x`)
f(`* | field_names as foo`, `*`, `_time`)
f(`* | field_names foo | fields bar`, `*`, `_time`)
f(`* | field_names foo | fields foo`, `*`, `_time`)
f(`* | field_names foo | rm foo`, `*`, `_time`)
f(`* | field_names foo | rm bar`, `*`, `_time`)
f(`* | field_names foo | rm _time`, `*`, `_time`)
f(`* | fields x,y | field_names as bar | fields baz`, `x,y`, ``)
f(`* | rm x,y | field_names as bar | fields baz`, `*`, `x,y`)
f(`* | extract from s1 "<f1>x<f2>"`, `*`, `f1,f2`)
f(`* | extract from s1 "<f1>x<f2>" | fields foo`, `foo`, ``)
f(`* | extract from s1 "<f1>x<f2>" | fields foo,s1`, `foo,s1`, ``)
f(`* | extract from s1 "<f1>x<f2>" | fields foo,f1`, `foo,s1`, ``)
f(`* | extract from s1 "<f1>x<f2>" | fields foo,f1,f2`, `foo,s1`, ``)
f(`* | extract from s1 "<f1>x<f2>" | rm foo`, `*`, `f1,f2,foo`)
f(`* | extract from s1 "<f1>x<f2>" | rm foo,s1`, `*`, `f1,f2,foo`)
f(`* | extract from s1 "<f1>x<f2>" | rm foo,f1`, `*`, `f1,f2,foo`)
f(`* | extract from s1 "<f1>x<f2>" | rm foo,f1,f2`, `*`, `f1,f2,foo,s1`)
f(`* | unpack_json`, `*`, ``)
f(`* | unpack_json from s1`, `*`, ``)
f(`* | unpack_json from s1 | fields f1`, `f1,s1`, ``)
f(`* | unpack_json from s1 | fields s1,f1`, `f1,s1`, ``)
f(`* | unpack_json from s1 | rm f1`, `*`, `f1`)
f(`* | unpack_json from s1 | rm f1,s1`, `*`, `f1`)
f(`* | unpack_logfmt`, `*`, ``)
f(`* | unpack_logfmt from s1`, `*`, ``)
f(`* | unpack_logfmt from s1 | fields f1`, `f1,s1`, ``)
f(`* | unpack_logfmt from s1 | fields s1,f1`, `f1,s1`, ``)
f(`* | unpack_logfmt from s1 | rm f1`, `*`, `f1`)
f(`* | unpack_logfmt from s1 | rm f1,s1`, `*`, `f1`)
f(`* | rm f1, f2`, `*`, `f1,f2`)
f(`* | rm f1, f2 | mv f2 f3`, `*`, `f1,f2,f3`)
f(`* | rm f1, f2 | cp f2 f3`, `*`, `f1,f2,f3`)


@ -67,67 +67,103 @@ func parsePipes(lex *lexer) ([]pipe, error) {
if !lex.isKeyword("|") {
return nil, fmt.Errorf("expecting '|'; got %q", lex.token)
}
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing token after '|'")
lex.nextToken()
p, err := parsePipe(lex)
if err != nil {
return nil, err
}
pipes = append(pipes, p)
}
return pipes, nil
}
func parsePipe(lex *lexer) (pipe, error) {
switch {
case lex.isKeyword("stats"):
ps, err := parsePipeStats(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'stats' pipe: %w", err)
}
pipes = append(pipes, ps)
case lex.isKeyword("sort"):
ps, err := parsePipeSort(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'sort' pipe: %w", err)
}
pipes = append(pipes, ps)
case lex.isKeyword("uniq"):
pu, err := parsePipeUniq(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'uniq' pipe: %w", err)
}
pipes = append(pipes, pu)
case lex.isKeyword("limit", "head"):
pl, err := parsePipeLimit(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'limit' pipe: %w", err)
}
pipes = append(pipes, pl)
case lex.isKeyword("offset", "skip"):
ps, err := parsePipeOffset(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err)
}
pipes = append(pipes, ps)
case lex.isKeyword("fields"):
pf, err := parsePipeFields(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'fields' pipe: %w", err)
}
pipes = append(pipes, pf)
case lex.isKeyword("copy", "cp"):
pc, err := parsePipeCopy(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'copy' pipe: %w", err)
}
pipes = append(pipes, pc)
case lex.isKeyword("rename", "mv"):
pr, err := parsePipeRename(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'rename' pipe: %w", err)
}
pipes = append(pipes, pr)
return pc, nil
case lex.isKeyword("delete", "del", "rm"):
pd, err := parsePipeDelete(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'delete' pipe: %w", err)
}
pipes = append(pipes, pd)
return pd, nil
case lex.isKeyword("extract"):
pe, err := parsePipeExtract(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'extract' pipe: %w", err)
}
return pe, nil
case lex.isKeyword("field_names"):
pf, err := parsePipeFieldNames(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'field_names' pipe: %w", err)
}
return pf, nil
case lex.isKeyword("fields"):
pf, err := parsePipeFields(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'fields' pipe: %w", err)
}
return pf, nil
case lex.isKeyword("filter"):
pf, err := parsePipeFilter(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'filter' pipe: %w", err)
}
return pf, nil
case lex.isKeyword("limit", "head"):
pl, err := parsePipeLimit(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'limit' pipe: %w", err)
}
return pl, nil
case lex.isKeyword("offset", "skip"):
ps, err := parsePipeOffset(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err)
}
return ps, nil
case lex.isKeyword("rename", "mv"):
pr, err := parsePipeRename(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'rename' pipe: %w", err)
}
return pr, nil
case lex.isKeyword("sort"):
ps, err := parsePipeSort(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'sort' pipe: %w", err)
}
return ps, nil
case lex.isKeyword("stats"):
ps, err := parsePipeStats(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'stats' pipe: %w", err)
}
return ps, nil
case lex.isKeyword("uniq"):
pu, err := parsePipeUniq(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'uniq' pipe: %w", err)
}
return pu, nil
case lex.isKeyword("unpack_json"):
pu, err := parsePipeUnpackJSON(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'unpack_json' pipe: %w", err)
}
return pu, nil
case lex.isKeyword("unpack_logfmt"):
pu, err := parsePipeUnpackLogfmt(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'unpack_logfmt' pipe: %w", err)
}
return pu, nil
default:
return nil, fmt.Errorf("unexpected pipe %q", lex.token)
}
}
return pipes, nil
}


@ -40,7 +40,7 @@ func (pc *pipeCopy) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
if neededFields.contains("*") {
// update only unneeded fields
unneededFields.addAll(pc.dstFields)
unneededFields.addFields(pc.dstFields)
for i, srcField := range pc.srcFields {
if neededSrcFields[i] {
unneededFields.remove(srcField)
@ -48,7 +48,7 @@ func (pc *pipeCopy) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
} else {
// update only needed fields and reset unneeded fields
neededFields.removeAll(pc.dstFields)
neededFields.removeFields(pc.dstFields)
for i, srcField := range pc.srcFields {
if neededSrcFields[i] {
neededFields.add(srcField)


@ -6,20 +6,9 @@ import (
)
func TestPipeCopyUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeCopy(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
@ -53,6 +42,22 @@ func TestPipeCopyUpdateNeededFields(t *testing.T) {
f("copy s1 d1, s2 d2", "s2,d1,f1,f2", "", "s1,s2,f1,f2", "")
}
func expectPipeNeededFields(t *testing.T, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipe(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
}
func assertNeededFields(t *testing.T, nfs, unfs fieldsSet, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
@ -75,7 +80,7 @@ func assertNeededFields(t *testing.T, nfs, unfs fieldsSet, neededFieldsExpected,
func newTestFieldsSet(fields string) fieldsSet {
fs := newFieldsSet()
if fields != "" {
fs.addAll(strings.Split(fields, ","))
fs.addFields(strings.Split(fields, ","))
}
return fs
}


@ -25,10 +25,10 @@ func (pd *pipeDelete) String() string {
func (pd *pipeDelete) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
// update only unneeded fields
unneededFields.addAll(pd.fields)
unneededFields.addFields(pd.fields)
} else {
// update only needed fields
neededFields.removeAll(pd.fields)
neededFields.removeFields(pd.fields)
}
}


@ -7,18 +7,7 @@ import (
func TestPipeDeleteUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeDelete(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields


@ -0,0 +1,357 @@
package logstorage
import (
"fmt"
"html"
"strconv"
"strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// pipeExtract processes '| extract from <field> <pattern>' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe
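//
// Illustrative sketch (not part of the original source): the pattern is split into
// prefix/field steps by parseExtractFormatSteps, so a query such as
//
//   * | extract from _msg "ip=<ip> "
//
// becomes the steps [{prefix: "ip=", field: "ip"}, {prefix: " "}] and fills the "ip"
// result column with the text found between "ip=" and the following space.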
type pipeExtract struct {
fromField string
steps []extractFormatStep
pattern string
}
func (pe *pipeExtract) String() string {
s := "extract"
if !isMsgFieldName(pe.fromField) {
s += " from " + quoteTokenIfNeeded(pe.fromField)
}
s += " " + quoteTokenIfNeeded(pe.pattern)
return s
}
func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFieldsOrig := unneededFields.clone()
needFromField := false
for _, step := range pe.steps {
if step.field != "" {
if !unneededFieldsOrig.contains(step.field) {
needFromField = true
}
unneededFields.add(step.field)
}
}
if needFromField {
unneededFields.remove(pe.fromField)
} else {
unneededFields.add(pe.fromField)
}
} else {
needFromField := false
for _, step := range pe.steps {
if step.field != "" && neededFields.contains(step.field) {
needFromField = true
neededFields.remove(step.field)
}
}
if needFromField {
neededFields.add(pe.fromField)
}
}
}
func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeExtractProcessorShard, workersCount)
for i := range shards {
ef := newExtractFormat(pe.steps)
rcs := make([]resultColumn, len(ef.fields))
for j := range rcs {
rcs[j].name = ef.fields[j].name
}
shards[i] = pipeExtractProcessorShard{
pipeExtractProcessorShardNopad: pipeExtractProcessorShardNopad{
ef: ef,
rcs: rcs,
},
}
}
pep := &pipeExtractProcessor{
pe: pe,
ppBase: ppBase,
shards: shards,
}
return pep
}
type pipeExtractProcessor struct {
pe *pipeExtract
ppBase pipeProcessor
shards []pipeExtractProcessorShard
}
type pipeExtractProcessorShard struct {
pipeExtractProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0.
_ [128 - unsafe.Sizeof(pipeExtractProcessorShardNopad{})%128]byte
}
type pipeExtractProcessorShardNopad struct {
ef *extractFormat
rcs []resultColumn
}
func (pep *pipeExtractProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pep.shards[workerID]
ef := shard.ef
rcs := shard.rcs
c := br.getColumnByName(pep.pe.fromField)
if c.isConst {
v := c.valuesEncoded[0]
ef.apply(v)
for i, f := range ef.fields {
fieldValue := *f.value
rc := &rcs[i]
for range br.timestamps {
rc.addValue(fieldValue)
}
}
} else {
values := c.getValues(br)
for i, v := range values {
if i == 0 || values[i-1] != v {
ef.apply(v)
}
for j, f := range ef.fields {
rcs[j].addValue(*f.value)
}
}
}
br.addResultColumns(rcs)
pep.ppBase.writeBlock(workerID, br)
for i := range rcs {
rcs[i].resetValues()
}
}
func (pep *pipeExtractProcessor) flush() error {
return nil
}
func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
if !lex.isKeyword("extract") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "extract")
}
lex.nextToken()
fromField := "_msg"
if lex.isKeyword("from") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
}
fromField = f
}
pattern, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot read 'pattern': %w", err)
}
steps, err := parseExtractFormatSteps(pattern)
if err != nil {
return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", pattern, err)
}
pe := &pipeExtract{
fromField: fromField,
steps: steps,
pattern: pattern,
}
return pe, nil
}
type extractFormat struct {
// steps contains steps for extracting fields from the input string
steps []extractFormatStep
// matches contains matches for every step in steps
matches []string
// fields contains pointers to matches for steps with non-empty field names
fields []extractField
}
type extractField struct {
name string
value *string
}
type extractFormatStep struct {
prefix string
field string
}
func newExtractFormat(steps []extractFormatStep) *extractFormat {
if len(steps) == 0 {
logger.Panicf("BUG: steps cannot be empty")
}
matches := make([]string, len(steps))
var fields []extractField
for i, step := range steps {
if step.field != "" {
fields = append(fields, extractField{
name: step.field,
value: &matches[i],
})
}
}
if len(fields) == 0 {
logger.Panicf("BUG: fields cannot be empty")
}
ef := &extractFormat{
steps: steps,
matches: matches,
fields: fields,
}
return ef
}
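// apply extracts fields from s according to ef.steps and stores them in ef.matches.
//
// Worked example (illustrative, mirroring the package tests): applying the pattern
// "foo<bar>baz" to the string "a foo foobar baz" locates the prefix "foo", captures
// everything up to the next prefix "baz", and sets the "bar" field to " foobar ".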
func (ef *extractFormat) apply(s string) {
clear(ef.matches)
steps := ef.steps
if prefix := steps[0].prefix; prefix != "" {
n := strings.Index(s, prefix)
if n < 0 {
// Mismatch
return
}
s = s[n+len(prefix):]
}
matches := ef.matches
for i := range steps {
nextPrefix := ""
if i+1 < len(steps) {
nextPrefix = steps[i+1].prefix
}
us, nOffset := tryUnquoteString(s)
if nOffset >= 0 {
// Matched quoted string
matches[i] = us
s = s[nOffset:]
if !strings.HasPrefix(s, nextPrefix) {
// Mismatch
return
}
s = s[len(nextPrefix):]
} else {
// Match unquoted string until the nextPrefix
if nextPrefix == "" {
matches[i] = s
return
}
n := strings.Index(s, nextPrefix)
if n < 0 {
// Mismatch
return
}
matches[i] = s[:n]
s = s[n+len(nextPrefix):]
}
}
}
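// tryUnquoteString unquotes the Go-quoted or backquoted prefix of s and returns it
// together with the length of that prefix, or (s, -1) if s doesn't start with a quoted string.
//
// For instance (illustrative): tryUnquoteString("`bar baz,abc` def") returns
// ("bar baz,abc", 13), while tryUnquoteString("plain text") returns ("plain text", -1).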
func tryUnquoteString(s string) (string, int) {
if len(s) == 0 {
return s, -1
}
if s[0] != '"' && s[0] != '`' {
return s, -1
}
qp, err := strconv.QuotedPrefix(s)
if err != nil {
return s, -1
}
us, err := strconv.Unquote(qp)
if err != nil {
return s, -1
}
return us, len(qp)
}
func parseExtractFormatSteps(s string) ([]extractFormatStep, error) {
var steps []extractFormatStep
hasNamedField := false
n := strings.IndexByte(s, '<')
if n < 0 {
return nil, fmt.Errorf("missing <...> fields")
}
prefix := s[:n]
s = s[n+1:]
for {
n := strings.IndexByte(s, '>')
if n < 0 {
return nil, fmt.Errorf("missing '>' for <%s", s)
}
field := s[:n]
s = s[n+1:]
if field == "_" || field == "*" {
field = ""
}
steps = append(steps, extractFormatStep{
prefix: prefix,
field: field,
})
if !hasNamedField && field != "" {
hasNamedField = true
}
if len(s) == 0 {
break
}
n = strings.IndexByte(s, '<')
if n < 0 {
steps = append(steps, extractFormatStep{
prefix: s,
})
break
}
if n == 0 {
return nil, fmt.Errorf("missing delimiter after <%s>", field)
}
prefix = s[:n]
s = s[n+1:]
}
if !hasNamedField {
return nil, fmt.Errorf("missing named fields like <name>")
}
for i := range steps {
step := &steps[i]
step.prefix = html.UnescapeString(step.prefix)
}
return steps, nil
}


@ -0,0 +1,213 @@
package logstorage
import (
"reflect"
"testing"
)
func TestExtractFormatApply(t *testing.T) {
f := func(pattern, s string, resultsExpected []string) {
t.Helper()
steps, err := parseExtractFormatSteps(pattern)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
ef := newExtractFormat(steps)
ef.apply(s)
if len(ef.fields) != len(resultsExpected) {
t.Fatalf("unexpected number of results; got %d; want %d", len(ef.fields), len(resultsExpected))
}
for i, f := range ef.fields {
if v := *f.value; v != resultsExpected[i] {
t.Fatalf("unexpected value for field %q; got %q; want %q", f.name, v, resultsExpected[i])
}
}
}
f("<foo>", "", []string{""})
f("<foo>", "abc", []string{"abc"})
f("<foo>bar", "", []string{""})
f("<foo>bar", "bar", []string{""})
f("<foo>bar", "bazbar", []string{"baz"})
f("<foo>bar", "a bazbar xdsf", []string{"a baz"})
f("<foo>bar<>", "a bazbar xdsf", []string{"a baz"})
f("<foo>bar<>x", "a bazbar xdsf", []string{"a baz"})
f("foo<bar>", "", []string{""})
f("foo<bar>", "foo", []string{""})
f("foo<bar>", "a foo xdf sdf", []string{" xdf sdf"})
f("foo<bar>", "a foo foobar", []string{" foobar"})
f("foo<bar>baz", "a foo foobar", []string{""})
f("foo<bar>baz", "a foobaz bar", []string{""})
f("foo<bar>baz", "a foo foobar baz", []string{" foobar "})
f("foo<bar>baz", "a foo foobar bazabc", []string{" foobar "})
f("ip=<ip> <> path=<path> ", "x=a, ip=1.2.3.4 method=GET host='abc' path=/foo/bar some tail here", []string{"1.2.3.4", "/foo/bar"})
// escaped pattern
f("ip=&lt;<ip>&gt;", "foo ip=<1.2.3.4> bar", []string{"1.2.3.4"})
f("ip=&lt;<ip>&gt;", "foo ip=<foo&amp;bar> bar", []string{"foo&amp;bar"})
// quoted fields
f(`"msg":<msg>,`, `{"foo":"bar","msg":"foo,b\"ar\n\t","baz":"x"}`, []string{`foo,b"ar` + "\n\t"})
f(`foo=<bar>`, "foo=`bar baz,abc` def", []string{"bar baz,abc"})
f(`foo=<bar> `, "foo=`bar baz,abc` def", []string{"bar baz,abc"})
f(`<foo>`, `"foo,\"bar"`, []string{`foo,"bar`})
f(`<foo>,"bar`, `"foo,\"bar"`, []string{`foo,"bar`})
}
func TestParseExtractFormatStepsSuccess(t *testing.T) {
f := func(s string, stepsExpected []extractFormatStep) {
t.Helper()
steps, err := parseExtractFormatSteps(s)
if err != nil {
t.Fatalf("unexpected error when parsing %q: %s", s, err)
}
if !reflect.DeepEqual(steps, stepsExpected) {
t.Fatalf("unexpected steps for [%s]; got %v; want %v", s, steps, stepsExpected)
}
}
f("<foo>", []extractFormatStep{
{
field: "foo",
},
})
f("<foo>bar", []extractFormatStep{
{
field: "foo",
},
{
prefix: "bar",
},
})
f("<>bar<foo>", []extractFormatStep{
{},
{
prefix: "bar",
field: "foo",
},
})
f("bar<foo>", []extractFormatStep{
{
prefix: "bar",
field: "foo",
},
})
f("bar<foo>abc", []extractFormatStep{
{
prefix: "bar",
field: "foo",
},
{
prefix: "abc",
},
})
f("bar<foo>abc<_>", []extractFormatStep{
{
prefix: "bar",
field: "foo",
},
{
prefix: "abc",
},
})
f("<foo>bar<baz>", []extractFormatStep{
{
field: "foo",
},
{
prefix: "bar",
field: "baz",
},
})
f("bar<foo>baz", []extractFormatStep{
{
prefix: "bar",
field: "foo",
},
{
prefix: "baz",
},
})
f("&lt;<foo>&amp;gt;", []extractFormatStep{
{
prefix: "<",
field: "foo",
},
{
prefix: "&gt;",
},
})
}
func TestParseExtractFormatStepFailure(t *testing.T) {
f := func(s string) {
t.Helper()
_, err := parseExtractFormatSteps(s)
if err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
}
}
// empty string
f("")
// zero fields
f("foobar")
// zero named fields
f("<>")
f("foo<>")
f("<>foo")
f("foo<_>bar<*>baz<>xxx")
// missing delimiter between fields
f("<foo><bar>")
f("<><bar>")
f("<foo><>")
f("bb<foo><><bar>aa")
f("aa<foo><bar>")
f("aa<foo><bar>bb")
// missing >
f("<foo")
f("foo<bar")
}
func TestPipeExtractUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("extract from x '<foo>'", "*", "", "*", "foo")
// all the needed fields, unneeded fields do not intersect with fromField and output fields
f("extract from x '<foo>'", "*", "f1,f2", "*", "f1,f2,foo")
// all the needed fields, unneeded fields intersect with fromField
f("extract from x '<foo>'", "*", "f2,x", "*", "f2,foo")
// all the needed fields, unneeded fields intersect with output fields
f("extract from x '<foo>x<bar>'", "*", "f2,foo", "*", "bar,f2,foo")
// all the needed fields, unneeded fields intersect with all the output fields
f("extract from x '<foo>x<bar>'", "*", "f2,foo,bar", "*", "bar,f2,foo,x")
// needed fields do not intersect with fromField and output fields
f("extract from x '<foo>x<bar>'", "f1,f2", "", "f1,f2", "")
// needed fields intersect with fromField
f("extract from x '<foo>x<bar>'", "f2,x", "", "f2,x", "")
// needed fields intersect with output fields
f("extract from x '<foo>x<bar>'", "f2,foo", "", "f2,x", "")
// needed fields intersect with fromField and output fields
f("extract from x '<foo>x<bar>'", "f2,foo,x,y", "", "f2,x,y", "")
}


@ -0,0 +1,80 @@
package logstorage
import (
"testing"
)
func BenchmarkExtractFormatApply(b *testing.B) {
a := []string{
`{"level":"error","ts":1716113701.63973,"caller":"gcm/export.go:498","msg":"Failed to export self-observability metrics to Cloud Monitoring","error":"rpc error: code = PermissionDenied desc = Permission monitoring.timeSeries.create denied (or the resource may not exist).","stacktrace":"google3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).startSelfObservability\n\tcloud/kubernetes/metrics/common/gcm/export.go:498","foo":"bar"}`,
`{"level":"error","ts":1716113370.2321634,"caller":"gcm/export.go:434","msg":"Failed to export metrics to Cloud Monitoring","error":"rpc error: code = PermissionDenied desc = Permission monitoring.timeSeries.create denied (or the resource may not exist).","stacktrace":"google3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).exportBuffer\n\tcloud/kubernetes/metrics/common/gcm/export.go:434\ngoogle3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).flush\n\tcloud/kubernetes/metrics/common/gcm/export.go:383\ngoogle3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).Flush\n\tcloud/kubernetes/metrics/common/gcm/export.go:365\ngoogle3/cloud/kubernetes/metrics/components/collector/adapter/adapter.(*adapter).Finalize\n\tcloud/kubernetes/metrics/components/collector/adapter/consume.go:131\ngoogle3/cloud/kubernetes/metrics/components/collector/prometheus/prometheus.(*parser).ParseText\n\tcloud/kubernetes/metrics/components/collector/prometheus/parse.go:158\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.runScrapeLoop\n\tcloud/kubernetes/metrics/components/collector/collector.go:103\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Run\n\tcloud/kubernetes/metrics/components/collector/collector.go:81\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Start.func1\n\tcloud/kubernetes/metrics/components/collector/multi_target_collector.go:45","foo":"bar"}`,
`{"level":"error","ts":1716113127.7496774,"caller":"collector/collector.go:105","msg":"Failed to process metrics","scrape_target":"http://localhost:8093/metrics","error":"failed to finalize exporting: \"2 errors occurred:\\n\\t* failed to export 1 (out of 1) batches of metrics to Cloud Monitoring\\n\\t* failed to export 1 (out of 1) batches of metrics to Cloud Monitoring\\n\\n\"","stacktrace":"google3/cloud/kubernetes/metrics/components/collector/collector.runScrapeLoop\n\tcloud/kubernetes/metrics/components/collector/collector.go:105\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Run\n\tcloud/kubernetes/metrics/components/collector/collector.go:81\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Start.func1\n\tcloud/kubernetes/metrics/components/collector/multi_target_collector.go:45","foo":"bar"}`,
`{"level":"error","ts":1716113547.6429873,"caller":"gcm/export.go:498","msg":"Failed to export self-observability metrics to Cloud Monitoring","error":"rpc error: code = PermissionDenied desc = Permission monitoring.timeSeries.create denied (or the resource may not exist).","stacktrace":"google3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).startSelfObservability\n\tcloud/kubernetes/metrics/common/gcm/export.go:498", "foo":"bar"}`,
`{"level":"error","ts":1716113541.4445803,"caller":"periodicexporter/periodic_exporter.go:180","msg":"Failed to flush metrics to Cloud Monitoring","error":"1 error occurred:\n\t* failed to export 1 (out of 1) batches of metrics to Cloud Monitoring\n\n","stacktrace":"google3/cloud/kubernetes/metrics/common/periodicexporter/periodicexporter.(*Exporter).exportAll\n\tcloud/kubernetes/metrics/common/periodicexporter/periodic_exporter.go:180\ngoogle3/cloud/kubernetes/metrics/common/periodicexporter/periodicexporter.(*Exporter).periodicExporter\n\tcloud/kubernetes/metrics/common/periodicexporter/periodic_exporter.go:157","foo":"bar"}`,
}
b.Run("single-small-field-at-start", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":"<level>"`, a)
})
b.Run("single-small-field-at-start-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":<level>`, a)
})
b.Run("single-small-field-at-end", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"foo":"<foo>"`, a)
})
b.Run("single-small-field-at-end-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"foo":<foo>`, a)
})
b.Run("single-medium-field", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"msg":"<message>"`, a)
})
b.Run("single-medium-field-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"msg":<message>`, a)
})
b.Run("single-large-field", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"stacktrace":"<stacktrace>"`, a)
})
b.Run("single-large-field-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"stacktrace":<stacktrace>`, a)
})
b.Run("two-fields", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":"<level>",<_>"msg":"<msg>"`, a)
})
b.Run("two-fields-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":<level>,<_>"msg":<msg>`, a)
})
b.Run("many-fields", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":"<level>","ts":"<ts>","caller":"<caller>","msg":"<msg>","error":"<error>"`, a)
})
b.Run("many-fields-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":<level>,"ts":<ts>,"caller":<caller>,"msg":<msg>,"error":<error>`, a)
})
}
func benchmarkExtractFormatApply(b *testing.B, pattern string, a []string) {
steps, err := parseExtractFormatSteps(pattern)
if err != nil {
b.Fatalf("unexpected error: %s", err)
}
n := 0
for _, s := range a {
n += len(s)
}
b.ReportAllocs()
b.SetBytes(int64(n))
b.RunParallel(func(pb *testing.PB) {
sink := 0
ef := newExtractFormat(steps)
for pb.Next() {
for _, s := range a {
ef.apply(s)
for _, v := range ef.matches {
sink += len(v)
}
}
}
GlobalSink.Add(uint64(sink))
})
}


@ -0,0 +1,167 @@
package logstorage
import (
"fmt"
"strings"
"unsafe"
)
// pipeFieldNames processes '| field_names' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#field-names-pipe
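//
// Hypothetical usage (a sketch based on the processor below):
//
//   _time:5m | field_names as names
//
// returns a single "names" column containing one row per unique field name seen in
// the selected logs.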
type pipeFieldNames struct {
// resultName is the name of the column to write results to.
resultName string
// isFirstPipe is set to true if '| field_names' pipe is the first in the query.
//
// This allows skipping the loading of the _time column.
isFirstPipe bool
}
func (pf *pipeFieldNames) String() string {
return "field_names as " + quoteTokenIfNeeded(pf.resultName)
}
func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fieldsSet) {
neededFields.add("*")
unneededFields.reset()
if pf.isFirstPipe {
unneededFields.add("_time")
}
}
func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeFieldNamesProcessorShard, workersCount)
for i := range shards {
shards[i] = pipeFieldNamesProcessorShard{
pipeFieldNamesProcessorShardNopad: pipeFieldNamesProcessorShardNopad{
m: make(map[string]struct{}),
},
}
}
pfp := &pipeFieldNamesProcessor{
pf: pf,
stopCh: stopCh,
ppBase: ppBase,
shards: shards,
}
return pfp
}
type pipeFieldNamesProcessor struct {
pf *pipeFieldNames
stopCh <-chan struct{}
ppBase pipeProcessor
shards []pipeFieldNamesProcessorShard
}
type pipeFieldNamesProcessorShard struct {
pipeFieldNamesProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0.
_ [128 - unsafe.Sizeof(pipeFieldNamesProcessorShardNopad{})%128]byte
}
type pipeFieldNamesProcessorShardNopad struct {
// m holds unique field names.
m map[string]struct{}
}
func (pfp *pipeFieldNamesProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pfp.shards[workerID]
cs := br.getColumns()
for _, c := range cs {
if _, ok := shard.m[c.name]; !ok {
nameCopy := strings.Clone(c.name)
shard.m[nameCopy] = struct{}{}
}
}
}
func (pfp *pipeFieldNamesProcessor) flush() error {
if needStop(pfp.stopCh) {
return nil
}
// merge state across shards
shards := pfp.shards
m := shards[0].m
shards = shards[1:]
for i := range shards {
for k := range shards[i].m {
m[k] = struct{}{}
}
}
if pfp.pf.isFirstPipe {
m["_time"] = struct{}{}
}
// write result
wctx := &pipeFieldNamesWriteContext{
pfp: pfp,
}
wctx.rcs[0].name = pfp.pf.resultName
for k := range m {
wctx.writeRow(k)
}
wctx.flush()
return nil
}
type pipeFieldNamesWriteContext struct {
pfp *pipeFieldNamesProcessor
rcs [1]resultColumn
br blockResult
valuesLen int
}
func (wctx *pipeFieldNamesWriteContext) writeRow(v string) {
wctx.rcs[0].addValue(v)
wctx.valuesLen += len(v)
if wctx.valuesLen >= 1_000_000 {
wctx.flush()
}
}
func (wctx *pipeFieldNamesWriteContext) flush() {
br := &wctx.br
wctx.valuesLen = 0
// Flush rcs to ppBase
br.setResultColumns(wctx.rcs[:1])
wctx.pfp.ppBase.writeBlock(0, br)
br.reset()
wctx.rcs[0].resetValues()
}
func parsePipeFieldNames(lex *lexer) (*pipeFieldNames, error) {
if !lex.isKeyword("field_names") {
return nil, fmt.Errorf("expecting 'field_names'; got %q", lex.token)
}
lex.nextToken()
if lex.isKeyword("as") {
lex.nextToken()
}
resultName, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result name for 'field_names': %w", err)
}
pf := &pipeFieldNames{
resultName: resultName,
}
return pf, nil
}


@ -0,0 +1,27 @@
package logstorage
import (
"testing"
)
func TestPipeFieldNamesUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("field_names as f1", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("field_names as f3", "*", "f1,f2", "*", "")
// all the needed fields, unneeded fields intersect with src
f("field_names as f1", "*", "s1,f1,f2", "*", "")
// needed fields do not intersect with src
f("field_names as f3", "f1,f2", "", "*", "")
// needed fields intersect with src
f("field_names as f1", "s1,f1,f2", "", "*", "")
}


@ -32,7 +32,7 @@ func (pf *pipeFields) updateNeededFields(neededFields, unneededFields fieldsSet)
if neededFields.contains("*") {
// subtract unneeded fields from pf.fields
neededFields.reset()
neededFields.addAll(pf.fields)
neededFields.addFields(pf.fields)
for _, f := range unneededFields.getAll() {
neededFields.remove(f)
}


@ -7,18 +7,7 @@ import (
func TestPipeFieldsUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeFields(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields


@ -0,0 +1,108 @@
package logstorage
import (
"fmt"
"unsafe"
)
// pipeFilter processes '| filter ...' queries.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe
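//
// For example (taken from the parser tests in this commit):
//
//   foo | stats by (host) count() logs | filter logs:>50 | sort by (logs desc) | limit 10
//
// keeps only the hosts with more than 50 matching log entries.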
type pipeFilter struct {
// f is a filter to apply to the written rows.
f filter
}
func (pf *pipeFilter) String() string {
return "filter " + pf.f.String()
}
func (pf *pipeFilter) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
fs := newFieldsSet()
pf.f.updateNeededFields(fs)
for f := range fs {
unneededFields.remove(f)
}
} else {
pf.f.updateNeededFields(neededFields)
}
}
func (pf *pipeFilter) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeFilterProcessorShard, workersCount)
pfp := &pipeFilterProcessor{
pf: pf,
ppBase: ppBase,
shards: shards,
}
return pfp
}
type pipeFilterProcessor struct {
pf *pipeFilter
ppBase pipeProcessor
shards []pipeFilterProcessorShard
}
type pipeFilterProcessorShard struct {
pipeFilterProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0.
_ [128 - unsafe.Sizeof(pipeFilterProcessorShardNopad{})%128]byte
}
type pipeFilterProcessorShardNopad struct {
br blockResult
bm bitmap
}
func (pfp *pipeFilterProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pfp.shards[workerID]
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
pfp.pf.f.applyToBlockResult(br, bm)
if bm.areAllBitsSet() {
// Fast path - the filter didn't filter out anything - send br to the base pipe as is.
pfp.ppBase.writeBlock(workerID, br)
return
}
if bm.isZero() {
// Nothing to send
return
}
// Slow path - copy the remaining rows from br to shard.br before sending them to base pipe.
shard.br.initFromFilterAllColumns(br, bm)
pfp.ppBase.writeBlock(workerID, &shard.br)
}
func (pfp *pipeFilterProcessor) flush() error {
return nil
}
func parsePipeFilter(lex *lexer) (*pipeFilter, error) {
if !lex.isKeyword("filter") {
return nil, fmt.Errorf("expecting 'filter'; got %q", lex.token)
}
lex.nextToken()
f, err := parseFilter(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'filter': %w", err)
}
pf := &pipeFilter{
f: f,
}
return pf, nil
}


@ -0,0 +1,27 @@
package logstorage
import (
"testing"
)
func TestPipeFilterUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("filter foo f1:bar", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("filter foo f3:bar", "*", "f1,f2", "*", "f1,f2")
// all the needed fields, unneeded fields intersect with src
f("filter foo f1:bar", "*", "s1,f1,f2", "*", "s1,f2")
// needed fields do not intersect with src
f("filter foo f3:bar", "f1,f2", "", "_msg,f1,f2,f3", "")
// needed fields intersect with src
f("filter foo f1:bar", "s1,f1,f2", "", "_msg,f1,f2,s1", "")
}


@ -9,18 +9,18 @@ import (
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe
type pipeLimit struct {
n uint64
limit uint64
}
func (pl *pipeLimit) String() string {
return fmt.Sprintf("limit %d", pl.n)
return fmt.Sprintf("limit %d", pl.limit)
}
func (pl *pipeLimit) updateNeededFields(_, _ fieldsSet) {
}
func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor {
if pl.n == 0 {
if pl.limit == 0 {
// Special case - notify the caller to stop writing data to the returned pipeLimitProcessor
cancel()
}
@ -45,7 +45,7 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
}
rowsProcessed := plp.rowsProcessed.Add(uint64(len(br.timestamps)))
if rowsProcessed <= plp.pl.n {
if rowsProcessed <= plp.pl.limit {
// Fast path - write all the rows to ppBase.
plp.ppBase.writeBlock(workerID, br)
return
@ -53,13 +53,13 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
// Slow path - overflow. Write the remaining rows if needed.
rowsProcessed -= uint64(len(br.timestamps))
if rowsProcessed >= plp.pl.n {
if rowsProcessed >= plp.pl.limit {
// Nothing to write. There is no need in cancel() call, since it has been called by another goroutine.
return
}
// Write remaining rows.
keepRows := plp.pl.n - rowsProcessed
keepRows := plp.pl.limit - rowsProcessed
br.truncateRows(int(keepRows))
plp.ppBase.writeBlock(workerID, br)
@ -83,7 +83,7 @@ func parsePipeLimit(lex *lexer) (*pipeLimit, error) {
}
lex.nextToken()
pl := &pipeLimit{
n: n,
limit: n,
}
return pl, nil
}


@ -0,0 +1,21 @@
package logstorage
import (
"testing"
)
func TestPipeLimitUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("limit 10", "*", "", "*", "")
// all the needed fields, plus unneeded fields
f("limit 10", "*", "f1,f2", "*", "f1,f2")
// needed fields
f("limit 10", "f1,f2", "", "f1,f2", "")
}


@ -9,11 +9,11 @@ import (
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#offset-pipe
type pipeOffset struct {
n uint64
offset uint64
}
func (po *pipeOffset) String() string {
return fmt.Sprintf("offset %d", po.n)
return fmt.Sprintf("offset %d", po.offset)
}
func (po *pipeOffset) updateNeededFields(_, _ fieldsSet) {
@ -39,17 +39,17 @@ func (pop *pipeOffsetProcessor) writeBlock(workerID uint, br *blockResult) {
}
rowsProcessed := pop.rowsProcessed.Add(uint64(len(br.timestamps)))
if rowsProcessed <= pop.po.n {
if rowsProcessed <= pop.po.offset {
return
}
rowsProcessed -= uint64(len(br.timestamps))
if rowsProcessed >= pop.po.n {
if rowsProcessed >= pop.po.offset {
pop.ppBase.writeBlock(workerID, br)
return
}
rowsSkip := pop.po.n - rowsProcessed
rowsSkip := pop.po.offset - rowsProcessed
br.skipRows(int(rowsSkip))
pop.ppBase.writeBlock(workerID, br)
}
@ -70,7 +70,7 @@ func parsePipeOffset(lex *lexer) (*pipeOffset, error) {
}
lex.nextToken()
po := &pipeOffset{
n: n,
offset: n,
}
return po, nil
}


@ -0,0 +1,21 @@
package logstorage
import (
"testing"
)
func TestPipeOffsetUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("offset 10", "*", "", "*", "")
// all the needed fields, plus unneeded fields
f("offset 10", "*", "f1,f2", "*", "f1,f2")
// needed fields
f("offset 10", "f1,f2", "", "f1,f2", "")
}


@ -40,7 +40,7 @@ func (pr *pipeRename) updateNeededFields(neededFields, unneededFields fieldsSet)
}
if neededFields.contains("*") {
// update only unneeded fields
unneededFields.addAll(pr.dstFields)
unneededFields.addFields(pr.dstFields)
for i, srcField := range pr.srcFields {
if neededSrcFields[i] {
unneededFields.remove(srcField)
@ -50,7 +50,7 @@ func (pr *pipeRename) updateNeededFields(neededFields, unneededFields fieldsSet)
}
} else {
// update only needed fields and reset unneeded fields
neededFields.removeAll(pr.dstFields)
neededFields.removeFields(pr.dstFields)
for i, srcField := range pr.srcFields {
if neededSrcFields[i] {
neededFields.add(srcField)


@ -7,18 +7,7 @@ import (
func TestPipeRenameUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeRename(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields


@ -11,7 +11,6 @@ import (
"sync/atomic"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
)
@ -80,9 +79,12 @@ func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
shards := make([]pipeSortProcessorShard, workersCount)
for i := range shards {
shard := &shards[i]
shard.ps = ps
shard.stateSizeBudget = stateSizeBudgetChunk
shards[i] = pipeSortProcessorShard{
pipeSortProcessorShardNopad: pipeSortProcessorShardNopad{
ps: ps,
stateSizeBudget: stateSizeBudgetChunk,
},
}
maxStateSize -= stateSizeBudgetChunk
}
@ -202,12 +204,14 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
columnValues := shard.columnValues[:0]
for _, c := range cs {
columnValues = append(columnValues, c.getValues(br))
values := c.getValues(br)
columnValues = append(columnValues, values)
}
shard.columnValues = columnValues
// Generate byColumns
var rc resultColumn
valuesEncoded := make([]string, len(br.timestamps))
shard.stateSizeBudget -= len(valuesEncoded) * int(unsafe.Sizeof(valuesEncoded[0]))
bb := bbPool.Get()
for rowIdx := range br.timestamps {
@ -219,7 +223,12 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
bb.B = marshalJSONKeyValue(bb.B, cs[i].name, v)
bb.B = append(bb.B, ',')
}
rc.addValue(bytesutil.ToUnsafeString(bb.B))
if rowIdx > 0 && valuesEncoded[rowIdx-1] == string(bb.B) {
valuesEncoded[rowIdx] = valuesEncoded[rowIdx-1]
} else {
valuesEncoded[rowIdx] = string(bb.B)
shard.stateSizeBudget -= len(bb.B)
}
}
bbPool.Put(bb)
@ -232,13 +241,13 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
{
c: &blockResultColumn{
valueType: valueTypeString,
encodedValues: rc.values,
valuesEncoded: valuesEncoded,
},
i64Values: i64Values,
f64Values: f64Values,
},
}
shard.stateSizeBudget -= len(rc.buf) + int(unsafe.Sizeof(byColumns[0])+unsafe.Sizeof(*byColumns[0].c))
shard.stateSizeBudget -= int(unsafe.Sizeof(byColumns[0]) + unsafe.Sizeof(*byColumns[0].c))
// Append br to shard.blocks.
shard.blocks = append(shard.blocks, sortBlock{
@ -260,8 +269,8 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
continue
}
if c.isConst {
bc.i64Values = shard.createInt64Values(c.encodedValues)
bc.f64Values = shard.createFloat64Values(c.encodedValues)
bc.i64Values = shard.createInt64Values(c.valuesEncoded)
bc.f64Values = shard.createFloat64Values(c.valuesEncoded)
continue
}
@ -512,14 +521,10 @@ func (wctx *pipeSortWriteContext) writeNextRow(shard *pipeSortProcessorShard) {
rcs = wctx.rcs[:0]
for _, bf := range byFields {
rcs = append(rcs, resultColumn{
name: bf.name,
})
rcs = appendResultColumnWithName(rcs, bf.name)
}
for _, c := range b.otherColumns {
rcs = append(rcs, resultColumn{
name: c.name,
})
rcs = appendResultColumnWithName(rcs, c.name)
}
wctx.rcs = rcs
}
@ -558,7 +563,7 @@ func (wctx *pipeSortWriteContext) flush() {
wctx.psp.ppBase.writeBlock(0, br)
br.reset()
for i := range rcs {
rcs[i].resetKeepName()
rcs[i].resetValues()
}
}
@ -610,8 +615,8 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
if cA.c.isConst && cB.c.isConst {
// Fast path - compare const values
ccA := cA.c.encodedValues[0]
ccB := cB.c.encodedValues[0]
ccA := cA.c.valuesEncoded[0]
ccB := cB.c.valuesEncoded[0]
if ccA == ccB {
continue
}
@ -689,8 +694,10 @@ func parsePipeSort(lex *lexer) (*pipeSort, error) {
lex.nextToken()
var ps pipeSort
if lex.isKeyword("by", "(") {
if lex.isKeyword("by") {
lex.nextToken()
}
bfs, err := parseBySortFields(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)


@ -7,18 +7,7 @@ import (
func TestPipeSortUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeSort(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields


@ -20,23 +20,34 @@ type pipeStats struct {
// byFields contains field names with optional buckets from 'by(...)' clause.
byFields []*byStatsField
// resultNames contains names of output results generated by funcs.
resultNames []string
// funcs contains stats functions to execute.
funcs []statsFunc
funcs []pipeStatsFunc
}
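// pipeStatsFunc is a single stats function together with its optional 'if' filter
// and result name. Illustrative sketch (assumed syntax, based on String() and
// parsePipeStats below):
//
//   * | stats count() if (error) as errors, count() as total
//
// counts the rows matching the error filter into "errors", while "total" counts all rows.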
type pipeStatsFunc struct {
// f is stats function to execute
f statsFunc
// neededFieldsForFunc contains the fields needed for executing f
neededFieldsForFunc []string
// iff is an additional filter, which is applied to results before executing f on them
iff filter
// resultName is the name of the output generated by f
resultName string
}
type statsFunc interface {
// String returns string representation of statsFunc
String() string
// neededFields returns the needed fields for calculating the given stats
neededFields() []string
// updateNeededFields updates neededFields with the fields needed for calculating the given stats
updateNeededFields(neededFields fieldsSet)
// newStatsProcessor must create new statsProcessor for calculating stats for the given statsFunc.
// newStatsProcessor must create new statsProcessor for calculating stats for the given statsFunc
//
// It also must return the size in bytes of the returned statsProcessor.
// It also must return the size in bytes of the returned statsProcessor
newStatsProcessor() (statsProcessor, int)
}
@ -77,7 +88,12 @@ func (ps *pipeStats) String() string {
}
a := make([]string, len(ps.funcs))
for i, f := range ps.funcs {
a[i] = f.String() + " as " + quoteTokenIfNeeded(ps.resultNames[i])
line := f.f.String()
if f.iff != nil {
line += " if (" + f.iff.String() + ")"
}
line += " as " + quoteTokenIfNeeded(f.resultName)
a[i] = line
}
s += strings.Join(a, ", ")
return s
@ -87,23 +103,18 @@ func (ps *pipeStats) updateNeededFields(neededFields, unneededFields fieldsSet)
neededFieldsOrig := neededFields.clone()
neededFields.reset()
byFields := make([]string, len(ps.byFields))
for i, bf := range ps.byFields {
byFields[i] = bf.name
// byFields are needed unconditionally, since the output number of rows depends on them.
for _, bf := range ps.byFields {
neededFields.add(bf.name)
}
for _, f := range byFields {
if neededFieldsOrig.contains(f) && !unneededFields.contains(f) {
neededFields.addAll(byFields)
for _, f := range ps.funcs {
if neededFieldsOrig.contains(f.resultName) && !unneededFields.contains(f.resultName) {
f.f.updateNeededFields(neededFields)
if f.iff != nil {
f.iff.updateNeededFields(neededFields)
}
}
for i, resultName := range ps.resultNames {
if neededFieldsOrig.contains(resultName) && !unneededFields.contains(resultName) {
funcFields := ps.funcs[i].neededFields()
neededFields.addAll(byFields)
neededFields.addAll(funcFields)
}
}
unneededFields.reset()
@ -115,11 +126,21 @@ func (ps *pipeStats) newPipeProcessor(workersCount int, stopCh <-chan struct{},
maxStateSize := int64(float64(memory.Allowed()) * 0.3)
shards := make([]pipeStatsProcessorShard, workersCount)
funcsLen := len(ps.funcs)
for i := range shards {
shard := &shards[i]
shard.ps = ps
shard.m = make(map[string]*pipeStatsGroup)
shard.stateSizeBudget = stateSizeBudgetChunk
shards[i] = pipeStatsProcessorShard{
pipeStatsProcessorShardNopad: pipeStatsProcessorShardNopad{
ps: ps,
m: make(map[string]*pipeStatsGroup),
bms: make([]bitmap, funcsLen),
brs: make([]*blockResult, funcsLen),
brsBuf: make([]blockResult, funcsLen),
stateSizeBudget: stateSizeBudgetChunk,
},
}
maxStateSize -= stateSizeBudgetChunk
}
@ -159,8 +180,14 @@ type pipeStatsProcessorShard struct {
type pipeStatsProcessorShardNopad struct {
ps *pipeStats
m map[string]*pipeStatsGroup
// bms, brs and brsBuf are used for applying per-func filters.
bms []bitmap
brs []*blockResult
brsBuf []blockResult
columnValues [][]string
keyBuf []byte
@ -170,10 +197,14 @@ type pipeStatsProcessorShardNopad struct {
func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
byFields := shard.ps.byFields
// Apply per-function filters
brs := shard.applyPerFunctionFilters(br)
// Process stats for the defined functions
if len(byFields) == 0 {
// Fast path - pass all the rows to a single group with empty key.
psg := shard.getPipeStatsGroup(nil)
shard.stateSizeBudget -= psg.updateStatsForAllRows(br)
shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
return
}
if len(byFields) == 1 {
@ -182,19 +213,19 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
c := br.getColumnByName(bf.name)
if c.isConst {
// Fast path for column with constant value.
v := br.getBucketedValue(c.encodedValues[0], bf)
v := br.getBucketedValue(c.valuesEncoded[0], bf)
shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(v))
psg := shard.getPipeStatsGroup(shard.keyBuf)
shard.stateSizeBudget -= psg.updateStatsForAllRows(br)
shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
return
}
values := c.getBucketedValues(br, bf)
values := c.getValuesBucketed(br, bf)
if areConstValues(values) {
// Fast path for column with constant values.
shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(values[0]))
psg := shard.getPipeStatsGroup(shard.keyBuf)
shard.stateSizeBudget -= psg.updateStatsForAllRows(br)
shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
return
}
@ -206,7 +237,7 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
keyBuf = encoding.MarshalBytes(keyBuf[:0], bytesutil.ToUnsafeBytes(values[i]))
psg = shard.getPipeStatsGroup(keyBuf)
}
shard.stateSizeBudget -= psg.updateStatsForRow(br, i)
shard.stateSizeBudget -= psg.updateStatsForRow(brs, i)
}
shard.keyBuf = keyBuf
return
@ -216,7 +247,7 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
columnValues := shard.columnValues[:0]
for _, bf := range byFields {
c := br.getColumnByName(bf.name)
values := c.getBucketedValues(br, bf)
values := c.getValuesBucketed(br, bf)
columnValues = append(columnValues, values)
}
shard.columnValues = columnValues
@ -236,7 +267,7 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(values[0]))
}
psg := shard.getPipeStatsGroup(keyBuf)
shard.stateSizeBudget -= psg.updateStatsForAllRows(br)
shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
shard.keyBuf = keyBuf
return
}
@ -261,11 +292,44 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
}
psg = shard.getPipeStatsGroup(keyBuf)
}
shard.stateSizeBudget -= psg.updateStatsForRow(br, i)
shard.stateSizeBudget -= psg.updateStatsForRow(brs, i)
}
shard.keyBuf = keyBuf
}
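// applyPerFunctionFilters evaluates the optional per-function 'if' filters and returns
// one blockResult per stats function. Illustrative sketch: for
// `stats count() if (error) as errors, count() as total`, brs[0] would hold only the
// rows matching "error", while brs[1] keeps pointing at the original block.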
func (shard *pipeStatsProcessorShard) applyPerFunctionFilters(brSrc *blockResult) []*blockResult {
funcs := shard.ps.funcs
brs := shard.brs
for i := range funcs {
iff := funcs[i].iff
if iff == nil {
// Fast path - there are no per-function filters
brs[i] = brSrc
continue
}
bm := &shard.bms[i]
bm.init(len(brSrc.timestamps))
bm.setBits()
iff.applyToBlockResult(brSrc, bm)
if bm.areAllBitsSet() {
// Fast path - per-function filter doesn't filter out rows
brs[i] = brSrc
continue
}
// Store the remaining rows for the needed per-func fields into brDst
brDst := &shard.brsBuf[i]
if bm.isZero() {
brDst.reset()
} else {
brDst.initFromFilterNeededColumns(brSrc, bm, funcs[i].neededFieldsForFunc)
}
brs[i] = brDst
}
return brs
}
func (shard *pipeStatsProcessorShard) getPipeStatsGroup(key []byte) *pipeStatsGroup {
psg := shard.m[string(key)]
if psg != nil {
@ -274,7 +338,7 @@ func (shard *pipeStatsProcessorShard) getPipeStatsGroup(key []byte) *pipeStatsGr
sfps := make([]statsProcessor, len(shard.ps.funcs))
for i, f := range shard.ps.funcs {
sfp, stateSize := f.newStatsProcessor()
sfp, stateSize := f.f.newStatsProcessor()
sfps[i] = sfp
shard.stateSizeBudget -= stateSize
}
@ -291,18 +355,18 @@ type pipeStatsGroup struct {
sfps []statsProcessor
}
func (psg *pipeStatsGroup) updateStatsForAllRows(br *blockResult) int {
func (psg *pipeStatsGroup) updateStatsForAllRows(brs []*blockResult) int {
n := 0
for _, sfp := range psg.sfps {
n += sfp.updateStatsForAllRows(br)
for i, sfp := range psg.sfps {
n += sfp.updateStatsForAllRows(brs[i])
}
return n
}
func (psg *pipeStatsGroup) updateStatsForRow(br *blockResult, rowIdx int) int {
func (psg *pipeStatsGroup) updateStatsForRow(brs []*blockResult, rowIdx int) int {
n := 0
for _, sfp := range psg.sfps {
n += sfp.updateStatsForRow(br, rowIdx)
for i, sfp := range psg.sfps {
n += sfp.updateStatsForRow(brs[i], rowIdx)
}
return n
}
@ -368,16 +432,12 @@ func (psp *pipeStatsProcessor) flush() error {
m = shards[0].m
}
rcs := make([]resultColumn, 0, len(byFields)+len(psp.ps.resultNames))
rcs := make([]resultColumn, 0, len(byFields)+len(psp.ps.funcs))
for _, bf := range byFields {
rcs = append(rcs, resultColumn{
name: bf.name,
})
rcs = appendResultColumnWithName(rcs, bf.name)
}
for _, resultName := range psp.ps.resultNames {
rcs = append(rcs, resultColumn{
name: resultName,
})
for _, f := range psp.ps.funcs {
rcs = appendResultColumnWithName(rcs, f.resultName)
}
var br blockResult
@ -423,7 +483,7 @@ func (psp *pipeStatsProcessor) flush() error {
psp.ppBase.writeBlock(0, &br)
br.reset()
for i := range rcs {
rcs[i].resetKeepName()
rcs[i].resetValues()
}
valuesLen = 0
}
@ -443,8 +503,10 @@ func parsePipeStats(lex *lexer) (*pipeStats, error) {
lex.nextToken()
var ps pipeStats
if lex.isKeyword("by", "(") {
if lex.isKeyword("by") {
lex.nextToken()
}
bfs, err := parseByStatsFields(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)
@ -452,17 +514,36 @@ func parsePipeStats(lex *lexer) (*pipeStats, error) {
ps.byFields = bfs
}
var resultNames []string
var funcs []statsFunc
var funcs []pipeStatsFunc
for {
sf, resultName, err := parseStatsFunc(lex)
var f pipeStatsFunc
sf, err := parseStatsFunc(lex)
if err != nil {
return nil, err
}
resultNames = append(resultNames, resultName)
funcs = append(funcs, sf)
f.f = sf
if lex.isKeyword("if") {
iff, err := parseIfFilter(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'if' filter for %s: %w", sf, err)
}
f.iff = iff
neededFields := newFieldsSet()
iff.updateNeededFields(neededFields)
f.neededFieldsForFunc = neededFields.getAll()
}
resultName, err := parseResultName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result name for %s: %w", sf, err)
}
f.resultName = resultName
funcs = append(funcs, f)
if lex.isKeyword("|", ")", "") {
ps.resultNames = resultNames
ps.funcs = funcs
return &ps, nil
}
@ -473,90 +554,107 @@ func parsePipeStats(lex *lexer) (*pipeStats, error) {
}
}
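// Minimal usage sketch for the new per-function `if (...)` filters (assumed,
// mirroring the parser tests in this package):
//
//	lex := newLexer(`stats by (host) count() if (error) errors, count() total`)
//	ps, err := parsePipeStats(lex)
//	if err != nil {
//		// handle the parse error
//	}
//	// ps.funcs[0].iff holds the `error` filter; ps.funcs[1].iff is nil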
func parseStatsFunc(lex *lexer) (statsFunc, string, error) {
var sf statsFunc
func parseIfFilter(lex *lexer) (filter, error) {
if !lex.isKeyword("if") {
return nil, fmt.Errorf("unexpected keyword %q; expecting 'if'", lex.token)
}
lex.nextToken()
if !lex.isKeyword("(") {
return nil, fmt.Errorf("unexpected token %q after 'if'; expecting '('", lex.token)
}
lex.nextToken()
if lex.isKeyword(")") {
lex.nextToken()
return &filterNoop{}, nil
}
f, err := parseFilter(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'if' filter: %w", err)
}
if !lex.isKeyword(")") {
return nil, fmt.Errorf("unexpected token %q after 'if' filter; expecting ')'", lex.token)
}
lex.nextToken()
return f, nil
}
func parseStatsFunc(lex *lexer) (statsFunc, error) {
switch {
case lex.isKeyword("count"):
scs, err := parseStatsCount(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'count' func: %w", err)
return nil, fmt.Errorf("cannot parse 'count' func: %w", err)
}
sf = scs
return scs, nil
case lex.isKeyword("count_empty"):
scs, err := parseStatsCountEmpty(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'count_empty' func: %w", err)
return nil, fmt.Errorf("cannot parse 'count_empty' func: %w", err)
}
sf = scs
return scs, nil
case lex.isKeyword("count_uniq"):
sus, err := parseStatsCountUniq(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'count_uniq' func: %w", err)
return nil, fmt.Errorf("cannot parse 'count_uniq' func: %w", err)
}
sf = sus
return sus, nil
case lex.isKeyword("sum"):
sss, err := parseStatsSum(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'sum' func: %w", err)
return nil, fmt.Errorf("cannot parse 'sum' func: %w", err)
}
sf = sss
return sss, nil
case lex.isKeyword("max"):
sms, err := parseStatsMax(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'max' func: %w", err)
return nil, fmt.Errorf("cannot parse 'max' func: %w", err)
}
sf = sms
return sms, nil
case lex.isKeyword("min"):
sms, err := parseStatsMin(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'min' func: %w", err)
return nil, fmt.Errorf("cannot parse 'min' func: %w", err)
}
sf = sms
return sms, nil
case lex.isKeyword("avg"):
sas, err := parseStatsAvg(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'avg' func: %w", err)
return nil, fmt.Errorf("cannot parse 'avg' func: %w", err)
}
sf = sas
return sas, nil
case lex.isKeyword("uniq_values"):
sus, err := parseStatsUniqValues(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'uniq_values' func: %w", err)
return nil, fmt.Errorf("cannot parse 'uniq_values' func: %w", err)
}
sf = sus
return sus, nil
case lex.isKeyword("values"):
svs, err := parseStatsValues(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'values' func: %w", err)
return nil, fmt.Errorf("cannot parse 'values' func: %w", err)
}
sf = svs
return svs, nil
case lex.isKeyword("sum_len"):
sss, err := parseStatsSumLen(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'sum_len' func: %w", err)
return nil, fmt.Errorf("cannot parse 'sum_len' func: %w", err)
}
sf = sss
return sss, nil
case lex.isKeyword("quantile"):
sqs, err := parseStatsQuantile(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'quantile' func: %w", err)
return nil, fmt.Errorf("cannot parse 'quantile' func: %w", err)
}
sf = sqs
return sqs, nil
case lex.isKeyword("median"):
sms, err := parseStatsMedian(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'median' func: %w", err)
return nil, fmt.Errorf("cannot parse 'median' func: %w", err)
}
sf = sms
return sms, nil
default:
return nil, "", fmt.Errorf("unknown stats func %q", lex.token)
return nil, fmt.Errorf("unknown stats func %q", lex.token)
}
resultName, err := parseResultName(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse result name for %s: %w", sf, err)
}
return sf, resultName, nil
}
func parseResultName(lex *lexer) (string, error) {
@ -619,10 +717,11 @@ func parseByStatsFields(lex *lexer) ([]*byStatsField, error) {
lex.nextToken()
return bfs, nil
}
fieldName, err := parseFieldName(lex)
fieldName, err := getCompoundPhrase(lex, false)
if err != nil {
return nil, fmt.Errorf("cannot parse field name: %w", err)
}
fieldName = getCanonicalColumnName(fieldName)
bf := &byStatsField{
name: fieldName,
}
@ -796,10 +895,10 @@ func parseFieldNamesInParens(lex *lexer) ([]string, error) {
}
func parseFieldName(lex *lexer) (string, error) {
if lex.isKeyword(",", "(", ")", "[", "]", "|", ":", "") {
return "", fmt.Errorf("unexpected token: %q", lex.token)
fieldName, err := getCompoundToken(lex)
if err != nil {
return "", fmt.Errorf("cannot parse field name: %w", err)
}
fieldName := getCompoundPhrase(lex, false)
fieldName = getCanonicalColumnName(fieldName)
return fieldName, nil
}

View file

@ -7,18 +7,7 @@ import (
func TestPipeStatsUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeStats(lex)
if err != nil {
t.Fatalf("unexpected error when parsing %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
@ -44,7 +33,7 @@ func TestPipeStatsUpdateNeededFields(t *testing.T) {
f("stats count(f1,f2) r1, sum(f3,f4) r2", "*", "r1,r3", "f3,f4", "")
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2", "b1,b2", "")
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1", "b1,b2", "")
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1,b2", "", "")
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1,b2", "b1,b2", "")
f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "*", "r1,r3", "b1,b2,f1,f3", "")
// needed fields do not intersect with stats fields
@ -52,8 +41,8 @@ func TestPipeStatsUpdateNeededFields(t *testing.T) {
f("stats count(*) r1", "r2", "", "", "")
f("stats count(f1,f2) r1", "r2", "", "", "")
f("stats count(f1,f2) r1, sum(f3,f4) r2", "r3", "", "", "")
f("stats by (b1,b2) count(f1,f2) r1", "r2", "", "", "")
f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "r3", "", "", "")
f("stats by (b1,b2) count(f1,f2) r1", "r2", "", "b1,b2", "")
f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "r3", "", "b1,b2", "")
// needed fields intersect with stats fields
f("stats count() r1", "r1,r2", "", "", "")

View file

@ -18,9 +18,12 @@ func newPipeTopkProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
shards := make([]pipeTopkProcessorShard, workersCount)
for i := range shards {
shard := &shards[i]
shard.ps = ps
shard.stateSizeBudget = stateSizeBudgetChunk
shards[i] = pipeTopkProcessorShard{
pipeTopkProcessorShardNopad: pipeTopkProcessorShardNopad{
ps: ps,
stateSizeBudget: stateSizeBudgetChunk,
},
}
maxStateSize -= stateSizeBudgetChunk
}
@ -73,8 +76,9 @@ type pipeTopkProcessorShardNopad struct {
// these are aux fields for determining whether the next row must be stored in rows.
byColumnValues [][]string
otherColumnValues []pipeTopkOtherColumn
csOther []*blockResultColumn
byColumns []string
byColumnsIsTime []bool
otherColumns []Field
// stateSizeBudget is the remaining budget for the whole state size for the shard.
@ -84,12 +88,9 @@ type pipeTopkProcessorShardNopad struct {
type pipeTopkRow struct {
byColumns []string
byColumnsIsTime []bool
otherColumns []Field
timestamp int64
}
type pipeTopkOtherColumn struct {
name string
values []string
timestamp int64
}
func (r *pipeTopkRow) clone() *pipeTopkRow {
@ -98,6 +99,8 @@ func (r *pipeTopkRow) clone() *pipeTopkRow {
byColumnsCopy[i] = strings.Clone(r.byColumns[i])
}
byColumnsIsTime := append([]bool{}, r.byColumnsIsTime...)
otherColumnsCopy := make([]Field, len(r.otherColumns))
for i := range otherColumnsCopy {
src := &r.otherColumns[i]
@ -108,7 +111,9 @@ func (r *pipeTopkRow) clone() *pipeTopkRow {
return &pipeTopkRow{
byColumns: byColumnsCopy,
byColumnsIsTime: byColumnsIsTime,
otherColumns: otherColumnsCopy,
timestamp: r.timestamp,
}
}
@ -120,6 +125,8 @@ func (r *pipeTopkRow) sizeBytes() int {
}
n += len(r.byColumns) * int(unsafe.Sizeof(r.byColumns[0]))
n += len(r.byColumnsIsTime) * int(unsafe.Sizeof(r.byColumnsIsTime[0]))
for _, f := range r.otherColumns {
n += len(f.Name) + len(f.Value)
}
@ -167,14 +174,15 @@ func (shard *pipeTopkProcessorShard) writeBlock(br *blockResult) {
byColumnValues := shard.byColumnValues[:0]
for _, c := range cs {
byColumnValues = append(byColumnValues, c.getValues(br))
values := c.getValues(br)
byColumnValues = append(byColumnValues, values)
}
shard.byColumnValues = byColumnValues
byColumns := shard.byColumns[:0]
otherColumns := shard.otherColumns[:0]
byColumnsIsTime := shard.byColumnsIsTime[:0]
bb := bbPool.Get()
for rowIdx := range br.timestamps {
for rowIdx, timestamp := range br.timestamps {
byColumns = byColumns[:0]
bb.B = bb.B[:0]
for i, values := range byColumnValues {
@ -183,31 +191,33 @@ func (shard *pipeTopkProcessorShard) writeBlock(br *blockResult) {
bb.B = append(bb.B, ',')
}
byColumns = append(byColumns, bytesutil.ToUnsafeString(bb.B))
byColumnsIsTime = append(byColumnsIsTime, false)
otherColumns = otherColumns[:0]
for i, values := range byColumnValues {
otherColumns = append(otherColumns, Field{
Name: cs[i].name,
Value: values[rowIdx],
})
}
shard.addRow(byColumns, otherColumns)
shard.addRow(br, byColumns, byColumnsIsTime, cs, rowIdx, timestamp)
}
bbPool.Put(bb)
shard.byColumns = byColumns
shard.otherColumns = otherColumns
shard.byColumnsIsTime = byColumnsIsTime
} else {
// Sort by byFields
byColumnValues := shard.byColumnValues[:0]
byColumnsIsTime := shard.byColumnsIsTime[:0]
for _, bf := range byFields {
c := br.getColumnByName(bf.name)
byColumnValues = append(byColumnValues, c.getValues(br))
byColumnsIsTime = append(byColumnsIsTime, c.isTime)
var values []string
if !c.isTime {
values = c.getValues(br)
}
byColumnValues = append(byColumnValues, values)
}
shard.byColumnValues = byColumnValues
shard.byColumnsIsTime = byColumnsIsTime
otherColumnValues := shard.otherColumnValues[:0]
csOther := shard.csOther[:0]
for _, c := range cs {
isByField := false
for _, bf := range byFields {
@ -217,42 +227,35 @@ func (shard *pipeTopkProcessorShard) writeBlock(br *blockResult) {
}
}
if !isByField {
otherColumnValues = append(otherColumnValues, pipeTopkOtherColumn{
name: c.name,
values: c.getValues(br),
})
csOther = append(csOther, c)
}
}
shard.otherColumnValues = otherColumnValues
shard.csOther = csOther
// add rows to shard
byColumns := shard.byColumns[:0]
otherColumns := shard.otherColumns[:0]
for rowIdx := range br.timestamps {
for rowIdx, timestamp := range br.timestamps {
byColumns = byColumns[:0]
for _, values := range byColumnValues {
byColumns = append(byColumns, values[rowIdx])
for i, values := range byColumnValues {
v := ""
if !byColumnsIsTime[i] {
v = values[rowIdx]
}
byColumns = append(byColumns, v)
}
otherColumns = otherColumns[:0]
for _, ocv := range otherColumnValues {
otherColumns = append(otherColumns, Field{
Name: ocv.name,
Value: ocv.values[rowIdx],
})
}
shard.addRow(byColumns, otherColumns)
shard.addRow(br, byColumns, byColumnsIsTime, csOther, rowIdx, timestamp)
}
shard.byColumns = byColumns
shard.otherColumns = otherColumns
}
}
func (shard *pipeTopkProcessorShard) addRow(byColumns []string, otherColumns []Field) {
func (shard *pipeTopkProcessorShard) addRow(br *blockResult, byColumns []string, byColumnsIsTime []bool, csOther []*blockResultColumn, rowIdx int, timestamp int64) {
r := &shard.tmpRow
r.byColumns = byColumns
r.otherColumns = otherColumns
r.byColumnsIsTime = byColumnsIsTime
r.timestamp = timestamp
rows := shard.rows
if len(rows) > 0 && !topkLess(shard.ps, r, rows[0]) {
@ -261,9 +264,25 @@ func (shard *pipeTopkProcessorShard) addRow(byColumns []string, otherColumns []F
}
// Slow path - add r to shard.rows.
// Populate r.otherColumns
otherColumns := shard.otherColumns[:0]
for _, c := range csOther {
v := c.getValueAtRow(br, rowIdx)
otherColumns = append(otherColumns, Field{
Name: c.name,
Value: v,
})
}
shard.otherColumns = otherColumns
r.otherColumns = otherColumns
// Clone r, so it doesn't refer the original data.
r = r.clone()
shard.stateSizeBudget -= r.sizeBytes()
if uint64(len(rows)) < shard.ps.limit {
// Push r to shard.rows.
if uint64(len(rows)) < shard.ps.offset+shard.ps.limit {
heap.Push(shard, r)
shard.stateSizeBudget -= int(unsafe.Sizeof(r))
} else {
@ -443,21 +462,23 @@ func (wctx *pipeTopkWriteContext) writeNextRow(shard *pipeTopkProcessorShard) bo
rcs = wctx.rcs[:0]
for _, bf := range byFields {
rcs = append(rcs, resultColumn{
name: bf.name,
})
rcs = appendResultColumnWithName(rcs, bf.name)
}
for _, c := range r.otherColumns {
rcs = append(rcs, resultColumn{
name: c.Name,
})
rcs = appendResultColumnWithName(rcs, c.Name)
}
wctx.rcs = rcs
}
var tmpBuf []byte
byColumns := r.byColumns
byColumnsIsTime := r.byColumnsIsTime
for i := range byFields {
v := byColumns[i]
if byColumnsIsTime[i] {
tmpBuf = marshalTimestampRFC3339NanoString(tmpBuf[:0], r.timestamp)
v = bytesutil.ToUnsafeString(tmpBuf)
}
rcs[i].addValue(v)
wctx.valuesLen += len(v)
}
@ -490,7 +511,7 @@ func (wctx *pipeTopkWriteContext) flush() {
wctx.ptp.ppBase.writeBlock(0, br)
br.reset()
for i := range rcs {
rcs[i].resetKeepName()
rcs[i].resetValues()
}
}
@ -529,25 +550,79 @@ func topkLess(ps *pipeSort, a, b *pipeTopkRow) bool {
byFields := ps.byFields
csA := a.byColumns
csB := b.byColumns
isTimeA := a.byColumnsIsTime
for k := range csA {
csB := b.byColumns
isTimeB := b.byColumnsIsTime
for i := range csA {
isDesc := ps.isDesc
if len(byFields) > 0 && byFields[k].isDesc {
if len(byFields) > 0 && byFields[i].isDesc {
isDesc = !isDesc
}
vA := csA[k]
vB := csB[k]
if isTimeA[i] && isTimeB[i] {
// Fast path - compare timestamps
if a.timestamp == b.timestamp {
continue
}
if isDesc {
return b.timestamp < a.timestamp
}
return a.timestamp < b.timestamp
}
vA := csA[i]
vB := csB[i]
var bb *bytesutil.ByteBuffer
if isTimeA[i] || isTimeB[i] {
bb = bbPool.Get()
}
if isTimeA[i] {
bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], a.timestamp)
vA = bytesutil.ToUnsafeString(bb.B)
} else if isTimeB[i] {
bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], b.timestamp)
vB = bytesutil.ToUnsafeString(bb.B)
}
if vA == vB {
if bb != nil {
bbPool.Put(bb)
}
continue
}
if isDesc {
return stringsutil.LessNatural(vB, vA)
vA, vB = vB, vA
}
return stringsutil.LessNatural(vA, vB)
ok := lessString(vA, vB)
if bb != nil {
bbPool.Put(bb)
}
return ok
}
return false
}
func lessString(a, b string) bool {
if a == b {
return false
}
nA, okA := tryParseUint64(a)
nB, okB := tryParseUint64(b)
if okA && okB {
return nA < nB
}
fA, okA := tryParseFloat64(a)
fB, okB := tryParseFloat64(b)
if okA && okB {
return fA < fB
}
return stringsutil.LessNatural(a, b)
}
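// Illustrative lessString examples (assumed behaviour, derived from the code above):
//
//	lessString("9", "10")     // true  - both parse as uint64, compared numerically
//	lessString("1.25", "1.5") // true  - both parse as float64
//	lessString("v9", "v10")   // true  - falls back to stringsutil.LessNatural
//	lessString("a", "a")      // false - equal strings are never less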

View file

@ -3,6 +3,7 @@ package logstorage
import (
"fmt"
"slices"
"strings"
"sync/atomic"
"unsafe"
@ -40,7 +41,7 @@ func (pu *pipeUniq) updateNeededFields(neededFields, unneededFields fieldsSet) {
if len(pu.byFields) == 0 {
neededFields.add("*")
} else {
neededFields.addAll(pu.byFields)
neededFields.addFields(pu.byFields)
}
}
@ -49,10 +50,13 @@ func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, c
shards := make([]pipeUniqProcessorShard, workersCount)
for i := range shards {
shard := &shards[i]
shard.pu = pu
shard.m = make(map[string]struct{})
shard.stateSizeBudget = stateSizeBudgetChunk
shards[i] = pipeUniqProcessorShard{
pipeUniqProcessorShardNopad: pipeUniqProcessorShardNopad{
pu: pu,
m: make(map[string]struct{}),
stateSizeBudget: stateSizeBudgetChunk,
},
}
maxStateSize -= stateSizeBudgetChunk
}
@ -116,7 +120,6 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
return false
}
m := shard.m
byFields := shard.pu.byFields
if len(byFields) == 0 {
// Take into account all the columns in br.
@ -129,20 +132,41 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.name))
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
}
if _, ok := m[string(keyBuf)]; !ok {
m[string(keyBuf)] = struct{}{}
shard.stateSizeBudget -= len(keyBuf) + int(unsafe.Sizeof(""))
}
shard.updateState(bytesutil.ToUnsafeString(keyBuf))
}
shard.keyBuf = keyBuf
return true
}
if len(byFields) == 1 {
// Fast path for a single field.
c := br.getColumnByName(byFields[0])
if c.isConst {
v := c.valuesEncoded[0]
shard.updateState(v)
return true
}
if c.valueType == valueTypeDict {
for _, v := range c.dictValues {
shard.updateState(v)
}
return true
}
values := c.getValues(br)
for i, v := range values {
if i == 0 || values[i-1] != values[i] {
shard.updateState(v)
}
}
return true
}
// Take into account only the selected columns.
columnValues := shard.columnValues[:0]
for _, f := range byFields {
c := br.getColumnByName(f)
columnValues = append(columnValues, c.getValues(br))
values := c.getValues(br)
columnValues = append(columnValues, values)
}
shard.columnValues = columnValues
@ -163,16 +187,21 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
for _, values := range columnValues {
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(values[i]))
}
if _, ok := m[string(keyBuf)]; !ok {
m[string(keyBuf)] = struct{}{}
shard.stateSizeBudget -= len(keyBuf) + int(unsafe.Sizeof(""))
}
shard.updateState(bytesutil.ToUnsafeString(keyBuf))
}
shard.keyBuf = keyBuf
return true
}
func (shard *pipeUniqProcessorShard) updateState(v string) {
if _, ok := shard.m[v]; !ok {
vCopy := strings.Clone(v)
shard.m[vCopy] = struct{}{}
shard.stateSizeBudget -= len(vCopy) + int(unsafe.Sizeof(vCopy))
}
}
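// Note (assumption based on the code above): for `uniq by (single_field)` the map
// key is now the raw field value rather than an encoding.MarshalBytes-encoded key,
// e.g. the values "foo", "foo", "bar" produce two map entries, charging roughly
// len("foo")+len("bar") plus per-entry string overhead to stateSizeBudget.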
func (pup *pipeUniqProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
@ -253,6 +282,19 @@ func (pup *pipeUniqProcessor) flush() error {
}
wctx.writeRow(rowFields)
}
} else if len(byFields) == 1 {
fieldName := byFields[0]
for k := range m {
if needStop(pup.stopCh) {
return nil
}
rowFields = append(rowFields[:0], Field{
Name: fieldName,
Value: k,
})
wctx.writeRow(rowFields)
}
} else {
for k := range m {
if needStop(pup.stopCh) {
@ -317,9 +359,7 @@ func (wctx *pipeUniqWriteContext) writeRow(rowFields []Field) {
rcs = wctx.rcs[:0]
for _, f := range rowFields {
rcs = append(rcs, resultColumn{
name: f.Name,
})
rcs = appendResultColumnWithName(rcs, f.Name)
}
wctx.rcs = rcs
}
@ -349,7 +389,7 @@ func (wctx *pipeUniqWriteContext) flush() {
wctx.pup.ppBase.writeBlock(0, br)
br.reset()
for i := range rcs {
rcs[i].resetKeepName()
rcs[i].resetValues()
}
}
@ -360,8 +400,10 @@ func parsePipeUniq(lex *lexer) (*pipeUniq, error) {
lex.nextToken()
var pu pipeUniq
if lex.isKeyword("by", "(") {
if lex.isKeyword("by") {
lex.nextToken()
}
bfs, err := parseFieldNamesInParens(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)

View file

@ -7,18 +7,7 @@ import (
func TestPipeUniqUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeUniq(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields

View file

@ -0,0 +1,147 @@
package logstorage
import (
"fmt"
"unsafe"
)
// pipeUnpackJSON processes '| unpack_json ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe
type pipeUnpackJSON struct {
fromField string
resultPrefix string
}
func (pu *pipeUnpackJSON) String() string {
s := "unpack_json"
if !isMsgFieldName(pu.fromField) {
s += " from " + quoteTokenIfNeeded(pu.fromField)
}
if pu.resultPrefix != "" {
s += " result_prefix " + quoteTokenIfNeeded(pu.resultPrefix)
}
return s
}
func (pu *pipeUnpackJSON) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFields.remove(pu.fromField)
} else {
neededFields.add(pu.fromField)
}
}
func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeUnpackJSONProcessorShard, workersCount)
pup := &pipeUnpackJSONProcessor{
pu: pu,
ppBase: ppBase,
shards: shards,
}
return pup
}
type pipeUnpackJSONProcessor struct {
pu *pipeUnpackJSON
ppBase pipeProcessor
shards []pipeUnpackJSONProcessorShard
}
type pipeUnpackJSONProcessorShard struct {
pipeUnpackJSONProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeUnpackJSONProcessorShardNopad{})%128]byte
}
type pipeUnpackJSONProcessorShardNopad struct {
p JSONParser
wctx pipeUnpackWriteContext
}
func (shard *pipeUnpackJSONProcessorShard) parseJSON(v, resultPrefix string) []Field {
if len(v) == 0 || v[0] != '{' {
// This isn't a JSON object
return nil
}
if err := shard.p.ParseLogMessageNoResetBuf(v, resultPrefix); err != nil {
// Cannot parse v
return nil
}
return shard.p.Fields
}
func (pup *pipeUnpackJSONProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
resultPrefix := pup.pu.resultPrefix
shard := &pup.shards[workerID]
wctx := &shard.wctx
wctx.init(br, pup.ppBase)
c := br.getColumnByName(pup.pu.fromField)
if c.isConst {
v := c.valuesEncoded[0]
extraFields := shard.parseJSON(v, resultPrefix)
for rowIdx := range br.timestamps {
wctx.writeRow(rowIdx, extraFields)
}
} else {
values := c.getValues(br)
var extraFields []Field
for i, v := range values {
if i == 0 || values[i-1] != v {
extraFields = shard.parseJSON(v, resultPrefix)
}
wctx.writeRow(i, extraFields)
}
}
wctx.flush()
shard.p.reset()
}
func (pup *pipeUnpackJSONProcessor) flush() error {
return nil
}
func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) {
if !lex.isKeyword("unpack_json") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unpack_json")
}
lex.nextToken()
fromField := "_msg"
if lex.isKeyword("from") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
}
fromField = f
}
resultPrefix := ""
if lex.isKeyword("result_prefix") {
lex.nextToken()
p, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'result_prefix': %w", err)
}
resultPrefix = p
}
pu := &pipeUnpackJSON{
fromField: fromField,
resultPrefix: resultPrefix,
}
return pu, nil
}
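// Usage sketch for the parser above (hypothetical pipes, not from this diff):
//
//	unpack_json                          - unpack JSON from the _msg field
//	unpack_json from x                   - unpack JSON from the x field
//	unpack_json from x result_prefix p_  - unpacked keys are stored as p_<key>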

View file

@ -0,0 +1,376 @@
package logstorage
import (
"math/rand"
"slices"
"strings"
"sync"
"testing"
)
func TestPipeUnpackJSON(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// single row, unpack from _msg
f("unpack_json", [][]Field{
{
{"_msg", `{"foo":"bar"}`},
},
}, [][]Field{
{
{"_msg", `{"foo":"bar"}`},
{"foo", "bar"},
},
})
// single row, unpack from _msg into _msg
f("unpack_json", [][]Field{
{
{"_msg", `{"_msg":"bar"}`},
},
}, [][]Field{
{
{"_msg", "bar"},
},
})
// single row, unpack from missing field
f("unpack_json from x", [][]Field{
{
{"_msg", `{"foo":"bar"}`},
},
}, [][]Field{
{
{"_msg", `{"foo":"bar"}`},
},
})
// single row, unpack from non-json field
f("unpack_json from x", [][]Field{
{
{"x", `foobar`},
},
}, [][]Field{
{
{"x", `foobar`},
},
})
// single row, unpack from non-dict json
f("unpack_json from x", [][]Field{
{
{"x", `["foobar"]`},
},
}, [][]Field{
{
{"x", `["foobar"]`},
},
})
f("unpack_json from x", [][]Field{
{
{"x", `1234`},
},
}, [][]Field{
{
{"x", `1234`},
},
})
f("unpack_json from x", [][]Field{
{
{"x", `"xxx"`},
},
}, [][]Field{
{
{"x", `"xxx"`},
},
})
// single row, unpack from named field
f("unpack_json from x", [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz","a":123,"b":["foo","bar"],"x":NaN,"y":{"z":{"a":"b"}}}`},
},
}, [][]Field{
{
{"x", `NaN`},
{"foo", "bar"},
{"baz", "xyz"},
{"a", "123"},
{"b", `["foo","bar"]`},
{"y.z.a", "b"},
},
})
// multiple rows with distinct number of fields
f("unpack_json from x", [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz"}`},
{"y", `abc`},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `{"z":["bar",123]}`},
},
}, [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz"}`},
{"y", "abc"},
{"foo", "bar"},
{"baz", "xyz"},
},
{
{"y", `abc`},
},
{
{"z", `["bar",123]`},
{"x", `{"z":["bar",123]}`},
},
})
// multiple rows with distinct number of fields with result_prefix
f("unpack_json from x result_prefix qwe_", [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz"}`},
{"y", `abc`},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `{"z":["bar",123]}`},
},
}, [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz"}`},
{"y", "abc"},
{"qwe_foo", "bar"},
{"qwe_baz", "xyz"},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `{"z":["bar",123]}`},
{"qwe_z", `["bar",123]`},
},
})
}
func expectPipeResults(t *testing.T, pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
lex := newLexer(pipeStr)
p, err := parsePipe(lex)
if err != nil {
t.Fatalf("unexpected error when parsing %q: %s", pipeStr, err)
}
workersCount := 5
stopCh := make(chan struct{})
cancel := func() {}
ppTest := newTestPipeProcessor()
pp := p.newPipeProcessor(workersCount, stopCh, cancel, ppTest)
brw := newTestBlockResultWriter(workersCount, pp)
for _, row := range rows {
brw.writeRow(row)
}
brw.flush()
ppTest.expectRows(t, rowsExpected)
}
func newTestBlockResultWriter(workersCount int, ppBase pipeProcessor) *testBlockResultWriter {
return &testBlockResultWriter{
workersCount: workersCount,
ppBase: ppBase,
}
}
type testBlockResultWriter struct {
workersCount int
ppBase pipeProcessor
rcs []resultColumn
br blockResult
}
func (brw *testBlockResultWriter) writeRow(row []Field) {
if !brw.areSameFields(row) {
brw.flush()
brw.rcs = brw.rcs[:0]
for _, field := range row {
brw.rcs = appendResultColumnWithName(brw.rcs, field.Name)
}
}
for i, field := range row {
brw.rcs[i].addValue(field.Value)
}
if rand.Intn(5) == 0 {
brw.flush()
}
}
func (brw *testBlockResultWriter) areSameFields(row []Field) bool {
if len(brw.rcs) != len(row) {
return false
}
for i, rc := range brw.rcs {
if rc.name != row[i].Name {
return false
}
}
return true
}
func (brw *testBlockResultWriter) flush() {
brw.br.setResultColumns(brw.rcs)
workerID := rand.Intn(brw.workersCount)
brw.ppBase.writeBlock(uint(workerID), &brw.br)
brw.br.reset()
for i := range brw.rcs {
brw.rcs[i].resetValues()
}
}
func newTestPipeProcessor() *testPipeProcessor {
return &testPipeProcessor{}
}
type testPipeProcessor struct {
resultRowsLock sync.Mutex
resultRows [][]Field
}
func (pp *testPipeProcessor) writeBlock(_ uint, br *blockResult) {
cs := br.getColumns()
var columnValues [][]string
for _, c := range cs {
values := c.getValues(br)
columnValues = append(columnValues, values)
}
for i := range br.timestamps {
row := make([]Field, len(columnValues))
for j, values := range columnValues {
r := &row[j]
r.Name = strings.Clone(cs[j].name)
r.Value = strings.Clone(values[i])
}
pp.resultRowsLock.Lock()
pp.resultRows = append(pp.resultRows, row)
pp.resultRowsLock.Unlock()
}
}
func (pp *testPipeProcessor) flush() error {
return nil
}
func (pp *testPipeProcessor) expectRows(t *testing.T, expectedRows [][]Field) {
t.Helper()
if len(pp.resultRows) != len(expectedRows) {
t.Fatalf("unexpected number of rows; got %d; want %d\nrows got\n%s\nrows expected\n%s",
len(pp.resultRows), len(expectedRows), rowsToString(pp.resultRows), rowsToString(expectedRows))
}
sortTestRows(pp.resultRows)
sortTestRows(expectedRows)
for i, resultRow := range pp.resultRows {
expectedRow := expectedRows[i]
if len(resultRow) != len(expectedRow) {
t.Fatalf("unexpected number of fields at row #%d; got %d; want %d\nrow got\n%s\nrow expected\n%s",
i, len(resultRow), len(expectedRow), rowToString(resultRow), rowToString(expectedRow))
}
for j, resultField := range resultRow {
expectedField := expectedRow[j]
if resultField.Name != expectedField.Name {
t.Fatalf("unexpected field name at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
i, resultField.Name, expectedField.Name, rowToString(resultRow), rowToString(expectedRow))
}
if resultField.Value != expectedField.Value {
t.Fatalf("unexpected value for field %q at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
resultField.Name, i, resultField.Value, expectedField.Value, rowToString(resultRow), rowToString(expectedRow))
}
}
}
}
func sortTestRows(rows [][]Field) {
slices.SortFunc(rows, func(a, b []Field) int {
reverse := -1
if len(a) > len(b) {
reverse = 1
a, b = b, a
}
for i, fA := range a {
fB := b[i]
if fA.Name == fB.Name {
if fA.Value == fB.Value {
continue
}
if fA.Value < fB.Value {
return reverse
}
return -reverse
}
if fA.Name < fB.Name {
return reverse
}
return -reverse
}
if len(a) == len(b) {
return 0
}
return reverse
})
}
func rowsToString(rows [][]Field) string {
a := make([]string, len(rows))
for i, row := range rows {
a[i] = rowToString(row)
}
return strings.Join(a, "\n")
}
func rowToString(row []Field) string {
a := make([]string, len(row))
for i, f := range row {
a[i] = f.String()
}
return "{" + strings.Join(a, ",") + "}"
}
func TestPipeUnpackJSONUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("unpack_json from x", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("unpack_json from x", "*", "f1,f2", "*", "f1,f2")
// all the needed fields, unneeded fields intersect with src
f("unpack_json from x", "*", "f2,x", "*", "f2")
// needed fields do not intersect with src
f("unpack_json from x", "f1,f2", "", "f1,f2,x", "")
// needed fields intersect with src
f("unpack_json from x", "f2,x", "", "f2,x", "")
}

View file

@ -0,0 +1,289 @@
package logstorage
import (
"fmt"
"strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// pipeUnpackLogfmt processes '| unpack_logfmt ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe
type pipeUnpackLogfmt struct {
fromField string
resultPrefix string
}
func (pu *pipeUnpackLogfmt) String() string {
s := "unpack_logfmt"
if !isMsgFieldName(pu.fromField) {
s += " from " + quoteTokenIfNeeded(pu.fromField)
}
if pu.resultPrefix != "" {
s += " result_prefix " + quoteTokenIfNeeded(pu.resultPrefix)
}
return s
}
func (pu *pipeUnpackLogfmt) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFields.remove(pu.fromField)
} else {
neededFields.add(pu.fromField)
}
}
func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeUnpackLogfmtProcessorShard, workersCount)
pup := &pipeUnpackLogfmtProcessor{
pu: pu,
ppBase: ppBase,
shards: shards,
}
return pup
}
type pipeUnpackLogfmtProcessor struct {
pu *pipeUnpackLogfmt
ppBase pipeProcessor
shards []pipeUnpackLogfmtProcessorShard
}
type pipeUnpackLogfmtProcessorShard struct {
pipeUnpackLogfmtProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeUnpackLogfmtProcessorShardNopad{})%128]byte
}
type pipeUnpackLogfmtProcessorShardNopad struct {
p logfmtParser
wctx pipeUnpackWriteContext
}
func (pup *pipeUnpackLogfmtProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
resultPrefix := pup.pu.resultPrefix
shard := &pup.shards[workerID]
wctx := &shard.wctx
wctx.init(br, pup.ppBase)
c := br.getColumnByName(pup.pu.fromField)
if c.isConst {
v := c.valuesEncoded[0]
extraFields := shard.p.parse(v, resultPrefix)
for rowIdx := range br.timestamps {
wctx.writeRow(rowIdx, extraFields)
}
} else {
values := c.getValues(br)
var extraFields []Field
for i, v := range values {
if i == 0 || values[i-1] != v {
extraFields = shard.p.parse(v, resultPrefix)
}
wctx.writeRow(i, extraFields)
}
}
wctx.flush()
shard.p.reset()
}
func (pup *pipeUnpackLogfmtProcessor) flush() error {
return nil
}
func parsePipeUnpackLogfmt(lex *lexer) (*pipeUnpackLogfmt, error) {
if !lex.isKeyword("unpack_logfmt") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unpack_logfmt")
}
lex.nextToken()
fromField := "_msg"
if lex.isKeyword("from") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
}
fromField = f
}
resultPrefix := ""
if lex.isKeyword("result_prefix") {
lex.nextToken()
p, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'result_prefix': %w", err)
}
resultPrefix = p
}
pu := &pipeUnpackLogfmt{
fromField: fromField,
resultPrefix: resultPrefix,
}
return pu, nil
}
type pipeUnpackWriteContext struct {
brSrc *blockResult
csSrc []*blockResultColumn
ppBase pipeProcessor
rcs []resultColumn
br blockResult
valuesLen int
}
func (wctx *pipeUnpackWriteContext) init(brSrc *blockResult, ppBase pipeProcessor) {
wctx.brSrc = brSrc
wctx.csSrc = brSrc.getColumns()
wctx.ppBase = ppBase
}
func (wctx *pipeUnpackWriteContext) writeRow(rowIdx int, extraFields []Field) {
csSrc := wctx.csSrc
rcs := wctx.rcs
areEqualColumns := len(rcs) == len(csSrc)+len(extraFields)
if areEqualColumns {
for i, f := range extraFields {
if rcs[len(csSrc)+i].name != f.Name {
areEqualColumns = false
break
}
}
}
if !areEqualColumns {
// send the current block to ppBase and construct a block with a new set of columns
wctx.flush()
rcs = wctx.rcs[:0]
for _, c := range csSrc {
rcs = appendResultColumnWithName(rcs, c.name)
}
for _, f := range extraFields {
rcs = appendResultColumnWithName(rcs, f.Name)
}
wctx.rcs = rcs
}
brSrc := wctx.brSrc
for i, c := range csSrc {
v := c.getValueAtRow(brSrc, rowIdx)
rcs[i].addValue(v)
wctx.valuesLen += len(v)
}
for i, f := range extraFields {
v := f.Value
rcs[len(csSrc)+i].addValue(v)
wctx.valuesLen += len(v)
}
if wctx.valuesLen >= 1_000_000 {
wctx.flush()
}
}
func (wctx *pipeUnpackWriteContext) flush() {
rcs := wctx.rcs
wctx.valuesLen = 0
if len(rcs) == 0 {
return
}
// Flush rcs to ppBase
br := &wctx.br
br.setResultColumns(rcs)
wctx.ppBase.writeBlock(0, br)
br.reset()
for i := range rcs {
rcs[i].resetValues()
}
}
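// Layout note (derived from writeRow above): every output block keeps the source
// columns first and the unpacked extraFields after them, e.g. source columns
// (_msg, host) plus extraFields (foo, bar) yield the rcs order _msg, host, foo, bar.
// The block is flushed to ppBase when the column set changes or once the
// accumulated value bytes reach 1_000_000.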
type logfmtParser struct {
Fields []Field
buf []byte
}
func (p *logfmtParser) reset() {
clear(p.Fields)
p.Fields = p.Fields[:0]
p.buf = p.buf[:0]
}
func (p *logfmtParser) parse(s, resultPrefix string) []Field {
clear(p.Fields)
p.Fields = p.Fields[:0]
for {
// Search for field name
n := strings.IndexByte(s, '=')
if n < 0 {
// field name couldn't be read
return p.Fields
}
name := strings.TrimSpace(s[:n])
s = s[n+1:]
if len(s) == 0 {
p.addField(name, "", resultPrefix)
return p.Fields
}
// Search for field value
value, nOffset := tryUnquoteString(s)
if nOffset >= 0 {
p.addField(name, value, resultPrefix)
s = s[nOffset:]
if len(s) == 0 {
return p.Fields
}
if s[0] != ' ' {
return p.Fields
}
s = s[1:]
} else {
n := strings.IndexByte(s, ' ')
if n < 0 {
p.addField(name, s, resultPrefix)
return p.Fields
}
p.addField(name, s[:n], resultPrefix)
s = s[n+1:]
}
}
}
func (p *logfmtParser) addField(name, value, resultPrefix string) {
if resultPrefix != "" {
buf := p.buf
bufLen := len(buf)
buf = append(buf, resultPrefix...)
buf = append(buf, name...)
p.buf = buf
name = bytesutil.ToUnsafeString(buf[bufLen:])
}
p.Fields = append(p.Fields, Field{
Name: name,
Value: value,
})
}
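// Parsing sketch (assumed behaviour, consistent with the unpack_logfmt tests):
//
//	p.parse(`level=info msg="hello world" took=5ms`, "")
//	// -> {"level","info"}, {"msg","hello world"}, {"took","5ms"}
//
//	p.parse(`foo=bar`, "qwe_")
//	// -> {"qwe_foo","bar"}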

View file

@ -0,0 +1,175 @@
package logstorage
import (
"testing"
)
func TestPipeUnpackLogfmt(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// single row, unpack from _msg
f("unpack_logfmt", [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
},
}, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"foo", "bar"},
{"baz", "x y=z"},
{"a", "b"},
},
})
// single row, unpack from _msg into _msg
f("unpack_logfmt", [][]Field{
{
{"_msg", `_msg=bar`},
},
}, [][]Field{
{
{"_msg", "bar"},
},
})
// single row, unpack from missing field
f("unpack_logfmt from x", [][]Field{
{
{"_msg", `foo=bar`},
},
}, [][]Field{
{
{"_msg", `foo=bar`},
},
})
// single row, unpack from non-json field
f("unpack_logfmt from x", [][]Field{
{
{"x", `foobar`},
},
}, [][]Field{
{
{"x", `foobar`},
},
})
// single row, unpack from non-logfmt
f("unpack_logfmt from x", [][]Field{
{
{"x", `foobar`},
},
}, [][]Field{
{
{"x", `foobar`},
},
})
// unpack empty value
f("unpack_logfmt from x", [][]Field{
{
{"x", `foobar=`},
},
}, [][]Field{
{
{"x", `foobar=`},
{"foobar", ""},
},
})
f("unpack_logfmt from x", [][]Field{
{
{"x", `foo="" bar= baz=`},
},
}, [][]Field{
{
{"x", `foo="" bar= baz=`},
{"foo", ""},
{"bar", ""},
{"baz", ""},
},
})
// multiple rows with distinct number of fields
f("unpack_logfmt from x", [][]Field{
{
{"x", `foo=bar baz=xyz`},
{"y", `abc`},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `z=bar`},
},
}, [][]Field{
{
{"x", `foo=bar baz=xyz`},
{"y", "abc"},
{"foo", "bar"},
{"baz", "xyz"},
},
{
{"y", `abc`},
},
{
{"z", `bar`},
{"x", `z=bar`},
},
})
// multiple rows with distinct number of fields, with result_prefix
f("unpack_logfmt from x result_prefix qwe_", [][]Field{
{
{"x", `foo=bar baz=xyz`},
{"y", `abc`},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `z=bar`},
},
}, [][]Field{
{
{"x", `foo=bar baz=xyz`},
{"y", "abc"},
{"qwe_foo", "bar"},
{"qwe_baz", "xyz"},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `z=bar`},
{"qwe_z", `bar`},
},
})
}
func TestPipeUnpackLogfmtUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("unpack_logfmt from x", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("unpack_logfmt from x", "*", "f1,f2", "*", "f1,f2")
// all the needed fields, unneeded fields intersect with src
f("unpack_logfmt from x", "*", "f2,x", "*", "f2")
// needed fields do not intersect with src
f("unpack_logfmt from x", "f1,f2", "", "f1,f2,x", "")
// needed fields intersect with src
f("unpack_logfmt from x", "f2,x", "", "f2,x", "")
}

View file

@ -1,7 +1,6 @@
package logstorage
import (
"math"
"slices"
"strconv"
"unsafe"
@ -16,8 +15,8 @@ func (sa *statsAvg) String() string {
return "avg(" + fieldNamesString(sa.fields) + ")"
}
func (sa *statsAvg) neededFields() []string {
return sa.fields
func (sa *statsAvg) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(sa.fields)
}
func (sa *statsAvg) newStatsProcessor() (statsProcessor, int) {
@ -58,8 +57,8 @@ func (sap *statsAvgProcessor) updateStatsForRow(br *blockResult, rowIdx int) int
if sap.sa.containsStar {
// Scan all the fields for the given row
for _, c := range br.getColumns() {
f := c.getFloatValueAtRow(rowIdx)
if !math.IsNaN(f) {
f, ok := c.getFloatValueAtRow(br, rowIdx)
if ok {
sap.sum += f
sap.count++
}
@ -68,8 +67,8 @@ func (sap *statsAvgProcessor) updateStatsForRow(br *blockResult, rowIdx int) int
// Scan only the given fields for the given row
for _, field := range sap.sa.fields {
c := br.getColumnByName(field)
f := c.getFloatValueAtRow(rowIdx)
if !math.IsNaN(f) {
f, ok := c.getFloatValueAtRow(br, rowIdx)
if ok {
sap.sum += f
sap.count++
}

View file

@ -17,12 +17,12 @@ func (sc *statsCount) String() string {
return "count(" + fieldNamesString(sc.fields) + ")"
}
func (sc *statsCount) neededFields() []string {
func (sc *statsCount) updateNeededFields(neededFields fieldsSet) {
if sc.containsStar {
// There is no need to fetch any columns for count(*) - the number of matching rows can be calculated as len(blockResult.timestamps)
return nil
return
}
return sc.fields
neededFields.addFields(sc.fields)
}
func (sc *statsCount) newStatsProcessor() (statsProcessor, int) {
@ -49,7 +49,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
// Fast path for count(single_column)
c := br.getColumnByName(fields[0])
if c.isConst {
if c.encodedValues[0] != "" {
if c.valuesEncoded[0] != "" {
scp.rowsCount += uint64(len(br.timestamps))
}
return 0
@ -60,7 +60,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
}
switch c.valueType {
case valueTypeString:
for _, v := range c.encodedValues {
for _, v := range c.getValuesEncoded(br) {
if v != "" {
scp.rowsCount++
}
@ -72,7 +72,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
for _, v := range c.encodedValues {
for _, v := range c.getValuesEncoded(br) {
if int(v[0]) != zeroDictIdx {
scp.rowsCount++
}
@ -95,7 +95,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
for _, f := range fields {
c := br.getColumnByName(f)
if c.isConst {
if c.encodedValues[0] != "" {
if c.valuesEncoded[0] != "" {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
@ -105,18 +105,21 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
switch c.valueType {
case valueTypeString:
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(i int) bool {
return c.encodedValues[i] == ""
return valuesEncoded[i] == ""
})
case valueTypeDict:
if !slices.Contains(c.dictValues, "") {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(i int) bool {
dictIdx := c.encodedValues[i][0]
dictIdx := valuesEncoded[i][0]
return c.dictValues[dictIdx] == ""
})
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
@ -144,7 +147,7 @@ func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) i
// Fast path for count(single_column)
c := br.getColumnByName(fields[0])
if c.isConst {
if c.encodedValues[0] != "" {
if c.valuesEncoded[0] != "" {
scp.rowsCount++
}
return 0
@ -155,12 +158,14 @@ func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) i
}
switch c.valueType {
case valueTypeString:
if v := c.encodedValues[rowIdx]; v != "" {
valuesEncoded := c.getValuesEncoded(br)
if v := valuesEncoded[rowIdx]; v != "" {
scp.rowsCount++
}
return 0
case valueTypeDict:
dictIdx := c.encodedValues[rowIdx][0]
valuesEncoded := c.getValuesEncoded(br)
dictIdx := valuesEncoded[rowIdx][0]
if v := c.dictValues[dictIdx]; v != "" {
scp.rowsCount++
}

View file

@ -17,8 +17,8 @@ func (sc *statsCountEmpty) String() string {
return "count_empty(" + fieldNamesString(sc.fields) + ")"
}
func (sc *statsCountEmpty) neededFields() []string {
return sc.fields
func (sc *statsCountEmpty) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(sc.fields)
}
func (sc *statsCountEmpty) newStatsProcessor() (statsProcessor, int) {
@ -53,7 +53,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
// Fast path for count_empty(single_column)
c := br.getColumnByName(fields[0])
if c.isConst {
if c.encodedValues[0] == "" {
if c.valuesEncoded[0] == "" {
scp.rowsCount += uint64(len(br.timestamps))
}
return 0
@ -63,7 +63,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
}
switch c.valueType {
case valueTypeString:
for _, v := range c.encodedValues {
for _, v := range c.getValuesEncoded(br) {
if v == "" {
scp.rowsCount++
}
@ -74,7 +74,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
if zeroDictIdx < 0 {
return 0
}
for _, v := range c.encodedValues {
for _, v := range c.getValuesEncoded(br) {
if int(v[0]) == zeroDictIdx {
scp.rowsCount++
}
@ -96,7 +96,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
for _, f := range fields {
c := br.getColumnByName(f)
if c.isConst {
if c.encodedValues[0] == "" {
if c.valuesEncoded[0] == "" {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
@ -107,15 +107,17 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
}
switch c.valueType {
case valueTypeString:
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(i int) bool {
return c.encodedValues[i] == ""
return valuesEncoded[i] == ""
})
case valueTypeDict:
if !slices.Contains(c.dictValues, "") {
return 0
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(i int) bool {
dictIdx := c.encodedValues[i][0]
dictIdx := valuesEncoded[i][0]
return c.dictValues[dictIdx] == ""
})
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
@ -145,7 +147,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForRow(br *blockResult, rowIdx i
// Fast path for count_empty(single_column)
c := br.getColumnByName(fields[0])
if c.isConst {
if c.encodedValues[0] == "" {
if c.valuesEncoded[0] == "" {
scp.rowsCount++
}
return 0
@ -155,12 +157,14 @@ func (scp *statsCountEmptyProcessor) updateStatsForRow(br *blockResult, rowIdx i
}
switch c.valueType {
case valueTypeString:
if v := c.encodedValues[rowIdx]; v == "" {
valuesEncoded := c.getValuesEncoded(br)
if v := valuesEncoded[rowIdx]; v == "" {
scp.rowsCount++
}
return 0
case valueTypeDict:
dictIdx := c.encodedValues[rowIdx][0]
valuesEncoded := c.getValuesEncoded(br)
dictIdx := valuesEncoded[rowIdx][0]
if v := c.dictValues[dictIdx]; v == "" {
scp.rowsCount++
}

View file

@ -24,8 +24,8 @@ func (su *statsCountUniq) String() string {
return s
}
func (su *statsCountUniq) neededFields() []string {
return su.fields
func (su *statsCountUniq) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(su.fields)
}
func (su *statsCountUniq) newStatsProcessor() (statsProcessor, int) {
@ -122,7 +122,7 @@ func (sup *statsCountUniqProcessor) updateStatsForAllRows(br *blockResult) int {
}
if c.isConst {
// count unique const values
v := c.encodedValues[0]
v := c.valuesEncoded[0]
if v == "" {
// Do not count empty values
return stateSizeIncrease
@ -156,7 +156,7 @@ func (sup *statsCountUniqProcessor) updateStatsForAllRows(br *blockResult) int {
return stateSizeIncrease
}
// Count unique values across encodedValues
// Count unique values across the column values
values := c.getValues(br)
keyBuf := sup.keyBuf[:0]
for i, v := range values {
@ -278,7 +278,7 @@ func (sup *statsCountUniqProcessor) updateStatsForRow(br *blockResult, rowIdx in
}
if c.isConst {
// count unique const values
v := c.encodedValues[0]
v := c.valuesEncoded[0]
if v == "" {
// Do not count empty values
return stateSizeIncrease
@ -295,7 +295,8 @@ func (sup *statsCountUniqProcessor) updateStatsForRow(br *blockResult, rowIdx in
}
if c.valueType == valueTypeDict {
// count unique non-zero c.dictValues
dictIdx := c.encodedValues[rowIdx][0]
valuesEncoded := c.getValuesEncoded(br)
dictIdx := valuesEncoded[rowIdx][0]
v := c.dictValues[dictIdx]
if v == "" {
// Do not count empty values

View file

@ -3,8 +3,11 @@ package logstorage
import (
"math"
"slices"
"strconv"
"strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
type statsMax struct {
@ -16,14 +19,13 @@ func (sm *statsMax) String() string {
return "max(" + fieldNamesString(sm.fields) + ")"
}
func (sm *statsMax) neededFields() []string {
return sm.fields
func (sm *statsMax) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(sm.fields)
}
func (sm *statsMax) newStatsProcessor() (statsProcessor, int) {
smp := &statsMaxProcessor{
sm: sm,
max: nan,
}
return smp, int(unsafe.Sizeof(*smp))
}
@ -31,62 +33,139 @@ func (sm *statsMax) newStatsProcessor() (statsProcessor, int) {
type statsMaxProcessor struct {
sm *statsMax
max float64
max string
hasMax bool
}
func (smp *statsMaxProcessor) updateStatsForAllRows(br *blockResult) int {
maxLen := len(smp.max)
if smp.sm.containsStar {
// Find the maximum value across all the columns
for _, c := range br.getColumns() {
f := c.getMaxValue()
if f > smp.max || math.IsNaN(smp.max) {
smp.max = f
}
smp.updateStateForColumn(br, c)
}
} else {
// Find the maximum value across the requested columns
for _, field := range smp.sm.fields {
c := br.getColumnByName(field)
f := c.getMaxValue()
if f > smp.max || math.IsNaN(smp.max) {
smp.max = f
smp.updateStateForColumn(br, c)
}
}
}
return 0
return len(smp.max) - maxLen
}
func (smp *statsMaxProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
maxLen := len(smp.max)
if smp.sm.containsStar {
// Find the maximum value across all the fields for the given row
for _, c := range br.getColumns() {
f := c.getFloatValueAtRow(rowIdx)
if f > smp.max || math.IsNaN(smp.max) {
smp.max = f
}
v := c.getValueAtRow(br, rowIdx)
smp.updateStateString(v)
}
} else {
// Find the maximum value across the requested fields for the given row
for _, field := range smp.sm.fields {
c := br.getColumnByName(field)
f := c.getFloatValueAtRow(rowIdx)
if f > smp.max || math.IsNaN(smp.max) {
smp.max = f
v := c.getValueAtRow(br, rowIdx)
smp.updateStateString(v)
}
}
}
return 0
return len(smp.max) - maxLen
}
func (smp *statsMaxProcessor) mergeState(sfp statsProcessor) {
src := sfp.(*statsMaxProcessor)
if src.max > smp.max {
smp.max = src.max
if src.hasMax {
smp.updateStateString(src.max)
}
}
func (smp *statsMaxProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) {
if len(br.timestamps) == 0 {
return
}
if c.isTime {
// Special case for time column
timestamps := br.timestamps
maxTimestamp := timestamps[len(timestamps)-1]
for _, timestamp := range timestamps[:len(timestamps)-1] {
if timestamp > maxTimestamp {
maxTimestamp = timestamp
}
}
bb := bbPool.Get()
bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], maxTimestamp)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
return
}
if c.isConst {
// Special case for const column
v := c.valuesEncoded[0]
smp.updateStateString(v)
return
}
switch c.valueType {
case valueTypeString:
for _, v := range c.getValuesEncoded(br) {
smp.updateStateString(v)
}
case valueTypeDict:
for _, v := range c.dictValues {
smp.updateStateString(v)
}
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
bb := bbPool.Get()
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeFloat64:
f := math.Float64frombits(c.maxValue)
bb := bbPool.Get()
bb.B = marshalFloat64String(bb.B[:0], f)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeIPv4:
bb := bbPool.Get()
bb.B = marshalIPv4String(bb.B[:0], uint32(c.maxValue))
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeTimestampISO8601:
bb := bbPool.Get()
bb.B = marshalTimestampISO8601String(bb.B[:0], int64(c.maxValue))
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
default:
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
}
}
func (smp *statsMaxProcessor) updateStateBytes(b []byte) {
v := bytesutil.ToUnsafeString(b)
smp.updateStateString(v)
}
func (smp *statsMaxProcessor) updateStateString(v string) {
if smp.hasMax && !lessString(smp.max, v) {
return
}
smp.max = strings.Clone(v)
smp.hasMax = true
}
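// Example of the new string-based tracking (illustrative, not from this diff):
// feeding "8", "12" and "9" into updateStateString leaves smp.max == "12", since
// lessString compares numeric strings numerically. Previously max was a float64
// and non-numeric values were skipped; now they participate via natural ordering.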
func (smp *statsMaxProcessor) finalizeStats() string {
return strconv.FormatFloat(smp.max, 'f', -1, 64)
if !smp.hasMax {
return "NaN"
}
return smp.max
}
func parseStatsMax(lex *lexer) (*statsMax, error) {

View file

@ -14,8 +14,8 @@ func (sm *statsMedian) String() string {
return "median(" + fieldNamesString(sm.fields) + ")"
}
func (sm *statsMedian) neededFields() []string {
return sm.fields
func (sm *statsMedian) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(sm.fields)
}
func (sm *statsMedian) newStatsProcessor() (statsProcessor, int) {

View file

@ -3,8 +3,11 @@ package logstorage
import (
"math"
"slices"
"strconv"
"strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
type statsMin struct {
@ -16,14 +19,13 @@ func (sm *statsMin) String() string {
return "min(" + fieldNamesString(sm.fields) + ")"
}
func (sm *statsMin) neededFields() []string {
return sm.fields
func (sm *statsMin) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(sm.fields)
}
func (sm *statsMin) newStatsProcessor() (statsProcessor, int) {
smp := &statsMinProcessor{
sm: sm,
min: nan,
}
return smp, int(unsafe.Sizeof(*smp))
}
@ -31,62 +33,139 @@ func (sm *statsMin) newStatsProcessor() (statsProcessor, int) {
type statsMinProcessor struct {
sm *statsMin
min float64
min string
hasMin bool
}
func (smp *statsMinProcessor) updateStatsForAllRows(br *blockResult) int {
minLen := len(smp.min)
if smp.sm.containsStar {
// Find the minimum value across all the columns
for _, c := range br.getColumns() {
f := c.getMinValue()
if f < smp.min || math.IsNaN(smp.min) {
smp.min = f
}
smp.updateStateForColumn(br, c)
}
} else {
// Find the minimum value across the requested columns
for _, field := range smp.sm.fields {
c := br.getColumnByName(field)
f := c.getMinValue()
if f < smp.min || math.IsNaN(smp.min) {
smp.min = f
smp.updateStateForColumn(br, c)
}
}
}
return 0
return len(smp.min) - minLen
}
func (smp *statsMinProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
minLen := len(smp.min)
if smp.sm.containsStar {
// Find the minimum value across all the fields for the given row
for _, c := range br.getColumns() {
f := c.getFloatValueAtRow(rowIdx)
if f < smp.min || math.IsNaN(smp.min) {
smp.min = f
}
v := c.getValueAtRow(br, rowIdx)
smp.updateStateString(v)
}
} else {
// Find the minimum value across the requested fields for the given row
for _, field := range smp.sm.fields {
c := br.getColumnByName(field)
f := c.getFloatValueAtRow(rowIdx)
if f < smp.min || math.IsNaN(smp.min) {
smp.min = f
v := c.getValueAtRow(br, rowIdx)
smp.updateStateString(v)
}
}
}
return 0
return len(smp.min) - minLen
}
func (smp *statsMinProcessor) mergeState(sfp statsProcessor) {
src := sfp.(*statsMinProcessor)
if src.min < smp.min {
smp.min = src.min
if src.hasMin {
smp.updateStateString(src.min)
}
}
func (smp *statsMinProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) {
if len(br.timestamps) == 0 {
return
}
if c.isTime {
// Special case for time column
timestamps := br.timestamps
minTimestamp := timestamps[0]
for _, timestamp := range timestamps[1:] {
if timestamp < minTimestamp {
minTimestamp = timestamp
}
}
bb := bbPool.Get()
bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], minTimestamp)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
return
}
if c.isConst {
// Special case for const column
v := c.valuesEncoded[0]
smp.updateStateString(v)
return
}
switch c.valueType {
case valueTypeString:
for _, v := range c.getValuesEncoded(br) {
smp.updateStateString(v)
}
case valueTypeDict:
for _, v := range c.dictValues {
smp.updateStateString(v)
}
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
bb := bbPool.Get()
bb.B = marshalUint64String(bb.B[:0], c.minValue)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeFloat64:
f := math.Float64frombits(c.minValue)
bb := bbPool.Get()
bb.B = marshalFloat64String(bb.B[:0], f)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeIPv4:
bb := bbPool.Get()
bb.B = marshalIPv4String(bb.B[:0], uint32(c.minValue))
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeTimestampISO8601:
bb := bbPool.Get()
bb.B = marshalTimestampISO8601String(bb.B[:0], int64(c.minValue))
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
default:
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
}
}
func (smp *statsMinProcessor) updateStateBytes(b []byte) {
v := bytesutil.ToUnsafeString(b)
smp.updateStateString(v)
}
func (smp *statsMinProcessor) updateStateString(v string) {
if smp.hasMin && !lessString(v, smp.min) {
return
}
smp.min = strings.Clone(v)
smp.hasMin = true
}
func (smp *statsMinProcessor) finalizeStats() string {
return strconv.FormatFloat(smp.min, 'f', -1, 64)
if !smp.hasMin {
return "NaN"
}
return smp.min
}
func parseStatsMin(lex *lexer) (*statsMin, error) {


@@ -8,6 +8,9 @@ import (
"unsafe"
"github.com/valyala/fastrand"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
type statsQuantile struct {
@@ -21,8 +24,8 @@ func (sq *statsQuantile) String() string {
return fmt.Sprintf("quantile(%g, %s)", sq.phi, fieldNamesString(sq.fields))
}
func (sq *statsQuantile) neededFields() []string {
return sq.fields
func (sq *statsQuantile) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(sq.fields)
}
func (sq *statsQuantile) newStatsProcessor() (statsProcessor, int) {
@@ -39,27 +42,16 @@ type statsQuantileProcessor struct {
}
func (sqp *statsQuantileProcessor) updateStatsForAllRows(br *blockResult) int {
h := &sqp.h
stateSizeIncrease := 0
if sqp.sq.containsStar {
for _, c := range br.getColumns() {
for _, v := range c.getValues(br) {
f, ok := tryParseFloat64(v)
if ok {
stateSizeIncrease += h.update(f)
}
}
stateSizeIncrease += sqp.updateStateForColumn(br, c)
}
} else {
for _, field := range sqp.sq.fields {
c := br.getColumnByName(field)
for _, v := range c.getValues(br) {
f, ok := tryParseFloat64(v)
if ok {
stateSizeIncrease += h.update(f)
}
}
stateSizeIncrease += sqp.updateStateForColumn(br, c)
}
}
@@ -72,16 +64,16 @@ func (sqp *statsQuantileProcessor) updateStatsForRow(br *blockResult, rowIdx int
if sqp.sq.containsStar {
for _, c := range br.getColumns() {
f := c.getFloatValueAtRow(rowIdx)
if !math.IsNaN(f) {
f, ok := c.getFloatValueAtRow(br, rowIdx)
if ok {
stateSizeIncrease += h.update(f)
}
}
} else {
for _, field := range sqp.sq.fields {
c := br.getColumnByName(field)
f := c.getFloatValueAtRow(rowIdx)
if !math.IsNaN(f) {
f, ok := c.getFloatValueAtRow(br, rowIdx)
if ok {
stateSizeIncrease += h.update(f)
}
}
@@ -90,6 +82,85 @@ func (sqp *statsQuantileProcessor) updateStatsForRow(br *blockResult, rowIdx int
return stateSizeIncrease
}
func (sqp *statsQuantileProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) int {
h := &sqp.h
stateSizeIncrease := 0
if c.isConst {
f, ok := tryParseFloat64(c.valuesEncoded[0])
if ok {
for range br.timestamps {
stateSizeIncrease += h.update(f)
}
}
return stateSizeIncrease
}
if c.isTime {
return 0
}
switch c.valueType {
case valueTypeString:
for _, v := range c.getValues(br) {
f, ok := tryParseFloat64(v)
if ok {
stateSizeIncrease += h.update(f)
}
}
case valueTypeDict:
dictValues := c.dictValues
a := encoding.GetFloat64s(len(dictValues))
for i, v := range dictValues {
f, ok := tryParseFloat64(v)
if !ok {
f = nan
}
a.A[i] = f
}
for _, v := range c.getValuesEncoded(br) {
idx := v[0]
f := a.A[idx]
if !math.IsNaN(f) {
h.update(f)
}
}
encoding.PutFloat64s(a)
case valueTypeUint8:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint8(v)
h.update(float64(n))
}
case valueTypeUint16:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint16(v)
h.update(float64(n))
}
case valueTypeUint32:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint32(v)
h.update(float64(n))
}
case valueTypeUint64:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint64(v)
h.update(float64(n))
}
case valueTypeFloat64:
for _, v := range c.getValuesEncoded(br) {
f := unmarshalFloat64(v)
if !math.IsNaN(f) {
h.update(f)
}
}
case valueTypeIPv4:
case valueTypeTimestampISO8601:
default:
logger.Panicf("BUG: unexpected valueType=%d", c.valueType)
}
return stateSizeIncrease
}
func (sqp *statsQuantileProcessor) mergeState(sfp statsProcessor) {
src := sfp.(*statsQuantileProcessor)
sqp.h.mergeState(&src.h)


@@ -16,8 +16,8 @@ func (ss *statsSum) String() string {
return "sum(" + fieldNamesString(ss.fields) + ")"
}
func (ss *statsSum) neededFields() []string {
return ss.fields
func (ss *statsSum) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(ss.fields)
}
func (ss *statsSum) newStatsProcessor() (statsProcessor, int) {
@@ -38,27 +38,13 @@ func (ssp *statsSumProcessor) updateStatsForAllRows(br *blockResult) int {
if ssp.ss.containsStar {
// Sum all the columns
for _, c := range br.getColumns() {
f, count := c.sumValues(br)
if count > 0 {
if math.IsNaN(ssp.sum) {
ssp.sum = f
} else {
ssp.sum += f
}
}
ssp.updateStateForColumn(br, c)
}
} else {
// Sum the requested columns
for _, field := range ssp.ss.fields {
c := br.getColumnByName(field)
f, count := c.sumValues(br)
if count > 0 {
if math.IsNaN(ssp.sum) {
ssp.sum = f
} else {
ssp.sum += f
}
}
ssp.updateStateForColumn(br, c)
}
}
return 0
@@ -68,31 +54,38 @@ func (ssp *statsSumProcessor) updateStatsForRow(br *blockResult, rowIdx int) int
if ssp.ss.containsStar {
// Sum all the fields for the given row
for _, c := range br.getColumns() {
f := c.getFloatValueAtRow(rowIdx)
if !math.IsNaN(f) {
if math.IsNaN(ssp.sum) {
ssp.sum = f
} else {
ssp.sum += f
}
f, ok := c.getFloatValueAtRow(br, rowIdx)
if ok {
ssp.updateState(f)
}
}
} else {
// Sum only the given fields for the given row
for _, field := range ssp.ss.fields {
c := br.getColumnByName(field)
f := c.getFloatValueAtRow(rowIdx)
if !math.IsNaN(f) {
f, ok := c.getFloatValueAtRow(br, rowIdx)
if ok {
ssp.updateState(f)
}
}
}
return 0
}
func (ssp *statsSumProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) {
f, count := c.sumValues(br)
if count > 0 {
ssp.updateState(f)
}
}
func (ssp *statsSumProcessor) updateState(f float64) {
if math.IsNaN(ssp.sum) {
ssp.sum = f
} else {
ssp.sum += f
}
}
}
}
return 0
}
func (ssp *statsSumProcessor) mergeState(sfp statsProcessor) {
src := sfp.(*statsSumProcessor)


@@ -15,8 +15,8 @@ func (ss *statsSumLen) String() string {
return "sum_len(" + fieldNamesString(ss.fields) + ")"
}
func (ss *statsSumLen) neededFields() []string {
return ss.fields
func (ss *statsSumLen) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(ss.fields)
}
func (ss *statsSumLen) newStatsProcessor() (statsProcessor, int) {


@@ -24,8 +24,8 @@ func (su *statsUniqValues) String() string {
return s
}
func (su *statsUniqValues) neededFields() []string {
return su.fields
func (su *statsUniqValues) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(su.fields)
}
func (su *statsUniqValues) newStatsProcessor() (statsProcessor, int) {
@@ -68,7 +68,7 @@ func (sup *statsUniqValuesProcessor) updateStatsForAllRowsColumn(c *blockResultC
stateSizeIncrease := 0
if c.isConst {
// collect unique const values
v := c.encodedValues[0]
v := c.valuesEncoded[0]
if v == "" {
// skip empty values
return stateSizeIncrease
@@ -141,7 +141,7 @@ func (sup *statsUniqValuesProcessor) updateStatsForRowColumn(c *blockResultColum
stateSizeIncrease := 0
if c.isConst {
// collect unique const values
v := c.encodedValues[0]
v := c.valuesEncoded[0]
if v == "" {
// skip empty values
return stateSizeIncrease
@@ -155,7 +155,8 @@ func (sup *statsUniqValuesProcessor) updateStatsForRowColumn(c *blockResultColum
}
if c.valueType == valueTypeDict {
// collect unique non-zero c.dictValues
dictIdx := c.encodedValues[rowIdx][0]
valuesEncoded := c.getValuesEncoded(br)
dictIdx := valuesEncoded[rowIdx][0]
v := c.dictValues[dictIdx]
if v == "" {
// skip empty values


@@ -21,8 +21,8 @@ func (sv *statsValues) String() string {
return s
}
func (sv *statsValues) neededFields() []string {
return sv.fields
func (sv *statsValues) updateNeededFields(neededFields fieldsSet) {
neededFields.addFields(sv.fields)
}
func (sv *statsValues) newStatsProcessor() (statsProcessor, int) {
@@ -61,7 +61,7 @@ func (svp *statsValuesProcessor) updateStatsForAllRows(br *blockResult) int {
func (svp *statsValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColumn, br *blockResult) int {
stateSizeIncrease := 0
if c.isConst {
v := strings.Clone(c.encodedValues[0])
v := strings.Clone(c.valuesEncoded[0])
stateSizeIncrease += len(v)
values := svp.values
@@ -81,7 +81,7 @@ func (svp *statsValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColum
}
values := svp.values
for _, encodedValue := range c.encodedValues {
for _, encodedValue := range c.getValuesEncoded(br) {
idx := encodedValue[0]
values = append(values, dictValues[idx])
}
@@ -128,7 +128,7 @@ func (svp *statsValuesProcessor) updateStatsForRow(br *blockResult, rowIdx int)
func (svp *statsValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, br *blockResult, rowIdx int) int {
stateSizeIncrease := 0
if c.isConst {
v := strings.Clone(c.encodedValues[0])
v := strings.Clone(c.valuesEncoded[0])
stateSizeIncrease += len(v)
svp.values = append(svp.values, v)
@@ -138,7 +138,8 @@ func (svp *statsValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, b
}
if c.valueType == valueTypeDict {
// collect unique non-zero c.dictValues
dictIdx := c.encodedValues[rowIdx][0]
valuesEncoded := c.getValuesEncoded(br)
dictIdx := valuesEncoded[rowIdx][0]
v := strings.Clone(c.dictValues[dictIdx])
stateSizeIncrease += len(v)


@@ -2,12 +2,15 @@ package logstorage
import (
"context"
"fmt"
"math"
"slices"
"sort"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// genericSearchOptions contain options used for search.
@@ -60,8 +63,44 @@ type searchOptions struct {
needAllColumns bool
}
// WriteBlockFunc must write a block with the given timestamps and columns.
//
// WriteBlockFunc cannot hold references to timestamps and columns after returning.
type WriteBlockFunc func(workerID uint, timestamps []int64, columns []BlockColumn)
// RunQuery runs the given q and calls writeBlock for results.
func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlock func(workerID uint, timestamps []int64, columns []BlockColumn)) error {
func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlock WriteBlockFunc) error {
qNew, err := s.initFilterInValues(ctx, tenantIDs, q)
if err != nil {
return err
}
writeBlockResult := func(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
brs := getBlockRows()
csDst := brs.cs
cs := br.getColumns()
for _, c := range cs {
values := c.getValues(br)
csDst = append(csDst, BlockColumn{
Name: c.name,
Values: values,
})
}
writeBlock(workerID, br.timestamps, csDst)
brs.cs = csDst
putBlockRows(brs)
}
return s.runQuery(ctx, tenantIDs, qNew, writeBlockResult)
}
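A caller-side sketch may help illustrate the new `WriteBlockFunc` contract: since the query engine reuses the `timestamps` and `columns` slices once the callback returns, anything the caller wants to keep must be copied inside the callback. The helper name and the surrounding setup (an opened `Storage`, tenant IDs and a parsed `Query`) are assumptions for illustration, not part of this change.

```go
package logstorageusage // hypothetical caller-side package

import (
	"context"
	"strings"
	"sync"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

// collectResults runs q and copies every returned column value, since
// WriteBlockFunc must not retain the passed slices after it returns.
func collectResults(ctx context.Context, s *logstorage.Storage, tenantIDs []logstorage.TenantID, q *logstorage.Query) (map[string][]string, error) {
	var mu sync.Mutex
	results := make(map[string][]string)
	writeBlock := func(_ uint, _ []int64, columns []logstorage.BlockColumn) {
		// The callback may be invoked concurrently from multiple workers,
		// so the shared map is protected with a mutex.
		mu.Lock()
		defer mu.Unlock()
		for _, c := range columns {
			for _, v := range c.Values {
				// strings.Clone detaches the value from the block's shared buffers.
				results[c.Name] = append(results[c.Name], strings.Clone(v))
			}
		}
	}
	if err := s.RunQuery(ctx, tenantIDs, q, writeBlock); err != nil {
		return nil, err
	}
	return results, nil
}
```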
func (s *Storage) runQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlockResultFunc func(workerID uint, br *blockResult)) error {
neededColumnNames, unneededColumnNames := q.getNeededColumns()
so := &genericSearchOptions{
tenantIDs: tenantIDs,
@@ -73,24 +112,8 @@ func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
workersCount := cgroup.AvailableCPUs()
pp := newDefaultPipeProcessor(func(workerID uint, br *blockResult) {
brs := getBlockRows()
csDst := brs.cs
for _, c := range br.getColumns() {
values := c.getValues(br)
csDst = append(csDst, BlockColumn{
Name: c.name,
Values: values,
})
}
writeBlock(workerID, br.timestamps, csDst)
brs.cs = csDst
putBlockRows(brs)
})
ppMain := pp
ppMain := newDefaultPipeProcessor(writeBlockResultFunc)
pp := ppMain
stopCh := ctx.Done()
cancels := make([]func(), len(q.pipes))
pps := make([]pipeProcessor, len(q.pipes))
@@ -121,6 +144,202 @@ func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
return errFlush
}
// GetFieldNames returns field names from q results for the given tenantIDs.
func (s *Storage) GetFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]string, error) {
// add `field_names ...` to the end of q.pipes
pipes := append([]pipe{}, q.pipes...)
pipeStr := "field_names as names"
lex := newLexer(pipeStr)
pf, err := parsePipeFieldNames(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing 'field_names' pipe: %s", err)
}
pf.isFirstPipe = len(pipes) == 0
pipes = append(pipes, pf)
q = &Query{
f: q.f,
pipes: pipes,
}
return s.runSingleColumnQuery(ctx, tenantIDs, q)
}
// GetFieldValues returns unique values for the given fieldName returned by q for the given tenantIDs.
//
// If limit > 0, then up to limit unique values are returned. The values are returned in arbitrary order for performance reasons.
// The caller may sort the returned values if needed.
func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]string, error) {
// add 'uniq fieldName' to the end of q.pipes
if !endsWithPipeUniqSingleField(q.pipes, fieldName) {
pipes := append([]pipe{}, q.pipes...)
pipeStr := fmt.Sprintf("uniq by (%s) limit %d", quoteTokenIfNeeded(fieldName), limit)
lex := newLexer(pipeStr)
pu, err := parsePipeUniq(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing 'uniq' pipe: %s", err)
}
pipes = append(pipes, pu)
q = &Query{
f: q.f,
pipes: pipes,
}
}
return s.runSingleColumnQuery(ctx, tenantIDs, q)
}
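Continuing the caller-side sketch above, a hypothetical helper shows how `GetFieldValues` might be used; passing `limit=0` requests all unique values, and the sort is added because the engine returns them in arbitrary order (the standard "sort" package is assumed in addition to the imports above).

```go
// listFieldValues fetches all unique values of fieldName seen in q results
// and sorts them for stable output.
func listFieldValues(ctx context.Context, s *logstorage.Storage, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string) ([]string, error) {
	values, err := s.GetFieldValues(ctx, tenantIDs, q, fieldName, 0)
	if err != nil {
		return nil, err
	}
	sort.Strings(values)
	return values, nil
}
```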
func endsWithPipeUniqSingleField(pipes []pipe, fieldName string) bool {
if len(pipes) == 0 {
return false
}
pu, ok := pipes[len(pipes)-1].(*pipeUniq)
if !ok {
return false
}
return len(pu.byFields) == 1 && pu.byFields[0] == fieldName
}
func (s *Storage) runSingleColumnQuery(ctx context.Context, tenantIDs []TenantID, q *Query) ([]string, error) {
var values []string
var valuesLock sync.Mutex
writeBlockResult := func(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
cs := br.getColumns()
if len(cs) != 1 {
logger.Panicf("BUG: expecting only a single column; got %d columns", len(cs))
}
columnValues := cs[0].getValues(br)
columnValuesCopy := make([]string, len(columnValues))
for i, v := range columnValues {
columnValuesCopy[i] = strings.Clone(v)
}
valuesLock.Lock()
values = append(values, columnValuesCopy...)
valuesLock.Unlock()
}
err := s.runQuery(ctx, tenantIDs, q, writeBlockResult)
if err != nil {
return nil, err
}
return values, nil
}
func (s *Storage) initFilterInValues(ctx context.Context, tenantIDs []TenantID, q *Query) (*Query, error) {
if !hasFilterInWithQueryForFilter(q.f) && !hasFilterInWithQueryForPipes(q.pipes) {
return q, nil
}
getFieldValues := func(q *Query, fieldName string) ([]string, error) {
return s.GetFieldValues(ctx, tenantIDs, q, fieldName, 0)
}
cache := make(map[string][]string)
fNew, err := initFilterInValuesForFilter(cache, q.f, getFieldValues)
if err != nil {
return nil, err
}
pipesNew, err := initFilterInValuesForPipes(cache, q.pipes, getFieldValues)
if err != nil {
return nil, err
}
qNew := &Query{
f: fNew,
pipes: pipesNew,
}
return qNew, nil
}
func hasFilterInWithQueryForFilter(f filter) bool {
visitFunc := func(f filter) bool {
fi, ok := f.(*filterIn)
return ok && fi.needExecuteQuery
}
return visitFilter(f, visitFunc)
}
func hasFilterInWithQueryForPipes(pipes []pipe) bool {
for _, p := range pipes {
ps, ok := p.(*pipeStats)
if !ok {
continue
}
for _, f := range ps.funcs {
if f.iff != nil && hasFilterInWithQueryForFilter(f.iff) {
return true
}
}
}
return false
}
type getFieldValuesFunc func(q *Query, fieldName string) ([]string, error)
func initFilterInValuesForFilter(cache map[string][]string, f filter, getFieldValuesFunc getFieldValuesFunc) (filter, error) {
visitFunc := func(f filter) bool {
fi, ok := f.(*filterIn)
return ok && fi.needExecuteQuery
}
copyFunc := func(f filter) (filter, error) {
fi := f.(*filterIn)
qStr := fi.q.String()
values, ok := cache[qStr]
if !ok {
vs, err := getFieldValuesFunc(fi.q, fi.qFieldName)
if err != nil {
return nil, fmt.Errorf("cannot obtain unique values for %s: %w", fi, err)
}
cache[qStr] = vs
values = vs
}
fiNew := &filterIn{
fieldName: fi.fieldName,
q: fi.q,
values: values,
}
return fiNew, nil
}
return copyFilter(f, visitFunc, copyFunc)
}
func initFilterInValuesForPipes(cache map[string][]string, pipes []pipe, getFieldValuesFunc getFieldValuesFunc) ([]pipe, error) {
pipesNew := make([]pipe, len(pipes))
for i, p := range pipes {
switch t := p.(type) {
case *pipeStats:
funcsNew := make([]pipeStatsFunc, len(t.funcs))
for j, f := range t.funcs {
if f.iff != nil {
fNew, err := initFilterInValuesForFilter(cache, f.iff, getFieldValuesFunc)
if err != nil {
return nil, err
}
f.iff = fNew
}
funcsNew[j] = f
}
pipesNew[i] = &pipeStats{
byFields: t.byFields,
funcs: funcsNew,
}
default:
pipesNew[i] = p
}
}
return pipesNew, nil
}
type blockRows struct {
cs []BlockColumn
}
@@ -169,7 +388,7 @@ type searchResultFunc func(workerID uint, br *blockResult)
// search searches for the matching rows according to so.
//
// It calls processBlockResult for each found matching block.
// It calls processBlockResult for each matching block.
func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-chan struct{}, processBlockResult searchResultFunc) {
// Spin up workers
var wgWorkers sync.WaitGroup
@@ -178,6 +397,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
for i := 0; i < workersCount; i++ {
go func(workerID uint) {
bs := getBlockSearch()
bm := getBitmap(0)
for bswb := range workCh {
bsws := bswb.bsws
for i := range bsws {
@@ -188,7 +408,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
continue
}
bs.search(bsw)
bs.search(bsw, bm)
if len(bs.br.timestamps) > 0 {
processBlockResult(workerID, &bs.br)
}
@@ -198,22 +418,24 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
putBlockSearchWorkBatch(bswb)
}
putBlockSearch(bs)
putBitmap(bm)
wgWorkers.Done()
}(uint(i))
}
// Obtain common time filter from so.filter
ft, f := getCommonFilterTime(so.filter)
// Obtain time range from so.filter
f := so.filter
minTimestamp, maxTimestamp := getFilterTimeRange(f)
// Select partitions according to the selected time range
s.partitionsLock.Lock()
ptws := s.partitions
minDay := ft.minTimestamp / nsecPerDay
minDay := minTimestamp / nsecPerDay
n := sort.Search(len(ptws), func(i int) bool {
return ptws[i].day >= minDay
})
ptws = ptws[n:]
maxDay := ft.maxTimestamp / nsecPerDay
maxDay := maxTimestamp / nsecPerDay
n = sort.Search(len(ptws), func(i int) bool {
return ptws[i].day > maxDay
})
@@ -234,7 +456,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
partitionSearchConcurrencyLimitCh <- struct{}{}
wgSearchers.Add(1)
go func(idx int, pt *partition) {
psfs[idx] = pt.search(ft, sf, f, so, workCh, stopCh)
psfs[idx] = pt.search(minTimestamp, maxTimestamp, sf, f, so, workCh, stopCh)
wgSearchers.Done()
<-partitionSearchConcurrencyLimitCh
}(i, ptw.pt)
@@ -263,7 +485,7 @@ var partitionSearchConcurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs
type partitionSearchFinalizer func()
func (pt *partition) search(ft *filterTime, sf *StreamFilter, f filter, so *genericSearchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
func (pt *partition) search(minTimestamp, maxTimestamp int64, sf *StreamFilter, f filter, so *genericSearchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
if needStop(stopCh) {
// Do not spend CPU time on search, since it is already stopped.
return func() {}
@@ -281,8 +503,8 @@ func (pt *partition) search(ft *filterTime, sf *StreamFilter, f filter, so *gene
soInternal := &searchOptions{
tenantIDs: tenantIDs,
streamIDs: streamIDs,
minTimestamp: ft.minTimestamp,
maxTimestamp: ft.maxTimestamp,
minTimestamp: minTimestamp,
maxTimestamp: maxTimestamp,
filter: f,
neededColumnNames: so.neededColumnNames,
unneededColumnNames: so.unneededColumnNames,
@@ -292,60 +514,32 @@ func (pt *partition) search(ft *filterTime, sf *StreamFilter, f filter, so *gene
}
func hasStreamFilters(f filter) bool {
switch t := f.(type) {
case *filterAnd:
return hasStreamFiltersInList(t.filters)
case *filterOr:
return hasStreamFiltersInList(t.filters)
case *filterNot:
return hasStreamFilters(t.f)
case *filterStream:
return true
default:
return false
visitFunc := func(f filter) bool {
_, ok := f.(*filterStream)
return ok
}
}
func hasStreamFiltersInList(filters []filter) bool {
for _, f := range filters {
if hasStreamFilters(f) {
return true
}
}
return false
return visitFilter(f, visitFunc)
}
func initStreamFilters(tenantIDs []TenantID, idb *indexdb, f filter) filter {
switch t := f.(type) {
case *filterAnd:
return &filterAnd{
filters: initStreamFiltersList(tenantIDs, idb, t.filters),
visitFunc := func(f filter) bool {
_, ok := f.(*filterStream)
return ok
}
case *filterOr:
return &filterOr{
filters: initStreamFiltersList(tenantIDs, idb, t.filters),
}
case *filterNot:
return &filterNot{
f: initStreamFilters(tenantIDs, idb, t.f),
}
case *filterStream:
return &filterStream{
f: t.f,
copyFunc := func(f filter) (filter, error) {
fs := f.(*filterStream)
fsNew := &filterStream{
f: fs.f,
tenantIDs: tenantIDs,
idb: idb,
}
default:
return t
return fsNew, nil
}
f, err := copyFilter(f, visitFunc, copyFunc)
if err != nil {
logger.Panicf("BUG: unexpected error: %s", err)
}
func initStreamFiltersList(tenantIDs []TenantID, idb *indexdb, filters []filter) []filter {
result := make([]filter, len(filters))
for i, f := range filters {
result[i] = initStreamFilters(tenantIDs, idb, f)
}
return result
return f
}
func (ddb *datadb) search(so *searchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
@@ -646,23 +840,25 @@ func getCommonStreamFilter(f filter) (*StreamFilter, filter) {
return nil, f
}
func getCommonFilterTime(f filter) (*filterTime, filter) {
func getFilterTimeRange(f filter) (int64, int64) {
switch t := f.(type) {
case *filterAnd:
minTimestamp := int64(math.MinInt64)
maxTimestamp := int64(math.MaxInt64)
for _, filter := range t.filters {
ft, ok := filter.(*filterTime)
if ok {
// The ft must remain in t.filters in order to properly filter out rows outside the selected time range
return ft, f
if ft.minTimestamp > minTimestamp {
minTimestamp = ft.minTimestamp
}
if ft.maxTimestamp < maxTimestamp {
maxTimestamp = ft.maxTimestamp
}
}
}
return minTimestamp, maxTimestamp
case *filterTime:
return t, f
return t.minTimestamp, t.maxTimestamp
}
return allFilterTime, f
}
var allFilterTime = &filterTime{
minTimestamp: math.MinInt64,
maxTimestamp: math.MaxInt64,
return math.MinInt64, math.MaxInt64
}
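For clarity, a package-internal sketch (for example in a unit test) of the intersection semantics implemented by `getFilterTimeRange`; the concrete timestamps are made up for illustration:

```go
// The time range of an AND filter is the intersection of its filterTime members;
// when no filterTime is present, the full int64 range is returned.
f := &filterAnd{
	filters: []filter{
		&filterTime{minTimestamp: 100, maxTimestamp: 900},
		&filterTime{minTimestamp: 300, maxTimestamp: 1200},
	},
}
minTimestamp, maxTimestamp := getFilterTimeRange(f)
// minTimestamp == 300, maxTimestamp == 900
```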


@@ -78,6 +78,14 @@ func TestStorageRunQuery(t *testing.T) {
}
s.debugFlush()
mustRunQuery := func(tenantIDs []TenantID, q *Query, writeBlock WriteBlockFunc) {
t.Helper()
err := s.RunQuery(context.Background(), tenantIDs, q, writeBlock)
if err != nil {
t.Fatalf("unexpected error returned from the query %s: %s", q, err)
}
}
// run tests on the storage data
t.Run("missing-tenant", func(_ *testing.T) {
q := mustParseQuery(`"log message"`)
@@ -89,7 +97,7 @@ func TestStorageRunQuery(t *testing.T) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
}
tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
mustRunQuery(tenantIDs, q, writeBlock)
})
t.Run("missing-message-text", func(_ *testing.T) {
q := mustParseQuery(`foobar`)
@@ -101,7 +109,7 @@ func TestStorageRunQuery(t *testing.T) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
}
tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
mustRunQuery(tenantIDs, q, writeBlock)
})
t.Run("matching-tenant-id", func(t *testing.T) {
q := mustParseQuery(`tenant.id:*`)
@@ -135,7 +143,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps)))
}
tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := streamsPerTenant * blocksPerStream * rowsPerBlock
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@@ -149,7 +157,7 @@ func TestStorageRunQuery(t *testing.T) {
writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) {
rowsCountTotal.Add(uint32(len(timestamps)))
}
checkErr(t, s.RunQuery(context.Background(), allTenantIDs, q, writeBlock))
mustRunQuery(allTenantIDs, q, writeBlock)
expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@@ -162,7 +170,7 @@ func TestStorageRunQuery(t *testing.T) {
writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) {
rowsCountTotal.Add(uint32(len(timestamps)))
}
checkErr(t, s.RunQuery(context.Background(), allTenantIDs, q, writeBlock))
mustRunQuery(allTenantIDs, q, writeBlock)
expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@@ -174,7 +182,7 @@ func TestStorageRunQuery(t *testing.T) {
writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
}
checkErr(t, s.RunQuery(context.Background(), allTenantIDs, q, writeBlock))
mustRunQuery(allTenantIDs, q, writeBlock)
})
t.Run("matching-stream-id", func(t *testing.T) {
for i := 0; i < streamsPerTenant; i++ {
@@ -208,7 +216,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps)))
}
tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := blocksPerStream * rowsPerBlock
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@@ -227,7 +235,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps)))
}
tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := streamsPerTenant * blocksPerStream * 2
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@@ -247,7 +255,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps)))
}
tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := streamsPerTenant * blocksPerStream
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@@ -267,7 +275,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps)))
}
tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := blocksPerStream
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@@ -286,7 +294,7 @@ func TestStorageRunQuery(t *testing.T) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
}
tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
mustRunQuery(tenantIDs, q, writeBlock)
})
t.Run("missing-time-range", func(_ *testing.T) {
minTimestamp := baseTimestamp + (rowsPerBlock+1)*1e9
@@ -300,7 +308,7 @@ func TestStorageRunQuery(t *testing.T) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
}
tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
mustRunQuery(tenantIDs, q, writeBlock)
})
// Close the storage and delete its data
@@ -308,13 +316,6 @@ func TestStorageRunQuery(t *testing.T) {
fs.MustRemoveAll(path)
}
func checkErr(t *testing.T, err error) {
t.Helper()
if err != nil {
t.Fatalf("unexpected err: %s", err)
}
}
func mustParseQuery(query string) *Query {
q, err := ParseQuery(query)
if err != nil {
@@ -470,9 +471,6 @@ func TestStorageSearch(t *testing.T) {
}
var rowsCountTotal atomic.Uint32
processBlock := func(_ uint, br *blockResult) {
if !br.streamID.tenantID.equal(&tenantID) {
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
}
rowsCountTotal.Add(uint32(len(br.timestamps)))
}
s.search(workersCount, so, nil, processBlock)
@@ -504,7 +502,7 @@ func TestStorageSearch(t *testing.T) {
}
})
t.Run("stream-filter-mismatch", func(_ *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-.+:2345"}`)
sf := mustNewTestStreamFilter(`{job="foobar",instance=~"host-.+:2345"}`)
minTimestamp := baseTimestamp
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
f := getBaseFilter(minTimestamp, maxTimestamp, sf)
@@ -520,7 +518,7 @@ func TestStorageSearch(t *testing.T) {
})
t.Run("matching-stream-id", func(t *testing.T) {
for i := 0; i < streamsPerTenant; i++ {
sf := mustNewStreamFilter(fmt.Sprintf(`{job="foobar",instance="host-%d:234"}`, i))
sf := mustNewTestStreamFilter(fmt.Sprintf(`{job="foobar",instance="host-%d:234"}`, i))
tenantID := TenantID{
AccountID: 1,
ProjectID: 11,
@@ -535,9 +533,6 @@ func TestStorageSearch(t *testing.T) {
}
var rowsCountTotal atomic.Uint32
processBlock := func(_ uint, br *blockResult) {
if !br.streamID.tenantID.equal(&tenantID) {
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
}
rowsCountTotal.Add(uint32(len(br.timestamps)))
}
s.search(workersCount, so, nil, processBlock)
@@ -549,7 +544,7 @@ func TestStorageSearch(t *testing.T) {
}
})
t.Run("matching-multiple-stream-ids", func(t *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
sf := mustNewTestStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
tenantID := TenantID{
AccountID: 1,
ProjectID: 11,
@@ -564,9 +559,6 @@ func TestStorageSearch(t *testing.T) {
}
var rowsCountTotal atomic.Uint32
processBlock := func(_ uint, br *blockResult) {
if !br.streamID.tenantID.equal(&tenantID) {
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
}
rowsCountTotal.Add(uint32(len(br.timestamps)))
}
s.search(workersCount, so, nil, processBlock)
@@ -577,7 +569,7 @@ func TestStorageSearch(t *testing.T) {
}
})
t.Run("matching-multiple-stream-ids-with-re-filter", func(t *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
sf := mustNewTestStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
tenantID := TenantID{
AccountID: 1,
ProjectID: 11,
@@ -601,9 +593,6 @@ func TestStorageSearch(t *testing.T) {
}
var rowsCountTotal atomic.Uint32
processBlock := func(_ uint, br *blockResult) {
if !br.streamID.tenantID.equal(&tenantID) {
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
}
rowsCountTotal.Add(uint32(len(br.timestamps)))
}
s.search(workersCount, so, nil, processBlock)
@@ -614,7 +603,7 @@ func TestStorageSearch(t *testing.T) {
}
})
t.Run("matching-stream-id-smaller-time-range", func(t *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance="host-1:234"}`)
sf := mustNewTestStreamFilter(`{job="foobar",instance="host-1:234"}`)
tenantID := TenantID{
AccountID: 1,
ProjectID: 11,
@@ -639,7 +628,7 @@ func TestStorageSearch(t *testing.T) {
}
})
t.Run("matching-stream-id-missing-time-range", func(_ *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance="host-1:234"}`)
sf := mustNewTestStreamFilter(`{job="foobar",instance="host-1:234"}`)
tenantID := TenantID{
AccountID: 1,
ProjectID: 11,
@@ -661,11 +650,3 @@ func TestStorageSearch(t *testing.T) {
s.MustClose()
fs.MustRemoveAll(path)
}
func mustNewStreamFilter(s string) *StreamFilter {
sf, err := newStreamFilter(s)
if err != nil {
panic(fmt.Errorf("unexpected error in newStreamFilter(%q): %w", s, err))
}
return sf
}


@@ -1,11 +1,14 @@
package logstorage
import (
"fmt"
"strconv"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)
@@ -14,6 +17,29 @@ type StreamFilter struct {
orFilters []*andStreamFilter
}
func (sf *StreamFilter) matchStreamName(s string) bool {
sn := getStreamName()
defer putStreamName(sn)
if !sn.parse(s) {
return false
}
for _, of := range sf.orFilters {
matchAndFilters := true
for _, tf := range of.tagFilters {
if !sn.match(tf) {
matchAndFilters = false
break
}
}
if matchAndFilters {
return true
}
}
return false
}
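Since `newStreamFilter` and `matchStreamName` are package-internal, a short test-style sketch inside `lib/logstorage` illustrates the intended matching semantics; the concrete filter and stream names are made up:

```go
// Tag order in the stream name does not matter: each tag filter is matched
// against the value looked up by tag name.
sf, err := newStreamFilter(`{job="foobar",instance=~"host-.+:234"}`)
if err != nil {
	panic(err)
}
fmt.Println(sf.matchStreamName(`{instance="host-1:234",job="foobar"}`)) // true
fmt.Println(sf.matchStreamName(`{instance="host-1:234",job="nginx"}`))  // false
```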
func (sf *StreamFilter) isEmpty() bool {
for _, af := range sf.orFilters {
if len(af.tagFilters) > 0 {
@@ -69,10 +95,199 @@ type streamTagFilter struct {
regexp *regexutil.PromRegex
}
func (tf *streamTagFilter) getRegexp() *regexutil.PromRegex {
return tf.regexp
}
func (tf *streamTagFilter) String() string {
return quoteTokenIfNeeded(tf.tagName) + tf.op + strconv.Quote(tf.value)
}
func parseStreamFilter(lex *lexer) (*StreamFilter, error) {
if !lex.isKeyword("{") {
return nil, fmt.Errorf("unexpected token %q instead of '{' in _stream filter", lex.token)
}
if !lex.mustNextToken() {
return nil, fmt.Errorf("incomplete _stream filter after '{'")
}
var filters []*andStreamFilter
for {
f, err := parseAndStreamFilter(lex)
if err != nil {
return nil, err
}
filters = append(filters, f)
switch {
case lex.isKeyword("}"):
lex.nextToken()
sf := &StreamFilter{
orFilters: filters,
}
return sf, nil
case lex.isKeyword("or"):
if !lex.mustNextToken() {
return nil, fmt.Errorf("incomplete _stream filter after 'or'")
}
if lex.isKeyword("}") {
return nil, fmt.Errorf("unexpected '}' after 'or' in _stream filter")
}
default:
return nil, fmt.Errorf("unexpected token in _stream filter: %q; want '}' or 'or'", lex.token)
}
}
}
func parseAndStreamFilter(lex *lexer) (*andStreamFilter, error) {
var filters []*streamTagFilter
for {
if lex.isKeyword("}") {
asf := &andStreamFilter{
tagFilters: filters,
}
return asf, nil
}
f, err := parseStreamTagFilter(lex)
if err != nil {
return nil, err
}
filters = append(filters, f)
switch {
case lex.isKeyword("or", "}"):
asf := &andStreamFilter{
tagFilters: filters,
}
return asf, nil
case lex.isKeyword(","):
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing stream filter after ','")
}
default:
return nil, fmt.Errorf("unexpected token %q in _stream filter; want 'or', 'and', '}' or ','", lex.token)
}
}
}
func parseStreamTagFilter(lex *lexer) (*streamTagFilter, error) {
tagName := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing operation in _stream filter for %q field", tagName)
}
if !lex.isKeyword("=", "!=", "=~", "!~") {
return nil, fmt.Errorf("unsupported operation %q in _steam filter for %q field; supported operations: =, !=, =~, !~", lex.token, tagName)
}
op := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing _stream filter value for %q field", tagName)
}
value := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing token after %q%s%q filter", tagName, op, value)
}
stf := &streamTagFilter{
tagName: tagName,
op: op,
value: value,
}
if op == "=~" || op == "!~" {
re, err := regexutil.NewPromRegex(value)
if err != nil {
return nil, fmt.Errorf("invalid regexp %q for stream filter: %w", value, err)
}
stf.regexp = re
}
return stf, nil
}
func getStreamName() *streamName {
v := streamNamePool.Get()
if v == nil {
return &streamName{}
}
return v.(*streamName)
}
func putStreamName(sn *streamName) {
sn.reset()
streamNamePool.Put(sn)
}
var streamNamePool sync.Pool
type streamName struct {
tags []Field
}
func (sn *streamName) reset() {
clear(sn.tags)
sn.tags = sn.tags[:0]
}
func (sn *streamName) parse(s string) bool {
if len(s) < 2 || s[0] != '{' || s[len(s)-1] != '}' {
return false
}
s = s[1 : len(s)-1]
if len(s) == 0 {
return true
}
for {
// Parse tag name
n := strings.IndexByte(s, '=')
if n < 0 {
// cannot find tag name
return false
}
name := s[:n]
s = s[n+1:]
// Parse tag value
if len(s) == 0 || s[0] != '"' {
return false
}
qPrefix, err := strconv.QuotedPrefix(s)
if err != nil {
return false
}
s = s[len(qPrefix):]
value, err := strconv.Unquote(qPrefix)
if err != nil {
return false
}
sn.tags = append(sn.tags, Field{
Name: name,
Value: value,
})
if len(s) == 0 {
return true
}
if s[0] != ',' {
return false
}
s = s[1:]
}
}
func (sn *streamName) match(tf *streamTagFilter) bool {
v := sn.getTagValueByTagName(tf.tagName)
switch tf.op {
case "=":
return v == tf.value
case "!=":
return v != tf.value
case "=~":
return tf.regexp.MatchString(v)
case "!~":
return !tf.regexp.MatchString(v)
default:
logger.Panicf("BUG: unexpected tagFilter operation: %q", tf.op)
return false
}
}
func (sn *streamName) getTagValueByTagName(name string) string {
for _, t := range sn.tags {
if t.Name == name {
return t.Value
}
}
return ""
}
