lib/logstorage: work-in-progress

Aliaksandr Valialkin 2024-05-20 04:08:30 +02:00
parent 3661373cc2
commit ad505a7a9a
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
105 changed files with 7794 additions and 1945 deletions

View file

@@ -20,7 +20,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-"github.com/VictoriaMetrics/VictoriaMetrics/lib/logjson"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
@@ -210,8 +209,8 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
return false, fmt.Errorf(`missing log message after the "create" or "index" command`)
}
line = sc.Bytes()
-p := logjson.GetParser()
-if err := p.ParseLogMessage(line); err != nil {
+p := logstorage.GetJSONParser()
+if err := p.ParseLogMessage(line, ""); err != nil {
return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
}
@@ -224,7 +223,7 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
}
p.RenameField(msgField, "_msg")
processLogMessage(ts, p.Fields)
-logjson.PutParser(p)
+logstorage.PutJSONParser(p)
return true, nil
}

View file

@@ -12,7 +12,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-"github.com/VictoriaMetrics/VictoriaMetrics/lib/logjson"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
@@ -105,8 +104,8 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
line = sc.Bytes()
}
-p := logjson.GetParser()
-if err := p.ParseLogMessage(line); err != nil {
+p := logstorage.GetJSONParser()
+if err := p.ParseLogMessage(line, ""); err != nil {
return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
}
ts, err := extractTimestampFromFields(timeField, p.Fields)
@@ -118,7 +117,7 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
}
p.RenameField(msgField, "_msg")
processLogMessage(ts, p.Fields)
-logjson.PutParser(p)
+logstorage.PutJSONParser(p)
return true, nil
}
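Both handlers above switch from the standalone `lib/logjson` parser to the pooled JSON parser exposed by `lib/logstorage`. A minimal sketch of the new usage pattern, using only the calls visible in this diff; the surrounding `line`, `msgField`, `ts` and `processLogMessage` variables stand in for the handler code, so this is a fragment rather than a complete program:

```go
// Acquire a pooled JSON parser from lib/logstorage (replaces logjson.GetParser).
p := logstorage.GetJSONParser()
// Both handlers above pass "" as the second string argument; its exact meaning isn't shown in this diff.
if err := p.ParseLogMessage(line, ""); err != nil {
	return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
}
// p.Fields now holds the parsed field/value pairs.
p.RenameField(msgField, "_msg")
processLogMessage(ts, p.Fields)
// Return the parser to the pool so it can be reused (replaces logjson.PutParser).
logstorage.PutJSONParser(p)
```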

View file

@@ -50,7 +50,8 @@ Below is an example output:
"u64_0": "4810489083243239145",
"float_0": "1.868",
"ip_0": "250.34.75.125",
-"timestamp_0": "1799-03-16T01:34:18.311Z"
+"timestamp_0": "1799-03-16T01:34:18.311Z",
+"json_0": "{\"foo\":\"bar_3\",\"baz\":{\"a\":[\"x\",\"y\"]},\"f3\":NaN,\"f4\":32}"
}
{
"_time": "2024-05-08T14:34:00.854Z",
@@ -70,7 +71,8 @@ Below is an example output:
"u64_0": "6593354256620219850",
"float_0": "1.085",
"ip_0": "253.151.88.158",
-"timestamp_0": "2042-10-05T16:42:57.082Z"
+"timestamp_0": "2042-10-05T16:42:57.082Z",
+"json_0": "{\"foo\":\"bar_5\",\"baz\":{\"a\":[\"x\",\"y\"]},\"f3\":NaN,\"f4\":27}"
}
```

View file

@@ -51,6 +51,8 @@ var (
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
timestampFieldsPerLog = flag.Int("timestampFieldsPerLog", 1, "The number of fields with ISO8601 timestamps per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
+jsonFieldsPerLog = flag.Int("jsonFieldsPerLog", 1, "The number of JSON fields to generate per each log entry; "+
+"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
statInterval = flag.Duration("statInterval", 10*time.Second, "The interval between publishing the stats")
)
@@ -263,6 +265,9 @@ func generateLogsAtTimestamp(bw *bufio.Writer, workerID int, ts int64, firstStre
timestamp := toISO8601(int64(rand.Uint64()))
fmt.Fprintf(bw, `,"timestamp_%d":"%s"`, j, timestamp)
}
+for j := 0; j < *jsonFieldsPerLog; j++ {
+fmt.Fprintf(bw, `,"json_%d":"{\"foo\":\"bar_%d\",\"baz\":{\"a\":[\"x\",\"y\"]},\"f3\":NaN,\"f4\":%d}"`, j, rand.Intn(10), rand.Intn(100))
+}
fmt.Fprintf(bw, "}\n")
logEntriesCount.Add(1)

View file

@@ -0,0 +1,17 @@
{% stripspace %}
// FieldNamesResponse formats /select/logsql/field_names response
{% func FieldNamesResponse(names []string) %}
{
"names":[
{% if len(names) > 0 %}
{%q= names[0] %}
{% for _, v := range names[1:] %}
,{%q= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@@ -0,0 +1,69 @@
// Code generated by qtc from "field_names_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
// FieldNamesResponse formats /select/logsql/field_names response
//line app/vlselect/logsql/field_names_response.qtpl:4
package logsql
//line app/vlselect/logsql/field_names_response.qtpl:4
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/field_names_response.qtpl:4
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/field_names_response.qtpl:4
func StreamFieldNamesResponse(qw422016 *qt422016.Writer, names []string) {
//line app/vlselect/logsql/field_names_response.qtpl:4
qw422016.N().S(`{"names":[`)
//line app/vlselect/logsql/field_names_response.qtpl:7
if len(names) > 0 {
//line app/vlselect/logsql/field_names_response.qtpl:8
qw422016.N().Q(names[0])
//line app/vlselect/logsql/field_names_response.qtpl:9
for _, v := range names[1:] {
//line app/vlselect/logsql/field_names_response.qtpl:9
qw422016.N().S(`,`)
//line app/vlselect/logsql/field_names_response.qtpl:10
qw422016.N().Q(v)
//line app/vlselect/logsql/field_names_response.qtpl:11
}
//line app/vlselect/logsql/field_names_response.qtpl:12
}
//line app/vlselect/logsql/field_names_response.qtpl:12
qw422016.N().S(`]}`)
//line app/vlselect/logsql/field_names_response.qtpl:15
}
//line app/vlselect/logsql/field_names_response.qtpl:15
func WriteFieldNamesResponse(qq422016 qtio422016.Writer, names []string) {
//line app/vlselect/logsql/field_names_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/field_names_response.qtpl:15
StreamFieldNamesResponse(qw422016, names)
//line app/vlselect/logsql/field_names_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/field_names_response.qtpl:15
}
//line app/vlselect/logsql/field_names_response.qtpl:15
func FieldNamesResponse(names []string) string {
//line app/vlselect/logsql/field_names_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/field_names_response.qtpl:15
WriteFieldNamesResponse(qb422016, names)
//line app/vlselect/logsql/field_names_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/field_names_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/field_names_response.qtpl:15
return qs422016
//line app/vlselect/logsql/field_names_response.qtpl:15
}

View file

@@ -0,0 +1,17 @@
{% stripspace %}
// FieldValuesResponse formats /select/logsql/field_values response
{% func FieldValuesResponse(values []string) %}
{
"values":[
{% if len(values) > 0 %}
{%q= values[0] %}
{% for _, v := range values[1:] %}
,{%q= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@@ -0,0 +1,69 @@
// Code generated by qtc from "field_values_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
// FieldValuesResponse formats /select/logsql/field_values response
//line app/vlselect/logsql/field_values_response.qtpl:4
package logsql
//line app/vlselect/logsql/field_values_response.qtpl:4
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/field_values_response.qtpl:4
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/field_values_response.qtpl:4
func StreamFieldValuesResponse(qw422016 *qt422016.Writer, values []string) {
//line app/vlselect/logsql/field_values_response.qtpl:4
qw422016.N().S(`{"values":[`)
//line app/vlselect/logsql/field_values_response.qtpl:7
if len(values) > 0 {
//line app/vlselect/logsql/field_values_response.qtpl:8
qw422016.N().Q(values[0])
//line app/vlselect/logsql/field_values_response.qtpl:9
for _, v := range values[1:] {
//line app/vlselect/logsql/field_values_response.qtpl:9
qw422016.N().S(`,`)
//line app/vlselect/logsql/field_values_response.qtpl:10
qw422016.N().Q(v)
//line app/vlselect/logsql/field_values_response.qtpl:11
}
//line app/vlselect/logsql/field_values_response.qtpl:12
}
//line app/vlselect/logsql/field_values_response.qtpl:12
qw422016.N().S(`]}`)
//line app/vlselect/logsql/field_values_response.qtpl:15
}
//line app/vlselect/logsql/field_values_response.qtpl:15
func WriteFieldValuesResponse(qq422016 qtio422016.Writer, values []string) {
//line app/vlselect/logsql/field_values_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/field_values_response.qtpl:15
StreamFieldValuesResponse(qw422016, values)
//line app/vlselect/logsql/field_values_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/field_values_response.qtpl:15
}
//line app/vlselect/logsql/field_values_response.qtpl:15
func FieldValuesResponse(values []string) string {
//line app/vlselect/logsql/field_values_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/field_values_response.qtpl:15
WriteFieldValuesResponse(qb422016, values)
//line app/vlselect/logsql/field_values_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/field_values_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/field_values_response.qtpl:15
return qs422016
//line app/vlselect/logsql/field_values_response.qtpl:15
}

View file

@@ -0,0 +1,69 @@
{% import (
"slices"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
) %}
{% stripspace %}
// LabelsForHits formats labels for /select/logsql/hits response
{% func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) %}
{
{% if len(columns) > 0 %}
{%q= columns[0].Name %}:{%q= columns[0].Values[rowIdx] %}
{% for _, c := range columns[1:] %}
,{%q= c.Name %}:{%q= c.Values[rowIdx] %}
{% endfor %}
{% endif %}
}
{% endfunc %}
{% func HitsSeries(m map[string]*hitsSeries) %}
{
{% code
sortedKeys := make([]string, 0, len(m))
for k := range m {
sortedKeys = append(sortedKeys, k)
}
slices.Sort(sortedKeys)
%}
"hits":[
{% if len(sortedKeys) > 0 %}
{%= hitsSeriesLine(m, sortedKeys[0]) %}
{% for _, k := range sortedKeys[1:] %}
,{%= hitsSeriesLine(m, k) %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% func hitsSeriesLine(m map[string]*hitsSeries, k string) %}
{
{% code
hs := m[k]
hs.sort()
timestamps := hs.timestamps
values := hs.values
%}
"fields":{%s= k %},
"timestamps":[
{% if len(timestamps) > 0 %}
{%q= timestamps[0] %}
{% for _, ts := range timestamps[1:] %}
,{%q= ts %}
{% endfor %}
{% endif %}
],
"values":[
{% if len(values) > 0 %}
{%s= values[0] %}
{% for _, v := range values[1:] %}
,{%s= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@@ -0,0 +1,219 @@
// Code generated by qtc from "hits_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vlselect/logsql/hits_response.qtpl:1
package logsql
//line app/vlselect/logsql/hits_response.qtpl:1
import (
"slices"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
// LabelsForHits formats labels for /select/logsql/hits response
//line app/vlselect/logsql/hits_response.qtpl:10
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/hits_response.qtpl:10
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/hits_response.qtpl:10
func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:10
qw422016.N().S(`{`)
//line app/vlselect/logsql/hits_response.qtpl:12
if len(columns) > 0 {
//line app/vlselect/logsql/hits_response.qtpl:13
qw422016.N().Q(columns[0].Name)
//line app/vlselect/logsql/hits_response.qtpl:13
qw422016.N().S(`:`)
//line app/vlselect/logsql/hits_response.qtpl:13
qw422016.N().Q(columns[0].Values[rowIdx])
//line app/vlselect/logsql/hits_response.qtpl:14
for _, c := range columns[1:] {
//line app/vlselect/logsql/hits_response.qtpl:14
qw422016.N().S(`,`)
//line app/vlselect/logsql/hits_response.qtpl:15
qw422016.N().Q(c.Name)
//line app/vlselect/logsql/hits_response.qtpl:15
qw422016.N().S(`:`)
//line app/vlselect/logsql/hits_response.qtpl:15
qw422016.N().Q(c.Values[rowIdx])
//line app/vlselect/logsql/hits_response.qtpl:16
}
//line app/vlselect/logsql/hits_response.qtpl:17
}
//line app/vlselect/logsql/hits_response.qtpl:17
qw422016.N().S(`}`)
//line app/vlselect/logsql/hits_response.qtpl:19
}
//line app/vlselect/logsql/hits_response.qtpl:19
func WriteLabelsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:19
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/hits_response.qtpl:19
StreamLabelsForHits(qw422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/hits_response.qtpl:19
}
//line app/vlselect/logsql/hits_response.qtpl:19
func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) string {
//line app/vlselect/logsql/hits_response.qtpl:19
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/hits_response.qtpl:19
WriteLabelsForHits(qb422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/hits_response.qtpl:19
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/hits_response.qtpl:19
return qs422016
//line app/vlselect/logsql/hits_response.qtpl:19
}
//line app/vlselect/logsql/hits_response.qtpl:21
func StreamHitsSeries(qw422016 *qt422016.Writer, m map[string]*hitsSeries) {
//line app/vlselect/logsql/hits_response.qtpl:21
qw422016.N().S(`{`)
//line app/vlselect/logsql/hits_response.qtpl:24
sortedKeys := make([]string, 0, len(m))
for k := range m {
sortedKeys = append(sortedKeys, k)
}
slices.Sort(sortedKeys)
//line app/vlselect/logsql/hits_response.qtpl:29
qw422016.N().S(`"hits":[`)
//line app/vlselect/logsql/hits_response.qtpl:31
if len(sortedKeys) > 0 {
//line app/vlselect/logsql/hits_response.qtpl:32
streamhitsSeriesLine(qw422016, m, sortedKeys[0])
//line app/vlselect/logsql/hits_response.qtpl:33
for _, k := range sortedKeys[1:] {
//line app/vlselect/logsql/hits_response.qtpl:33
qw422016.N().S(`,`)
//line app/vlselect/logsql/hits_response.qtpl:34
streamhitsSeriesLine(qw422016, m, k)
//line app/vlselect/logsql/hits_response.qtpl:35
}
//line app/vlselect/logsql/hits_response.qtpl:36
}
//line app/vlselect/logsql/hits_response.qtpl:36
qw422016.N().S(`]}`)
//line app/vlselect/logsql/hits_response.qtpl:39
}
//line app/vlselect/logsql/hits_response.qtpl:39
func WriteHitsSeries(qq422016 qtio422016.Writer, m map[string]*hitsSeries) {
//line app/vlselect/logsql/hits_response.qtpl:39
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/hits_response.qtpl:39
StreamHitsSeries(qw422016, m)
//line app/vlselect/logsql/hits_response.qtpl:39
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/hits_response.qtpl:39
}
//line app/vlselect/logsql/hits_response.qtpl:39
func HitsSeries(m map[string]*hitsSeries) string {
//line app/vlselect/logsql/hits_response.qtpl:39
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/hits_response.qtpl:39
WriteHitsSeries(qb422016, m)
//line app/vlselect/logsql/hits_response.qtpl:39
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/hits_response.qtpl:39
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/hits_response.qtpl:39
return qs422016
//line app/vlselect/logsql/hits_response.qtpl:39
}
//line app/vlselect/logsql/hits_response.qtpl:41
func streamhitsSeriesLine(qw422016 *qt422016.Writer, m map[string]*hitsSeries, k string) {
//line app/vlselect/logsql/hits_response.qtpl:41
qw422016.N().S(`{`)
//line app/vlselect/logsql/hits_response.qtpl:44
hs := m[k]
hs.sort()
timestamps := hs.timestamps
values := hs.values
//line app/vlselect/logsql/hits_response.qtpl:48
qw422016.N().S(`"fields":`)
//line app/vlselect/logsql/hits_response.qtpl:49
qw422016.N().S(k)
//line app/vlselect/logsql/hits_response.qtpl:49
qw422016.N().S(`,"timestamps":[`)
//line app/vlselect/logsql/hits_response.qtpl:51
if len(timestamps) > 0 {
//line app/vlselect/logsql/hits_response.qtpl:52
qw422016.N().Q(timestamps[0])
//line app/vlselect/logsql/hits_response.qtpl:53
for _, ts := range timestamps[1:] {
//line app/vlselect/logsql/hits_response.qtpl:53
qw422016.N().S(`,`)
//line app/vlselect/logsql/hits_response.qtpl:54
qw422016.N().Q(ts)
//line app/vlselect/logsql/hits_response.qtpl:55
}
//line app/vlselect/logsql/hits_response.qtpl:56
}
//line app/vlselect/logsql/hits_response.qtpl:56
qw422016.N().S(`],"values":[`)
//line app/vlselect/logsql/hits_response.qtpl:59
if len(values) > 0 {
//line app/vlselect/logsql/hits_response.qtpl:60
qw422016.N().S(values[0])
//line app/vlselect/logsql/hits_response.qtpl:61
for _, v := range values[1:] {
//line app/vlselect/logsql/hits_response.qtpl:61
qw422016.N().S(`,`)
//line app/vlselect/logsql/hits_response.qtpl:62
qw422016.N().S(v)
//line app/vlselect/logsql/hits_response.qtpl:63
}
//line app/vlselect/logsql/hits_response.qtpl:64
}
//line app/vlselect/logsql/hits_response.qtpl:64
qw422016.N().S(`]}`)
//line app/vlselect/logsql/hits_response.qtpl:67
}
//line app/vlselect/logsql/hits_response.qtpl:67
func writehitsSeriesLine(qq422016 qtio422016.Writer, m map[string]*hitsSeries, k string) {
//line app/vlselect/logsql/hits_response.qtpl:67
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/hits_response.qtpl:67
streamhitsSeriesLine(qw422016, m, k)
//line app/vlselect/logsql/hits_response.qtpl:67
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/hits_response.qtpl:67
}
//line app/vlselect/logsql/hits_response.qtpl:67
func hitsSeriesLine(m map[string]*hitsSeries, k string) string {
//line app/vlselect/logsql/hits_response.qtpl:67
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/hits_response.qtpl:67
writehitsSeriesLine(qb422016, m, k)
//line app/vlselect/logsql/hits_response.qtpl:67
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/hits_response.qtpl:67
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/hits_response.qtpl:67
return qs422016
//line app/vlselect/logsql/hits_response.qtpl:67
}

View file

@@ -5,6 +5,10 @@ import (
"fmt"
"math"
"net/http"
+"slices"
+"sort"
+"strings"
+"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
@@ -15,44 +19,196 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
// ProcessHitsRequest handles /select/logsql/hits request.
//
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats
func ProcessHitsRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
// Obtain step
stepStr := r.FormValue("step")
if stepStr == "" {
stepStr = "1d"
}
step, err := promutils.ParseDuration(stepStr)
if err != nil {
httpserver.Errorf(w, r, "cannot parse 'step' arg: %s", err)
return
}
if step <= 0 {
httpserver.Errorf(w, r, "'step' must be bigger than zero")
}
// Obtain offset
offsetStr := r.FormValue("offset")
if offsetStr == "" {
offsetStr = "0s"
}
offset, err := promutils.ParseDuration(offsetStr)
if err != nil {
httpserver.Errorf(w, r, "cannot parse 'offset' arg: %s", err)
return
}
// Obtain field entries
fields := r.Form["field"]
// Prepare the query
q.AddCountByTimePipe(int64(step), int64(offset), fields)
q.Optimize()
var mLock sync.Mutex
m := make(map[string]*hitsSeries)
writeBlock := func(_ uint, timestamps []int64, columns []logstorage.BlockColumn) {
if len(columns) == 0 || len(columns[0].Values) == 0 {
return
}
timestampValues := columns[0].Values
hitsValues := columns[len(columns)-1].Values
columns = columns[1 : len(columns)-1]
bb := blockResultPool.Get()
for i := range timestamps {
timestampStr := strings.Clone(timestampValues[i])
hitsStr := strings.Clone(hitsValues[i])
bb.Reset()
WriteLabelsForHits(bb, columns, i)
mLock.Lock()
hs, ok := m[string(bb.B)]
if !ok {
k := string(bb.B)
hs = &hitsSeries{}
m[k] = hs
}
hs.timestamps = append(hs.timestamps, timestampStr)
hs.values = append(hs.values, hitsStr)
mLock.Unlock()
}
blockResultPool.Put(bb)
}
// Execute the query
if err := vlstorage.RunQuery(ctx, tenantIDs, q, writeBlock); err != nil {
httpserver.Errorf(w, r, "cannot execute query [%s]: %s", q, err)
return
}
// Write response
w.Header().Set("Content-Type", "application/json")
WriteHitsSeries(w, m)
}
type hitsSeries struct {
timestamps []string
values []string
}
func (hs *hitsSeries) sort() {
sort.Sort(hs)
}
func (hs *hitsSeries) Len() int {
return len(hs.timestamps)
}
func (hs *hitsSeries) Swap(i, j int) {
hs.timestamps[i], hs.timestamps[j] = hs.timestamps[j], hs.timestamps[i]
hs.values[i], hs.values[j] = hs.values[j], hs.values[i]
}
func (hs *hitsSeries) Less(i, j int) bool {
return hs.timestamps[i] < hs.timestamps[j]
}
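`hitsSeries` stores timestamps and hit counts in two parallel string slices and implements `sort.Interface`, so `hs.sort()` reorders both slices together by timestamp. A standalone sketch of the same parallel-slice co-sorting idea; the type below merely mirrors the unexported `hitsSeries` for illustration:

```go
package main

import (
	"fmt"
	"sort"
)

// pairSlices mirrors hitsSeries: two parallel slices that must stay aligned while sorting.
type pairSlices struct {
	timestamps []string
	values     []string
}

func (p *pairSlices) Len() int { return len(p.timestamps) }

// Swap exchanges elements in both slices so rows stay paired.
func (p *pairSlices) Swap(i, j int) {
	p.timestamps[i], p.timestamps[j] = p.timestamps[j], p.timestamps[i]
	p.values[i], p.values[j] = p.values[j], p.values[i]
}

func (p *pairSlices) Less(i, j int) bool { return p.timestamps[i] < p.timestamps[j] }

func main() {
	p := &pairSlices{
		timestamps: []string{"2024-05-20T00:02:00Z", "2024-05-20T00:00:00Z", "2024-05-20T00:01:00Z"},
		values:     []string{"7", "3", "5"},
	}
	sort.Sort(p) // both slices end up ordered by timestamp
	fmt.Println(p.timestamps)
	fmt.Println(p.values)
}
```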
// ProcessFieldNamesRequest handles /select/logsql/field_names request.
//
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names
func ProcessFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
// Obtain field names for the given query
q.Optimize()
fieldNames, err := vlstorage.GetFieldNames(ctx, tenantIDs, q)
if err != nil {
httpserver.Errorf(w, r, "cannot obtain field names: %s", err)
return
}
slices.Sort(fieldNames)
// Write results
w.Header().Set("Content-Type", "application/json")
WriteFieldNamesResponse(w, fieldNames)
}
// ProcessFieldValuesRequest handles /select/logsql/field_values request.
//
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-field-values
func ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
// Parse fieldName query arg
fieldName := r.FormValue("field_name")
if fieldName == "" {
httpserver.Errorf(w, r, "missing 'field_name' query arg")
return
}
// Parse limit query arg
limit, err := httputils.GetInt(r, "limit")
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
if limit < 0 {
limit = 0
}
// Obtain unique values for the given field
q.Optimize()
values, err := vlstorage.GetFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
if err != nil {
httpserver.Errorf(w, r, "cannot obtain values for field %q: %s", fieldName, err)
return
}
if limit == 0 || len(values) < limit {
// Sort values only if their number is below the limit.
// Otherwise there is little sense in sorting, since the query may return
// different subset of values on every execution.
slices.Sort(values)
}
// Write results
w.Header().Set("Content-Type", "application/json")
WriteFieldValuesResponse(w, values)
}
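The three handlers above expose the new read endpoints. A hedged sketch of calling one of them from Go; the base URL assumes a local VictoriaLogs instance on its default `:9428` listen address, and the query, field name and limit are made-up examples, while the parameter names match the handler code above:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	// Parameters read by ProcessFieldValuesRequest above.
	params := url.Values{}
	params.Set("query", "_time:5m error") // LogsQL query selecting the logs to inspect
	params.Set("field_name", "host")      // field whose unique values should be returned
	params.Set("limit", "10")             // optional cap on the number of returned values

	resp, err := http.Get("http://localhost:9428/select/logsql/field_values?" + params.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // e.g. {"values":["host-1","host-2"]}
}
```

`/select/logsql/field_names` and `/select/logsql/hits` are queried the same way with their respective parameters (`hits` additionally accepts `step`, `offset` and repeated `field` args, as parsed above).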
// ProcessQueryRequest handles /select/logsql/query request.
+//
+// See https://docs.victoriametrics.com/victorialogs/querying/#http-api
func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
-// Extract tenantID
-tenantID, err := logstorage.GetTenantIDFromRequest(r)
+q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
-// Parse query
-qStr := r.FormValue("query")
-q, err := logstorage.ParseQuery(qStr)
-if err != nil {
-httpserver.Errorf(w, r, "cannot parse query [%s]: %s", qStr, err)
-return
-}
-// Parse optional start and end args
-start, okStart, err := getTimeNsec(r, "start")
-if err != nil {
-httpserver.Errorf(w, r, "%s", err)
-return
-}
-end, okEnd, err := getTimeNsec(r, "end")
-if err != nil {
-httpserver.Errorf(w, r, "%s", err)
-return
-}
-if okStart || okEnd {
-if !okStart {
-start = math.MinInt64
-}
-if !okEnd {
-end = math.MaxInt64
-}
-q.AddTimeFilter(start, end)
-}
// Parse limit query arg
limit, err := httputils.GetInt(r, "limit")
if err != nil {
@@ -62,14 +218,11 @@ func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Req
if limit > 0 {
q.AddPipeLimit(uint64(limit))
}
-q.Optimize()
-tenantIDs := []logstorage.TenantID{tenantID}
bw := getBufferedWriter(w)
writeBlock := func(_ uint, timestamps []int64, columns []logstorage.BlockColumn) {
-if len(columns) == 0 {
+if len(columns) == 0 || len(columns[0].Values) == 0 {
return
}
@@ -81,20 +234,57 @@ func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Req
blockResultPool.Put(bb)
}
-w.Header().Set("Content-Type", "application/stream+json; charset=utf-8")
+w.Header().Set("Content-Type", "application/stream+json")
+q.Optimize()
err = vlstorage.RunQuery(ctx, tenantIDs, q, writeBlock)
bw.FlushIgnoreErrors()
putBufferedWriter(bw)
if err != nil {
-httpserver.Errorf(w, r, "cannot execute query [%s]: %s", qStr, err)
+httpserver.Errorf(w, r, "cannot execute query [%s]: %s", q, err)
}
}
var blockResultPool bytesutil.ByteBufferPool
func parseCommonArgs(r *http.Request) (*logstorage.Query, []logstorage.TenantID, error) {
// Extract tenantID
tenantID, err := logstorage.GetTenantIDFromRequest(r)
if err != nil {
return nil, nil, fmt.Errorf("cannot obtain tenanID: %w", err)
}
tenantIDs := []logstorage.TenantID{tenantID}
// Parse query
qStr := r.FormValue("query")
q, err := logstorage.ParseQuery(qStr)
if err != nil {
return nil, nil, fmt.Errorf("cannot parse query [%s]: %s", qStr, err)
}
// Parse optional start and end args
start, okStart, err := getTimeNsec(r, "start")
if err != nil {
return nil, nil, err
}
end, okEnd, err := getTimeNsec(r, "end")
if err != nil {
return nil, nil, err
}
if okStart || okEnd {
if !okStart {
start = math.MinInt64
}
if !okEnd {
end = math.MaxInt64
}
q.AddTimeFilter(start, end)
}
return q, tenantIDs, nil
}
func getTimeNsec(r *http.Request, argName string) (int64, bool, error) {
s := r.FormValue(argName)
if s == "" {

View file

@@ -140,12 +140,27 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
}
-switch {
-case path == "/logsql/query":
+switch path {
+case "/logsql/query":
logsqlQueryRequests.Inc()
httpserver.EnableCORS(w, r)
logsql.ProcessQueryRequest(ctx, w, r)
return true
+case "/logsql/field_values":
+logsqlFieldValuesRequests.Inc()
+httpserver.EnableCORS(w, r)
+logsql.ProcessFieldValuesRequest(ctx, w, r)
+return true
+case "/logsql/field_names":
+logsqlFieldNamesRequests.Inc()
+httpserver.EnableCORS(w, r)
+logsql.ProcessFieldNamesRequest(ctx, w, r)
+return true
+case "/logsql/hits":
+logsqlHitsRequests.Inc()
+httpserver.EnableCORS(w, r)
+logsql.ProcessHitsRequest(ctx, w, r)
+return true
default:
return false
}
@@ -165,5 +180,8 @@ func getMaxQueryDuration(r *http.Request) time.Duration {
}
var (
logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
+logsqlFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_values"}`)
+logsqlFieldNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_names"}`)
+logsqlHitsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/hits"}`)
)

View file

@@ -107,10 +107,22 @@ func MustAddRows(lr *logstorage.LogRows) {
}
// RunQuery runs the given q and calls writeBlock for the returned data blocks
-func RunQuery(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, writeBlock func(workerID uint, timestamps []int64, columns []logstorage.BlockColumn)) error {
+func RunQuery(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, writeBlock logstorage.WriteBlockFunc) error {
return strg.RunQuery(ctx, tenantIDs, q, writeBlock)
}
+// GetFieldNames executes q and returns field names seen in results.
+func GetFieldNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]string, error) {
+return strg.GetFieldNames(ctx, tenantIDs, q)
+}
+// GetFieldValues executes q and returns unique values for the fieldName seen in results.
+//
+// If limit > 0, then up to limit unique values are returned.
+func GetFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string, limit uint64) ([]string, error) {
+return strg.GetFieldValues(ctx, tenantIDs, q, fieldName, limit)
+}
func writeStorageMetrics(w io.Writer, strg *logstorage.Storage) {
var ss logstorage.StorageStats
strg.UpdateStats(&ss)
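The `RunQuery` wrapper above now accepts a `logstorage.WriteBlockFunc` instead of spelling out the callback type. A hedged sketch of a caller-side callback, assuming `WriteBlockFunc` keeps the shape of the old explicit signature shown in the removed line (`func(workerID uint, timestamps []int64, columns []logstorage.BlockColumn)`) and that `BlockColumn` exposes the `Name` and `Values` fields used elsewhere in this commit; imports of `context`, `logstorage` and `vlstorage` are assumed:

```go
// collectRows runs q and walks every (field name, value) pair of every returned row.
// The callback may be invoked concurrently by multiple workers, so real callers that
// aggregate results should guard shared state with a mutex, as ProcessHitsRequest does.
func collectRows(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) error {
	writeBlock := func(workerID uint, timestamps []int64, columns []logstorage.BlockColumn) {
		for i := range timestamps {
			for _, c := range columns {
				_ = c.Name      // field name of the column
				_ = c.Values[i] // value of that field for row i
			}
		}
	}
	return vlstorage.RunQuery(ctx, tenantIDs, q, writeBlock)
}
```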

View file

@@ -19,6 +19,25 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
## tip
+## [v0.8.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.8.0-victorialogs)
+Released at 2024-05-20
+* FEATURE: add ability to extract JSON fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe).
+* FEATURE: add ability to extract [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe).
+* FEATURE: add ability to extract arbitrary text from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into the output fields. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe).
+* FEATURE: add ability to put arbitrary [queries](https://docs.victoriametrics.com/victorialogs/logsql/#query-syntax) inside [`in()` filter](https://docs.victoriametrics.com/victorialogs/logsql/#multi-exact-filter).
+* FEATURE: add support for post-filtering of query results with [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe).
+* FEATURE: allow applying individual [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) per each [stats function](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats-with-additional-filters).
+* FEATURE: allow passing string values to [`min`](https://docs.victoriametrics.com/victorialogs/logsql/#min-stats) and [`max`](https://docs.victoriametrics.com/victorialogs/logsql/#max-stats) functions. Previously only numeric values could be passed to them.
+* FEATURE: speed up [`sort ... limit N` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe) for typical cases.
+* FEATURE: allow using more convenient syntax for [`range` filters](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter) if the upper or lower bound isn't needed. For example, it is possible to write `response_size:>=10KiB` instead of `response_size:range[10KiB, inf)`, or `temperature:<42` instead of `temperature:range(-inf, 42)`.
+* FEATURE: add `/select/logsql/hits` HTTP endpoint for returning the number of matching logs per the given time bucket over the selected time range. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats) for details.
+* FEATURE: add `/select/logsql/field_names` HTTP endpoint for returning [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names) for details.
+* FEATURE: add `/select/logsql/field_values` HTTP endpoint for returning unique values for the given [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) obtained from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-values) for details.
+* BUGFIX: properly take into account `offset` in the [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe) when it already has `limit`. For example, `_time:5m | sort by (foo) offset 20 limit 10`.
## [v0.7.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.7.0-victorialogs)
Released at 2024-05-15

View file

@@ -184,7 +184,7 @@ For example, the following query selects all the logs for the last 5 minutes by
_time:5m
```
-Additionally to filters, LogQL query may contain arbitrary mix of optional actions for processing the selected logs. These actions are delimited by `|` and are known as `pipes`.
+Additionally to filters, LogsQL query may contain arbitrary mix of optional actions for processing the selected logs. These actions are delimited by `|` and are known as [`pipes`](#pipes).
For example, the following query uses [`stats` pipe](#stats-pipe) for returning the number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with the `error` [word](#word) for the last 5 minutes:
@@ -213,7 +213,6 @@ single quotes `'` and backticks:
If in doubt, it is recommended to quote field names and filter args.
The list of LogsQL filters:
- [Time filter](#time-filter) - matches logs with [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) in the given time range
@@ -653,16 +652,16 @@ log.level:in("error", "fatal")
It works very fast for long lists passed to `in()`.
-The future VictoriaLogs versions will allow passing arbitrary [queries](#query-syntax) into `in()` filter.
-For example, the following query selects all the logs for the last hour for users, who visited pages with `admin` [word](#word) in the `path`
+It is possible to pass arbitrary [query](#query-syntax) inside `in(...)` filter in order to match against the results of this query.
+The query inside `in(...)` must end with [`fields`](#fields-pipe) pipe containing a single field name, so VictoriaLogs could
+fetch results from this field. For example, the following query selects all the logs for the last 5 minutes for users,
+who visited pages with `admin` [word](#word) in the `path` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
during the last day:
```logsql
-_time:1h AND user_id:in(_time:1d AND path:admin | fields user_id)
+_time:5m AND user_id:in(_time:1d AND path:admin | fields user_id)
```
-See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
See also:
- [Exact filter](#exact-filter)
@@ -825,6 +824,18 @@ for searching for log entries with request durations exceeding 4.2 seconds:
request.duration:range(4.2, Inf)
```
+This query can be shortened to:
+```logsql
+request.duration:>4.2
+```
+The following query returns logs with request durations smaller or equal to 1.5 seconds:
+```logsql
+request.duration:<=1.5
+```
The lower and the upper bounds of the range are excluded by default. If they must be included, then substitute the corresponding
parentheses with square brackets. For example:
@@ -832,17 +843,13 @@ parentheses with square brackets. For example:
- `range(1, 10]` includes `10` in the matching range
- `range[1, 10]` includes `1` and `10` in the matching range
-The range boundaries can be expressed in the following forms:
-- Hexadecimal form. For example, `range(0xff, 0xABCD)`.
-- Binary form. For example, `range(0b100110, 0b11111101)`
-- Integer form with `_` delimiters for better readability. For example, `range(1_000, 2_345_678)`.
+The range boundaries can contain any [supported numeric values](#numeric-values).
Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds`
[log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text.
Extract the numeric value from the message with `parse(_msg, "the request took <request_duration> seconds")` [transformation](#transformations)
-and then apply the `range()` [post-filter](#post-filters) to the extracted `request_duration` field.
+and then apply the `range()` [filter pipe](#filter-pipe) to the extracted `request_duration` field.
Performance tips:
@@ -884,7 +891,7 @@ user.ip:ipv4_range("1.2.3.4")
Note that the `ipv4_range()` doesn't match a string with IPv4 address if this string contains other text. For example, `ipv4_range("127.0.0.0/24")`
doesn't match `request from 127.0.0.1: done` [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
since the `127.0.0.1` ip is surrounded by other text. Extract the IP from the message with `parse(_msg, "request from <ip>: done")` [transformation](#transformations)
-and then apply the `ipv4_range()` [post-filter](#post-filters) to the extracted `ip` field.
+and then apply the `ipv4_range()` [filter pipe](#filter-pipe) to the extracted `ip` field.
Hints:
@@ -1045,13 +1052,18 @@ LogsQL supports the following pipes:
- [`copy`](#copy-pipe) copies [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`delete`](#delete-pipe) deletes [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+- [`extract`](#extract-pipe) extracts the specified text into the given log fields.
+- [`field_names`](#field_names-pipe) returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`fields`](#fields-pipe) selects the given set of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+- [`filter`](#filter-pipe) applies additional [filters](#filters) to results.
- [`limit`](#limit-pipe) limits the number of selected logs.
- [`offset`](#offset-pipe) skips the given number of selected logs.
- [`rename`](#rename-pipe) renames [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`sort`](#sort-pipe) sorts logs by the given [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`stats`](#stats-pipe) calculates various stats over the selected logs.
- [`uniq`](#uniq-pipe) returns unique log entries.
+- [`unpack_json`](#unpack_json-pipe) unpacks JSON fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+- [`unpack_logfmt`](#unpack_logfmt-pipe) unpacks [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
### copy pipe
@@ -1096,6 +1108,97 @@ See also:
- [`rename` pipe](#rename-pipe)
- [`fields` pipe](#fields-pipe)
### extract pipe
`| extract from field_name "pattern"` [pipe](#pipes) allows extracting additional fields specified in the `pattern` from the given
`field_name` [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). Existing log fields remain unchanged
after the `| extract ...` pipe.
`| extract ...` pipe can be useful for extracting additional fields needed for further data processing with other pipes such as [`stats` pipe](#stats-pipe) or [`sort` pipe](#sort-pipe).
For example, the following query selects logs with the `error` [word](#word) for the last day,
extracts ip address from [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) into `ip` field and then calculates top 10 ip addresses
with the biggest number of logs:
```logsql
_time:1d error | extract from _msg "ip=<ip> " | stats by (ip) count() logs | sort by (logs) desc limit 10
```
It is expected that `_msg` field contains `ip=...` substring, which ends with space. For example, `error from ip=1.2.3.4, user_id=42`.
If the `| extract ...` pipe is applied to [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field), then the `from _msg` part can be omitted.
For example, the following query is equivalent to the previous one:
```logsql
_time:1d error | extract "ip=<ip> " | stats by (ip) count() logs | sort by (logs) desc limit 10
```
See also:
- [format for extract pipe pattern](#format-for-extract-pipe-pattern)
- [`unpack_json` pipe](#unpack_json-pipe)
- [`unpack_logfmt` pipe](#unpack_logfmt-pipe)
#### Format for extract pipe pattern
The `pattern` part from [`| extract from src_field "pattern"` pipe](#extract-pipe) may contain arbitrary text, which is matched as is against the `src_field` value.
Additionally to arbitrary text, the `pattern` may contain placeholders in the form `<...>`, which match any strings, including empty strings.
Placeholders may be named, such as `<ip>`, or anonymous, such as `<_>`. Named placeholders extract the matching text into
the corresponding [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
Anonymous placeholders are useful for skipping arbitrary text during pattern matching.
For example, if [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) contains the following text:
```
1.2.3.4 GET /foo/bar?baz 404 "Mozilla foo bar baz" some tail here
```
Then the following `| extract ...` [pipe](#pipes) can be used for extracting `ip`, `path` and `user_agent` fields from it:
```
| extract '<ip> <_> <path> <_> "<user_agent>"'
```
Note that the user-agent part of the log message is in double quotes. This means that it may contain special chars, including escaped double quote, e.g. `\"`.
This may break proper matching of the string in double quotes.
VictoriaLogs automatically detects the whole string in quotes and automatically decodes it if the first char in the placeholder is double quote or backtick.
So it is better to use the following `pattern` for proper matching of quoted strings:
```
| extract "<ip> <_> <path> <_> <user_agent>"
```
Note that the `user_agent` now matches double quotes, but VictoriaLogs automatically unquotes the matching string before storing it in the `user_agent` field.
This property is useful for extracting JSON strings. For example, the following `pattern` properly extracts the `message` JSON string into `msg` field:
```
| extract '"message":<msg>'
```
If some special chars such as `<` must be matched by the `pattern`, then they can be [html-escaped](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references).
For example, the following `pattern` properly matches `a < 123.456` text:
```
| extract "<left> &lt; <right>"
```
### field_names pipe
Sometimes it may be needed to get all the field names for the selected results. This may be done with `| field_names ...` [pipe](#pipes).
For example, the following query returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
from the logs over the last 5 minutes:
```logsql
_time:5m | field_names as names
```
Field names are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
See also:
- [`uniq` pipe](#uniq-pipe)
### fields pipe
By default all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) are returned in the response.
@@ -1112,6 +1215,23 @@ See also:
- [`rename` pipe](#rename-pipe)
- [`delete` pipe](#delete-pipe)
### filter pipe
Sometimes it is needed to apply additional filters on the calculated results. This can be done with `| filter ...` [pipe](#pipes).
The `filter` pipe can contain arbitrary [filters](#filters).
For example, the following query returns `host` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) values
if the number of log messages with the `error` [word](#word) for them over the last hour exceeds `1_000`:
```logsql
_time:1h error | stats by (host) count() logs_count | filter logs_count:> 1_000
```
See also:
- [`stats` pipe](#stats-pipe)
- [`sort` pipe](#sort-pipe)
### limit pipe
If only a subset of selected logs must be processed, then `| limit N` [pipe](#pipes) can be used, where `N` can contain any [supported integer numeric value](#numeric-values).
@@ -1200,6 +1320,12 @@ The reverse order can be applied globally via `desc` keyword after `by(...)` cla
_time:5m | sort by (foo, bar) desc
```
The `by` keyword can be skipped in `sort ...` pipe. For example, the following query is equivalent to the previous one:
```logsql
_time:5m | sort (foo, bar) desc
```
Sorting of big number of logs can consume a lot of CPU time and memory. Sometimes it is enough to return the first `N` entries with the biggest
or the smallest values. This can be done by adding `limit N` to the end of `sort ...` pipe.
Such a query consumes lower amounts of memory when sorting big number of logs, since it keeps in memory only `N` log entries.
@@ -1232,35 +1358,6 @@ See also:
- [`limit` pipe](#limit-pipe)
- [`offset` pipe](#offset-pipe)
-### uniq pipe
-`| uniq ...` pipe allows returning only unique results over the selected logs. For example, the following LogsQL query
-returns unique values for `ip` [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
-over logs for the last 5 minutes:
-```logsql
-_time:5m | uniq by (ip)
-```
-It is possible to specify multiple fields inside `by(...)` clause. In this case all the unique sets for the given fields
-are returned. For example, the following query returns all the unique `(host, path)` pairs for the logs over the last 5 minutes:
-```logsql
-_time:5m | uniq by (host, path)
-```
-Unique entries are stored in memory during query execution. Big number of unique selected entries may require a lot of memory.
-Sometimes it is enough to return up to `N` unique entries. This can be done by adding `limit N` after `by (...)` clause.
-This allows limiting memory usage. For example, the following query returns up to 100 unique `(host, path)` pairs for the logs over the last 5 minutes:
-```logsql
-_time:5m | uniq by (host, path) limit 100
-```
-See also:
-- [`uniq_values` stats function](#uniq_values-stats)
### stats pipe
`| stats ...` pipe allows calculating various stats over the selected logs. For example, the following LogsQL query
@@ -1293,6 +1390,13 @@ _time:5m | stats count() logs_total, count_uniq(_stream) streams_total
See also:
- [stats by fields](#stats-by-fields)
- [stats by time buckets](#stats-by-time-buckets)
- [stats by time buckets with timezone offset](#stats-by-time-buckets-with-timezone-offset)
- [stats by field buckets](#stats-by-field-buckets)
- [stats by IPv4 buckets](#stats-by-ipv4-buckets)
- [stats with additional filters](#stats-with-additional-filters)
- [stats pipe functions](#stats-pipe-functions)
- [`sort` pipe](#sort-pipe)
@@ -1316,6 +1420,12 @@ grouped by `(host, path)` fields:
_time:5m | stats by (host, path) count() logs_total, count_uniq(ip) ips_total
```
The `by` keyword can be skipped in `stats ...` pipe. For example, the following query is equivalent to the previous one:
```logsql
_time:5m | stats (host, path) count() logs_total, count_uniq(ip) ips_total
```
#### Stats by time buckets
The following syntax can be used for calculating stats grouped by time buckets:
@@ -1384,6 +1494,139 @@ extracted from the `ip` [log field](https://docs.victoriametrics.com/victorialog
_time:5m | stats by (ip:/24) count() requests_per_subnet
```
#### Stats with additional filters
Sometimes it is needed to calculate stats on different subsets of matching logs. This can be done by inserting `if (<any_filters>)` condition
between [stats function](#stats-pipe-functions) and `result_name`, where `any_filters` can contain arbitrary [filters](#filters).
For example, the following query calculates individually the number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with `GET`, `POST` and `PUT` [words](#word), additionally to the total number of logs over the last 5 minutes:
```logsql
_time:5m | stats
count() if (GET) gets,
count() if (POST) posts,
count() if (PUT) puts,
count() total
```
### uniq pipe
`| uniq ...` pipe allows returning only unique results over the selected logs. For example, the following LogsQL query
returns unique values for `ip` [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes:
```logsql
_time:5m | uniq by (ip)
```
It is possible to specify multiple fields inside `by(...)` clause. In this case all the unique sets for the given fields
are returned. For example, the following query returns all the unique `(host, path)` pairs for the logs over the last 5 minutes:
```logsql
_time:5m | uniq by (host, path)
```
The unique entries are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
Unique entries are stored in memory during query execution. Big number of unique selected entries may require a lot of memory.
Sometimes it is enough to return up to `N` unique entries. This can be done by adding `limit N` after `by (...)` clause.
This allows limiting memory usage. For example, the following query returns up to 100 unique `(host, path)` pairs for the logs over the last 5 minutes:
```logsql
_time:5m | uniq by (host, path) limit 100
```
The `by` keyword can be skipped in the `uniq ...` pipe. For example, the following query is equivalent to the previous one:
```logsql
_time:5m | uniq (host, path) limit 100
```
See also:
- [`uniq_values` stats function](#uniq_values-stats)
### unpack_json pipe
`| unpack_json from field_name` pipe unpacks `{"k1":"v1", ..., "kN":"vN"}` JSON from the given `field_name` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
into `k1`, ..., `kN` field names with the corresponding `v1`, ..., `vN` values. It overrides existing fields with names from the `k1`, ..., `kN` list. Other fields remain untouched.
Nested JSON is unpacked according to the rules defined [here](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). For example, `{"foo":{"bar":"baz"}}` is unpacked into a single `foo.bar` field with the `baz` value.
For example, the following query unpacks JSON fields from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) across logs for the last 5 minutes:
```logsql
_time:5m | unpack_json from _msg
```
The `from _msg` part can be omitted when JSON fields are unpacked from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
The following query is equivalent to the previous one:
```logsql
_time:5m | unpack_json
```
If you want to make sure that the unpacked JSON fields do not clash with the existing fields, then specify a common prefix for all the fields extracted from JSON
by adding `result_prefix "prefix_name"` to `unpack_json`. For example, the following query adds the `foo_` prefix to all the fields unpacked
from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
```logsql
_time:5m | unpack_json result_prefix "foo_"
```
Performance tip: if you need to extract a single field from a long JSON, it is faster to use the [`extract` pipe](#extract-pipe). For example, the following query extracts the `"ip"` field from the JSON
stored in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
```logsql
_time:5m | extract '"ip":<field_value>'
```
See also:
- [`unpack_logfmt` pipe](#unpack_logfmt-pipe)
- [`extract` pipe](#extract-pipe)
### unpack_logfmt pipe
`| unpack_logfmt from field_name` pipe unpacks `k1=v1 ... kN=vN` [logfmt](https://brandur.org/logfmt) fields
from the given `field_name` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into `k1`, ... `kN` field names
with the corresponding `v1`, ..., `vN` values. It overrides existing fields with names from the `k1`, ..., `kN` list. Other fields remain untouched.
For example, the following query unpacks [logfmt](https://brandur.org/logfmt) fields from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
across logs for the last 5 minutes:
```logsql
_time:5m | unpack_logfmt from _msg
```
The `from _msg` part can be omitted when [logfmt](https://brandur.org/logfmt) fields are unpacked from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
The following query is equivalent to the previous one:
```logsql
_time:5m | unpack_logfmt
```
If you want to make sure that the unpacked [logfmt](https://brandur.org/logfmt) fields do not clash with the existing fields, then specify a common prefix for all the fields extracted from logfmt
by adding `result_prefix "prefix_name"` to `unpack_logfmt`. For example, the following query adds the `foo_` prefix to all the fields unpacked
from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
```logsql
_time:5m | unpack_logfmt result_prefix "foo_"
```
Performance tip: if you need to extract a single field from a long [logfmt](https://brandur.org/logfmt) line, it is faster to use the [`extract` pipe](#extract-pipe).
For example, the following query extracts the `"ip"` field from the [logfmt](https://brandur.org/logfmt) line stored
in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
```logsql
_time:5m | extract ' ip=<field_value>'
```
See also:
- [`unpack_json` pipe](#unpack_json-pipe)
- [`extract` pipe](#extract-pipe)
## stats pipe functions ## stats pipe functions
LogsQL supports the following functions for [`stats` pipe](#stats-pipe): LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
@ -1505,9 +1748,8 @@ See also:
### max stats ### max stats
`max(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the maximum value across `max(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the maximum value across
all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
Non-numeric values are ignored.
For example, the following query returns the maximum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) For example, the following query returns the maximum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes: over logs for the last 5 minutes:
@ -1543,9 +1785,8 @@ See also:
### min stats ### min stats
`min(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the minimum value across `min(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the minimum value across
all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
Non-numeric values are ignored.
For example, the following query returns the minimum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) For example, the following query returns the minimum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes: over logs for the last 5 minutes:
@ -1678,48 +1919,37 @@ See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) fo
## Transformations ## Transformations
It is possible to perform various transformations on the [selected log entries](#filters) at client side LogsQL supports the following transformations on the log entries selected with [filters](#filters):
with `jq`, `awk`, `cut`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
LogsQL will support the following transformations for the [selected](#filters) log entries: - Extracting arbitrary text from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided pattern.
See [these docs](#extract-pipe) for details.
- Unpacking JSON fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). See [these docs](#unpack_json-pipe).
- Unpacking [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). See [these docs](#unpack_logfmt-pipe).
LogsQL will support the following transformations in the future:
- Extracting the specified fields from text [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided pattern.
- Extracting the specified fields from JSON strings stored inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- Extracting the specified fields from [logfmt](https://brandur.org/logfmt) strings stored
inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- Creating a new field from existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) - Creating a new field from existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
according to the provided format. according to the provided format.
- Creating a new field according to math calculations over existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). - Creating a new field according to math calculations over existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- Parsing duration strings into floating-point seconds for further [stats calculations](#stats-pipe). - Parsing duration strings into floating-point seconds for further [stats calculations](#stats-pipe).
- Creating a boolean field with the result of arbitrary [post-filters](#post-filters) applied to the current fields.
- Creating an integer field with the length of the given field value. This can be useful for [stats calculations](#stats-pipe).
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details. See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
It is also possible to perform various transformations on the [selected log entries](#filters) at client side
with `jq`, `awk`, `cut`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
## Post-filters ## Post-filters
It is possible to perform post-filtering on the [selected log entries](#filters) at client side with `grep` or similar Unix commands Post-filtering of query results can be performed at any step by using [`filter` pipe](#filter-pipe).
It is also possible to perform post-filtering of the [selected log entries](#filters) at client side with `grep` and similar Unix commands
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line). according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
LogsQL will support post-filtering on the original [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and fields created by various [transformations](#transformations). The following post-filters will be supported:
- Full-text [filtering](#filters).
- [Logical filtering](#logical-filter).
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
## Stats ## Stats
Stats over the selected logs can be calculated via [`stats` pipe](#stats-pipe). Stats over the selected logs can be calculated via [`stats` pipe](#stats-pipe).
LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) It is also possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
and fields created by [transformations](#transformations):
It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats.
For example, `sum(response_size) if (is_admin:true)` calculates the total response size for admins only.
It is possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line). according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
## Sorting ## Sorting

View file

@ -35,15 +35,13 @@ The following functionality is planned in the future versions of VictoriaLogs:
- Syslog - Syslog
- Journald (systemd) - Journald (systemd)
- Add missing functionality to [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html): - Add missing functionality to [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html):
- [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context).
- [Transformation functions](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#transformations). - [Transformation functions](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#transformations).
- [Post-filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#post-filters). - [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context).
- The ability to use subqueries inside [in()](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#multi-exact-filter) function.
- Live tailing for [LogsQL filters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#filters) aka `tail -f`. - Live tailing for [LogsQL filters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#filters) aka `tail -f`.
- Web UI with the following abilities: - Web UI with the following abilities:
- Explore the ingested logs ([partially done](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui)). - Explore the ingested logs ([partially done](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui)).
- Build graphs over time for the ingested logs. - Build graphs over time for the ingested logs via [hits HTTP API](https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats).
- Integration with Grafana. - Integration with Grafana ([partially done](https://github.com/VictoriaMetrics/victorialogs-datasource)).
- Ability to make instant snapshots and backups in the way [similar to VictoriaMetrics](https://docs.victoriametrics.com/#how-to-work-with-snapshots). - Ability to make instant snapshots and backups in the way [similar to VictoriaMetrics](https://docs.victoriametrics.com/#how-to-work-with-snapshots).
- Cluster version of VictoriaLogs. - Cluster version of VictoriaLogs.
- Ability to store data to object storage (such as S3, GCS, Minio). - Ability to store data to object storage (such as S3, GCS, Minio).

View file

@ -88,6 +88,188 @@ curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID:
The number of requests to `/select/logsql/query` can be [monitored](https://docs.victoriametrics.com/VictoriaLogs/#monitoring) The number of requests to `/select/logsql/query` can be [monitored](https://docs.victoriametrics.com/VictoriaLogs/#monitoring)
with `vl_http_requests_total{path="/select/logsql/query"}` metric. with `vl_http_requests_total{path="/select/logsql/query"}` metric.
### Querying hits stats
VictoriaLogs provides `/select/logsql/hits?query=<query>&start=<start>&end=<end>&step=<step>` HTTP endpoint, which returns the number
of matching log entries for the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]`
time range grouped by `<step>` buckets. The returned results are sorted by time.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it equals the maximum timestamp across logs stored in VictoriaLogs.
The `<step>` arg can contain values in [the format specified here](https://docs.victoriametrics.com/victorialogs/logsql/#stats-by-time-buckets).
If `<step>` is missing, then it equals `1d` (one day).
For example, the following command returns the per-hour number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) over logs for the last 3 hours:
```sh
curl http://localhost:9428/select/logsql/hits -d 'query=error' -d 'start=3h' -d 'step=1h'
```
Below is an example JSON output returned from this endpoint:
```json
{
"hits": [
{
"fields": {},
"timestamps": [
"2024-01-01T00:00:00Z",
"2024-01-01T01:00:00Z",
"2024-01-01T02:00:00Z"
],
"values": [
410339,
450311,
899506
]
}
]
}
```
Additionally, the `offset=<offset>` arg can be passed to `/select/logsql/hits` in order to group buckets according to the given timezone offset.
The `<offset>` arg can contain values in [the format specified here](https://docs.victoriametrics.com/victorialogs/logsql/#duration-values).
For example, the following command returns the per-day number of logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
over the last week in the New York time zone (`-4h`):
```sh
curl http://localhost:9428/select/logsql/hits -d 'query=error' -d 'start=1w' -d 'step=1d' -d 'offset=-4h'
```
Additionally, any number of `field=<field_name>` args can be passed to `/select/logsql/hits` for grouping hits buckets by the mentioned `<field_name>` fields.
For example, the following command groups hits by the `level` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in addition to the provided `step`:
```sh
curl http://localhost:9428/select/logsql/hits -d 'query=*' -d 'start=3h' -d 'step=1h' -d 'field=level'
```
The grouped fields are put inside `"fields"` object:
```json
{
"hits": [
{
"fields": {
"level": "error"
},
"timestamps": [
"2024-01-01T00:00:00Z",
"2024-01-01T01:00:00Z",
"2024-01-01T02:00:00Z"
],
"values": [
25,
20,
15
]
},
{
"fields": {
"level": "info"
},
"timestamps": [
"2024-01-01T00:00:00Z",
"2024-01-01T01:00:00Z",
"2024-01-01T02:00:00Z"
],
"values": [
25625,
35043,
25230
]
}
]
}
```
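The endpoint can also be queried programmatically. Below is a minimal Go sketch (not part of VictoriaLogs itself) that requests per-hour hits for the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) and decodes the JSON structure shown above; it assumes VictoriaLogs listens on `localhost:9428`, as in the `curl` examples:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// hitsResponse mirrors the JSON structure returned by /select/logsql/hits.
type hitsResponse struct {
	Hits []struct {
		Fields     map[string]string `json:"fields"`
		Timestamps []string          `json:"timestamps"`
		Values     []uint64          `json:"values"`
	} `json:"hits"`
}

func main() {
	// The args match the curl example above: per-hour hits for the `error` word over the last 3 hours.
	params := url.Values{}
	params.Set("query", "error")
	params.Set("start", "3h")
	params.Set("step", "1h")

	resp, err := http.PostForm("http://localhost:9428/select/logsql/hits", params)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var hr hitsResponse
	if err := json.NewDecoder(resp.Body).Decode(&hr); err != nil {
		panic(err)
	}
	for _, h := range hr.Hits {
		for i, ts := range h.Timestamps {
			fmt.Printf("%s %v: %d hits\n", ts, h.Fields, h.Values[i])
		}
	}
}
```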
See also:
- [Querying field names](#querying-field-names)
- [Querying field values](#querying-field-values)
- [HTTP API](#http-api)
### Querying field names
VictoriaLogs provides `/select/logsql/field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns field names
from the results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it equals the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns field names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
for the last 5 minutes:
```sh
curl http://localhost:9428/select/logsql/field_names -d 'query=error' -d 'start=5m'
```
Below is an example JSON output returned from this endpoint:
```json
{
"names": [
"_msg",
"_stream",
"_time",
"host",
"level",
"location"
]
}
```
See also:
- [Querying field values](#querying-field-values)
- [Querying hits stats](#querying-hits-stats)
- [HTTP API](#http-api)
### Querying field values
VictoriaLogs provides `/select/logsql/field_values?query=<query>&field_name=<fieldName>&start=<start>&end=<end>` HTTP endpoint, which returns
unique values for the given `<fieldName>` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
from the results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it equals the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns the unique values for the `host` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) for the last 5 minutes:
```sh
curl http://localhost:9428/select/logsql/field_values -d 'query=error' -d 'field_name=host' -d 'start=5m'
```
Below is an example JSON output returned from this endpoint:
```json
{
"values": [
"host_0",
"host_1",
"host_10",
"host_100",
"host_1000"
]
}
```
The `/select/logsql/field_values` endpoint supports an optional `limit=N` query arg, which allows limiting the number of returned values to `N`.
The endpoint returns an arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for pagination over a big number of field values.
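Below is a minimal Go sketch (not part of VictoriaLogs itself) that queries `/select/logsql/field_values` with the `limit` arg and prints the returned values; it assumes VictoriaLogs listens on `localhost:9428`, as in the `curl` example above:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

func main() {
	// Request up to 100 unique `host` values across logs with the `error` word for the last 5 minutes.
	params := url.Values{}
	params.Set("query", "error")
	params.Set("field_name", "host")
	params.Set("start", "5m")
	params.Set("limit", "100")

	resp, err := http.PostForm("http://localhost:9428/select/logsql/field_values", params)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var result struct {
		Values []string `json:"values"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		panic(err)
	}
	for _, v := range result.Values {
		fmt.Println(v)
	}
}
```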
See also:
- [Querying field names](#querying-field-names)
- [Querying hits stats](#querying-hits-stats)
- [HTTP API](#http-api)
## Web UI ## Web UI
VictoriaLogs provides a simple Web UI for logs [querying](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) and exploration VictoriaLogs provides a simple Web UI for logs [querying](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) and exploration

View file

@ -1,71 +0,0 @@
package logjson
import (
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
func TestParserFailure(t *testing.T) {
f := func(data string) {
t.Helper()
p := GetParser()
err := p.ParseLogMessage([]byte(data))
if err == nil {
t.Fatalf("expecting non-nil error")
}
PutParser(p)
}
f("")
f("{foo")
f("[1,2,3]")
f(`{"foo",}`)
}
func TestParserSuccess(t *testing.T) {
f := func(data string, fieldsExpected []logstorage.Field) {
t.Helper()
p := GetParser()
err := p.ParseLogMessage([]byte(data))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(p.Fields, fieldsExpected) {
t.Fatalf("unexpected fields;\ngot\n%s\nwant\n%s", p.Fields, fieldsExpected)
}
PutParser(p)
}
f("{}", nil)
f(`{"foo":"bar"}`, []logstorage.Field{
{
Name: "foo",
Value: "bar",
},
})
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, []logstorage.Field{
{
Name: "foo.bar",
Value: "baz",
},
{
Name: "a",
Value: "1",
},
{
Name: "b",
Value: "true",
},
{
Name: "c",
Value: "[1,2]",
},
{
Name: "d",
Value: "false",
},
})
}

View file

@ -4,6 +4,7 @@ import (
"sync" "sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
) )
func getArena() *arena { func getArena() *arena {
@ -29,8 +30,12 @@ func (a *arena) reset() {
a.b = a.b[:0] a.b = a.b[:0]
} }
func (a *arena) preallocate(n int) {
a.b = slicesutil.ExtendCapacity(a.b, n)
}
func (a *arena) sizeBytes() int { func (a *arena) sizeBytes() int {
return len(a.b) return cap(a.b)
} }
func (a *arena) copyBytes(b []byte) []byte { func (a *arena) copyBytes(b []byte) []byte {
@ -41,9 +46,8 @@ func (a *arena) copyBytes(b []byte) []byte {
ab := a.b ab := a.b
abLen := len(ab) abLen := len(ab)
ab = append(ab, b...) ab = append(ab, b...)
result := ab[abLen:]
a.b = ab a.b = ab
return result return ab[abLen:]
} }
func (a *arena) copyBytesToString(b []byte) string { func (a *arena) copyBytesToString(b []byte) string {

View file

@ -11,8 +11,8 @@ func TestArena(t *testing.T) {
for i := 0; i < 10; i++ { for i := 0; i < 10; i++ {
a := getArena() a := getArena()
if n := a.sizeBytes(); n != 0 { if n := len(a.b); n != 0 {
t.Fatalf("unexpected non-zero size of empty arena: %d", n) t.Fatalf("unexpected non-zero length of empty arena: %d", n)
} }
// add values to arena // add values to arena
@ -35,9 +35,12 @@ func TestArena(t *testing.T) {
} }
} }
if n := a.sizeBytes(); n != valuesLen { if n := len(a.b); n != valuesLen {
t.Fatalf("unexpected arena size; got %d; want %d", n, valuesLen) t.Fatalf("unexpected arena size; got %d; want %d", n, valuesLen)
} }
if n := a.sizeBytes(); n < valuesLen {
t.Fatalf("unexpected arena capacity; got %d; want at least %d", n, valuesLen)
}
// Try allocating slices with different lengths // Try allocating slices with different lengths
bs := make([]string, 100) bs := make([]string, 100)
@ -47,9 +50,12 @@ func TestArena(t *testing.T) {
t.Fatalf("unexpected len(b); got %d; want %d", len(b), j) t.Fatalf("unexpected len(b); got %d; want %d", len(b), j)
} }
valuesLen += j valuesLen += j
if n := a.sizeBytes(); n != valuesLen { if n := len(a.b); n != valuesLen {
t.Fatalf("unexpected arena size; got %d; want %d", n, valuesLen) t.Fatalf("unexpected arena size; got %d; want %d", n, valuesLen)
} }
if n := a.sizeBytes(); n < valuesLen {
t.Fatalf("unexpected arena capacity; got %d; want at least %d", n, valuesLen)
}
for k := range b { for k := range b {
b[k] = byte(k) b[k] = byte(k)
} }

View file

@ -45,6 +45,8 @@ func (bm *bitmap) copyFrom(src *bitmap) {
} }
func (bm *bitmap) init(bitsLen int) { func (bm *bitmap) init(bitsLen int) {
bm.reset()
a := bm.a a := bm.a
wordsLen := (bitsLen + 63) / 64 wordsLen := (bitsLen + 63) / 64
a = slicesutil.SetLength(a, wordsLen) a = slicesutil.SetLength(a, wordsLen)
@ -126,6 +128,7 @@ func (bm *bitmap) forEachSetBit(f func(idx int) bool) {
if word == 0 { if word == 0 {
continue continue
} }
wordNew := word
for j := 0; j < 64; j++ { for j := 0; j < 64; j++ {
mask := uint64(1) << j mask := uint64(1) << j
if (word & mask) == 0 { if (word & mask) == 0 {
@ -136,9 +139,42 @@ func (bm *bitmap) forEachSetBit(f func(idx int) bool) {
break break
} }
if !f(idx) { if !f(idx) {
a[i] &= ^mask wordNew &= ^mask
} }
} }
if word != wordNew {
a[i] = wordNew
}
}
}
// forEachSetBitReadonly calls f for each set bit
func (bm *bitmap) forEachSetBitReadonly(f func(idx int)) {
if bm.areAllBitsSet() {
n := bm.bitsLen
for i := 0; i < n; i++ {
f(i)
}
return
}
a := bm.a
bitsLen := bm.bitsLen
for i, word := range a {
if word == 0 {
continue
}
for j := 0; j < 64; j++ {
mask := uint64(1) << j
if (word & mask) == 0 {
continue
}
idx := i*64 + j
if idx >= bitsLen {
break
}
f(idx)
}
} }
} }

View file

@ -32,7 +32,7 @@ func TestBitmap(t *testing.T) {
// Make sure that all the bits are set. // Make sure that all the bits are set.
nextIdx := 0 nextIdx := 0
bm.forEachSetBit(func(idx int) bool { bm.forEachSetBitReadonly(func(idx int) {
if idx >= i { if idx >= i {
t.Fatalf("index must be smaller than %d", i) t.Fatalf("index must be smaller than %d", i)
} }
@ -40,7 +40,6 @@ func TestBitmap(t *testing.T) {
t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx) t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx)
} }
nextIdx++ nextIdx++
return true
}) })
if !bm.areAllBitsSet() { if !bm.areAllBitsSet() {
@ -66,12 +65,11 @@ func TestBitmap(t *testing.T) {
} }
nextIdx = 1 nextIdx = 1
bm.forEachSetBit(func(idx int) bool { bm.forEachSetBitReadonly(func(idx int) {
if idx != nextIdx { if idx != nextIdx {
t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx) t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx)
} }
nextIdx += 2 nextIdx += 2
return true
}) })
// Clear all the bits // Clear all the bits
@ -93,9 +91,8 @@ func TestBitmap(t *testing.T) {
} }
bitsCount := 0 bitsCount := 0
bm.forEachSetBit(func(_ int) bool { bm.forEachSetBitReadonly(func(_ int) {
bitsCount++ bitsCount++
return true
}) })
if bitsCount != 0 { if bitsCount != 0 {
t.Fatalf("unexpected non-zero number of set bits remained: %d", bitsCount) t.Fatalf("unexpected non-zero number of set bits remained: %d", bitsCount)

View file

@ -0,0 +1,131 @@
package logstorage
import (
"testing"
)
func BenchmarkBitmapForEachSetBitReadonly(b *testing.B) {
const bitsLen = 64 * 1024
b.Run("no-zero-bits", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
benchmarkBitmapForEachSetBitReadonly(b, bm)
putBitmap(bm)
})
b.Run("half-zero-bits", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx%2 == 0
})
benchmarkBitmapForEachSetBitReadonly(b, bm)
putBitmap(bm)
})
b.Run("one-set-bit", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx == bitsLen/2
})
benchmarkBitmapForEachSetBitReadonly(b, bm)
putBitmap(bm)
})
}
func BenchmarkBitmapForEachSetBit(b *testing.B) {
const bitsLen = 64 * 1024
b.Run("no-zero-bits-noclear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
benchmarkBitmapForEachSetBit(b, bm, false)
putBitmap(bm)
})
b.Run("no-zero-bits-clear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
benchmarkBitmapForEachSetBit(b, bm, true)
putBitmap(bm)
})
b.Run("half-zero-bits-noclear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx%2 == 0
})
benchmarkBitmapForEachSetBit(b, bm, false)
putBitmap(bm)
})
b.Run("half-zero-bits-clear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx%2 == 0
})
benchmarkBitmapForEachSetBit(b, bm, true)
putBitmap(bm)
})
b.Run("one-set-bit-noclear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx == bitsLen/2
})
benchmarkBitmapForEachSetBit(b, bm, false)
putBitmap(bm)
})
b.Run("one-set-bit-clear", func(b *testing.B) {
bm := getBitmap(bitsLen)
bm.setBits()
bm.forEachSetBit(func(idx int) bool {
return idx == bitsLen/2
})
benchmarkBitmapForEachSetBit(b, bm, true)
putBitmap(bm)
})
}
func benchmarkBitmapForEachSetBitReadonly(b *testing.B, bm *bitmap) {
b.SetBytes(int64(bm.bitsLen))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
bmLocal := getBitmap(bm.bitsLen)
n := 0
for pb.Next() {
bmLocal.copyFrom(bm)
bmLocal.forEachSetBitReadonly(func(idx int) {
n++
})
}
putBitmap(bmLocal)
GlobalSink.Add(uint64(n))
})
}
func benchmarkBitmapForEachSetBit(b *testing.B, bm *bitmap, isClearBits bool) {
b.SetBytes(int64(bm.bitsLen))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
bmLocal := getBitmap(bm.bitsLen)
n := 0
for pb.Next() {
bmLocal.copyFrom(bm)
bmLocal.forEachSetBit(func(idx int) bool {
n++
return !isClearBits
})
if isClearBits {
if !bmLocal.isZero() {
panic("BUG: bitmap must have no set bits")
}
} else {
if bmLocal.isZero() {
panic("BUG: bitmap must have some set bits")
}
}
}
putBitmap(bmLocal)
GlobalSink.Add(uint64(n))
})
}

File diff suppressed because it is too large Load diff

View file

@ -146,7 +146,7 @@ func (bs *blockSearch) partPath() string {
return bs.bsw.p.path return bs.bsw.p.path
} }
func (bs *blockSearch) search(bsw *blockSearchWork) { func (bs *blockSearch) search(bsw *blockSearchWork, bm *bitmap) {
bs.reset() bs.reset()
bs.bsw = bsw bs.bsw = bsw
@ -154,23 +154,22 @@ func (bs *blockSearch) search(bsw *blockSearchWork) {
bs.csh.initFromBlockHeader(&bs.a, bsw.p, &bsw.bh) bs.csh.initFromBlockHeader(&bs.a, bsw.p, &bsw.bh)
// search rows matching the given filter // search rows matching the given filter
bm := getBitmap(int(bsw.bh.rowsCount)) bm.init(int(bsw.bh.rowsCount))
defer putBitmap(bm)
bm.setBits() bm.setBits()
bs.bsw.so.filter.apply(bs, bm) bs.bsw.so.filter.applyToBlockSearch(bs, bm)
bs.br.mustInit(bs, bm)
if bm.isZero() { if bm.isZero() {
// The filter doesn't match any logs in the current block. // The filter doesn't match any logs in the current block.
return return
} }
bs.br.mustInit(bs, bm)
// fetch the requested columns to bs.br. // fetch the requested columns to bs.br.
if bs.bsw.so.needAllColumns { if bs.bsw.so.needAllColumns {
bs.br.fetchAllColumns(bs, bm) bs.br.initAllColumns(bs, bm)
} else { } else {
bs.br.fetchRequestedColumns(bs, bm) bs.br.initRequestedColumns(bs, bm)
} }
} }

View file

@ -37,7 +37,22 @@ func (fs fieldsSet) getAll() []string {
return a return a
} }
func (fs fieldsSet) addFields(fields []string) {
for _, f := range fields {
fs.add(f)
}
}
func (fs fieldsSet) removeFields(fields []string) {
for _, f := range fields {
fs.remove(f)
}
}
func (fs fieldsSet) contains(field string) bool { func (fs fieldsSet) contains(field string) bool {
if field == "" {
field = "_msg"
}
_, ok := fs[field] _, ok := fs[field]
if !ok { if !ok {
_, ok = fs["*"] _, ok = fs["*"]
@ -45,28 +60,19 @@ func (fs fieldsSet) contains(field string) bool {
return ok return ok
} }
func (fs fieldsSet) removeAll(fields []string) {
for _, f := range fields {
fs.remove(f)
}
}
func (fs fieldsSet) remove(field string) { func (fs fieldsSet) remove(field string) {
if field == "*" { if field == "*" {
fs.reset() fs.reset()
return return
} }
if !fs.contains("*") { if !fs.contains("*") {
if field == "" {
field = "_msg"
}
delete(fs, field) delete(fs, field)
} }
} }
func (fs fieldsSet) addAll(fields []string) {
for _, f := range fields {
fs.add(f)
}
}
func (fs fieldsSet) add(field string) { func (fs fieldsSet) add(field string) {
if fs.contains("*") { if fs.contains("*") {
return return
@ -76,5 +82,8 @@ func (fs fieldsSet) add(field string) {
fs["*"] = struct{}{} fs["*"] = struct{}{}
return return
} }
if field == "" {
field = "_msg"
}
fs[field] = struct{}{} fs[field] = struct{}{}
} }

View file

@ -17,9 +17,10 @@ func TestFieldsSet(t *testing.T) {
} }
fs.add("foo") fs.add("foo")
fs.add("bar") fs.add("bar")
fs.add("")
s := fs.String() s := fs.String()
if s != "[bar,foo]" { if s != "[_msg,bar,foo]" {
t.Fatalf("unexpected String() result; got %s; want %s", s, "[bar,foo]") t.Fatalf("unexpected String() result; got %s; want %s", s, "[_msg,bar,foo]")
} }
if !fs.contains("foo") { if !fs.contains("foo") {
t.Fatalf("fs must contain foo") t.Fatalf("fs must contain foo")
@ -27,6 +28,12 @@ func TestFieldsSet(t *testing.T) {
if !fs.contains("bar") { if !fs.contains("bar") {
t.Fatalf("fs must contain bar") t.Fatalf("fs must contain bar")
} }
if !fs.contains("") {
t.Fatalf("fs must contain _msg")
}
if !fs.contains("_msg") {
t.Fatalf("fs must contain _msg")
}
if fs.contains("baz") { if fs.contains("baz") {
t.Fatalf("fs musn't contain baz") t.Fatalf("fs musn't contain baz")
} }
@ -41,6 +48,13 @@ func TestFieldsSet(t *testing.T) {
if fs.contains("bar") { if fs.contains("bar") {
t.Fatalf("fs mustn't contain bar") t.Fatalf("fs mustn't contain bar")
} }
fs.remove("")
if fs.contains("") {
t.Fatalf("fs mustn't contain _msg")
}
if fs.contains("_msg") {
t.Fatalf("fs mustn't contain _msg")
}
// verify * // verify *
fs.add("*") fs.add("*")
@ -59,25 +73,25 @@ func TestFieldsSet(t *testing.T) {
t.Fatalf("fs must be empty") t.Fatalf("fs must be empty")
} }
// verify addAll, getAll, removeAll // verify addFields, removeFields, getAll
fs.addAll([]string{"foo", "bar"}) fs.addFields([]string{"foo", "bar", "_msg"})
if !fs.contains("foo") || !fs.contains("bar") { if !fs.contains("foo") || !fs.contains("bar") || !fs.contains("_msg") {
t.Fatalf("fs must contain foo and bar") t.Fatalf("fs must contain foo, bar and _msg")
} }
a := fs.getAll() a := fs.getAll()
if !reflect.DeepEqual(a, []string{"bar", "foo"}) { if !reflect.DeepEqual(a, []string{"_msg", "bar", "foo"}) {
t.Fatalf("unexpected result from getAll(); got %q; want %q", a, []string{"bar", "foo"}) t.Fatalf("unexpected result from getAll(); got %q; want %q", a, []string{"_msg", "bar", "foo"})
} }
fs.removeAll([]string{"bar", "baz"}) fs.removeFields([]string{"bar", "baz", "_msg"})
if fs.contains("bar") || fs.contains("baz") { if fs.contains("bar") || fs.contains("baz") || fs.contains("_msg") {
t.Fatalf("fs mustn't contain bar and baz") t.Fatalf("fs mustn't contain bar, baz and _msg")
} }
if !fs.contains("foo") { if !fs.contains("foo") {
t.Fatalf("fs must contain foo") t.Fatalf("fs must contain foo")
} }
// verify clone // verify clone
fs.addAll([]string{"foo", "bar", "baz"}) fs.addFields([]string{"foo", "bar", "baz"})
fsStr := fs.String() fsStr := fs.String()
fsCopy := fs.clone() fsCopy := fs.clone()
fsCopyStr := fsCopy.String() fsCopyStr := fsCopy.String()

View file

@ -5,6 +5,104 @@ type filter interface {
// String returns string representation of the filter // String returns string representation of the filter
String() string String() string
// apply must update bm according to the filter applied to the given bs block // updateNeededFields must update neededFields with fields needed for the filter
apply(bs *blockSearch, bm *bitmap) updateNeededFields(neededFields fieldsSet)
// applyToBlockSearch must update bm according to the filter applied to the given bs block
applyToBlockSearch(bs *blockSearch, bm *bitmap)
// applyToBlockResult must update bm according to the filter applied to the given br block
applyToBlockResult(br *blockResult, bm *bitmap)
}
// visitFilter sequentially calls visitFunc for filters inside f.
//
// It stops calling visitFunc on the remaining filters as soon as visitFunc returns true.
// It returns the result of the last visitFunc call.
func visitFilter(f filter, visitFunc func(f filter) bool) bool {
switch t := f.(type) {
case *filterAnd:
return visitFilters(t.filters, visitFunc)
case *filterOr:
return visitFilters(t.filters, visitFunc)
case *filterNot:
return visitFilter(t.f, visitFunc)
default:
return visitFunc(f)
}
}
// visitFilters calls visitFunc per each filter in filters.
//
// It stops calling visitFunc on the remaining filters as soon as visitFunc returns true.
// It returns the result of the last visitFunc call.
func visitFilters(filters []filter, visitFunc func(f filter) bool) bool {
for _, f := range filters {
if visitFilter(f, visitFunc) {
return true
}
}
return false
}
// copyFilter recursively copies f filters with the help of copyFunc if visitFunc returns true for them.
//
// It doesn't copy other filters by returning them as is.
func copyFilter(f filter, visitFunc func(f filter) bool, copyFunc func(f filter) (filter, error)) (filter, error) {
switch t := f.(type) {
case *filterAnd:
filters, err := copyFilters(t.filters, visitFunc, copyFunc)
if err != nil {
return nil, err
}
fa := &filterAnd{
filters: filters,
}
return fa, nil
case *filterOr:
filters, err := copyFilters(t.filters, visitFunc, copyFunc)
if err != nil {
return nil, err
}
fo := &filterOr{
filters: filters,
}
return fo, nil
case *filterNot:
f, err := copyFilter(t.f, visitFunc, copyFunc)
if err != nil {
return nil, err
}
fn := &filterNot{
f: f,
}
return fn, nil
default:
if !visitFunc(t) {
// Nothing to copy
return t, nil
}
return copyFunc(t)
}
}
// copyFilters recursively copies filters with the help of copyFunc if visitFunc returns true for them.
//
// It doesn't copy other filters by returning them as is.
func copyFilters(filters []filter, visitFunc func(f filter) bool, copyFunc func(f filter) (filter, error)) ([]filter, error) {
if !visitFilters(filters, visitFunc) {
// Nothing to copy
return filters, nil
}
// Copy filters.
filtersNew := make([]filter, len(filters))
for i, f := range filters {
fNew, err := copyFilter(f, visitFunc, copyFunc)
if err != nil {
return nil, err
}
filtersNew[i] = fNew
}
return filtersNew, nil
} }

View file

@ -31,7 +31,24 @@ func (fa *filterAnd) String() string {
return strings.Join(a, " ") return strings.Join(a, " ")
} }
func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) { func (fa *filterAnd) updateNeededFields(neededFields fieldsSet) {
for _, f := range fa.filters {
f.updateNeededFields(neededFields)
}
}
func (fa *filterAnd) applyToBlockResult(br *blockResult, bm *bitmap) {
for _, f := range fa.filters {
f.applyToBlockResult(br, bm)
if bm.isZero() {
// Shortcut - there is no need in applying the remaining filters,
// since the result will be zero anyway.
return
}
}
}
func (fa *filterAnd) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
if !fa.matchMessageBloomFilter(bs) { if !fa.matchMessageBloomFilter(bs) {
// Fast path - fa doesn't match _msg bloom filter. // Fast path - fa doesn't match _msg bloom filter.
bm.resetBits() bm.resetBits()
@ -40,7 +57,7 @@ func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) {
// Slow path - verify every filter separately. // Slow path - verify every filter separately.
for _, f := range fa.filters { for _, f := range fa.filters {
f.apply(bs, bm) f.applyToBlockSearch(bs, bm)
if bm.isZero() { if bm.isZero() {
// Shortcut - there is no need in applying the remaining filters, // Shortcut - there is no need in applying the remaining filters,
// since the result will be zero anyway. // since the result will be zero anyway.

View file

@ -29,6 +29,10 @@ func (fp *filterAnyCasePhrase) String() string {
return fmt.Sprintf("%si(%s)", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.phrase)) return fmt.Sprintf("%si(%s)", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.phrase))
} }
func (fp *filterAnyCasePhrase) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterAnyCasePhrase) getTokens() []string { func (fp *filterAnyCasePhrase) getTokens() []string {
fp.tokensOnce.Do(fp.initTokens) fp.tokensOnce.Do(fp.initTokens)
return fp.tokens return fp.tokens
@ -47,7 +51,12 @@ func (fp *filterAnyCasePhrase) initPhraseLowercase() {
fp.phraseLowercase = strings.ToLower(fp.phrase) fp.phraseLowercase = strings.ToLower(fp.phrase)
} }
func (fp *filterAnyCasePhrase) apply(bs *blockSearch, bm *bitmap) { func (fp *filterAnyCasePhrase) applyToBlockResult(br *blockResult, bm *bitmap) {
phraseLowercase := fp.getPhraseLowercase()
applyToBlockResultGeneric(br, bm, fp.fieldName, phraseLowercase, matchAnyCasePhrase)
}
func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fp.fieldName fieldName := fp.fieldName
phraseLowercase := fp.getPhraseLowercase() phraseLowercase := fp.getPhraseLowercase()
@ -100,10 +109,12 @@ func (fp *filterAnyCasePhrase) apply(bs *blockSearch, bm *bitmap) {
func matchValuesDictByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phraseLowercase string) { func matchValuesDictByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phraseLowercase string) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchAnyCasePhrase(v, phraseLowercase) { if matchAnyCasePhrase(v, phraseLowercase) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)

View file

@ -33,6 +33,10 @@ func (fp *filterAnyCasePrefix) String() string {
return fmt.Sprintf("%si(%s*)", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix)) return fmt.Sprintf("%si(%s*)", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix))
} }
func (fp *filterAnyCasePrefix) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterAnyCasePrefix) getTokens() []string { func (fp *filterAnyCasePrefix) getTokens() []string {
fp.tokensOnce.Do(fp.initTokens) fp.tokensOnce.Do(fp.initTokens)
return fp.tokens return fp.tokens
@ -51,7 +55,12 @@ func (fp *filterAnyCasePrefix) initPrefixLowercase() {
fp.prefixLowercase = strings.ToLower(fp.prefix) fp.prefixLowercase = strings.ToLower(fp.prefix)
} }
func (fp *filterAnyCasePrefix) apply(bs *blockSearch, bm *bitmap) { func (fp *filterAnyCasePrefix) applyToBlockResult(br *blockResult, bm *bitmap) {
prefixLowercase := fp.getPrefixLowercase()
applyToBlockResultGeneric(br, bm, fp.fieldName, prefixLowercase, matchAnyCasePrefix)
}
func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fp.fieldName fieldName := fp.fieldName
prefixLowercase := fp.getPrefixLowercase() prefixLowercase := fp.getPrefixLowercase()
@ -101,10 +110,12 @@ func (fp *filterAnyCasePrefix) apply(bs *blockSearch, bm *bitmap) {
func matchValuesDictByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefixLowercase string) { func matchValuesDictByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefixLowercase string) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchAnyCasePrefix(v, prefixLowercase) { if matchAnyCasePrefix(v, prefixLowercase) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)

View file

@ -24,6 +24,10 @@ func (fe *filterExact) String() string {
return fmt.Sprintf("%sexact(%s)", quoteFieldNameIfNeeded(fe.fieldName), quoteTokenIfNeeded(fe.value)) return fmt.Sprintf("%sexact(%s)", quoteFieldNameIfNeeded(fe.fieldName), quoteTokenIfNeeded(fe.value))
} }
func (fe *filterExact) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fe.fieldName)
}
func (fe *filterExact) getTokens() []string { func (fe *filterExact) getTokens() []string {
fe.tokensOnce.Do(fe.initTokens) fe.tokensOnce.Do(fe.initTokens)
return fe.tokens return fe.tokens
@ -33,7 +37,133 @@ func (fe *filterExact) initTokens() {
fe.tokens = tokenizeStrings(nil, []string{fe.value}) fe.tokens = tokenizeStrings(nil, []string{fe.value})
} }
func (fe *filterExact) apply(bs *blockSearch, bm *bitmap) { func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
value := fe.value
c := br.getColumnByName(fe.fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if v != value {
bm.resetBits()
}
return
}
if c.isTime {
matchColumnByExactValue(br, bm, c, value)
return
}
switch c.valueType {
case valueTypeString:
matchColumnByExactValue(br, bm, c, value)
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if v == value {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
n, ok := tryParseUint64(value)
if !ok || n >= (1<<8) {
bm.resetBits()
return
}
nNeeded := uint8(n)
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := unmarshalUint8(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeUint16:
n, ok := tryParseUint64(value)
if !ok || n >= (1<<16) {
bm.resetBits()
return
}
nNeeded := uint16(n)
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := unmarshalUint16(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeUint32:
n, ok := tryParseUint64(value)
if !ok || n >= (1<<32) {
bm.resetBits()
return
}
nNeeded := uint32(n)
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := unmarshalUint32(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeUint64:
nNeeded, ok := tryParseUint64(value)
if !ok {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := unmarshalUint64(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeFloat64:
fNeeded, ok := tryParseFloat64(value)
if !ok {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
f := unmarshalFloat64(valuesEncoded[idx])
return f == fNeeded
})
case valueTypeIPv4:
ipNeeded, ok := tryParseIPv4(value)
if !ok {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
ip := unmarshalIPv4(valuesEncoded[idx])
return ip == ipNeeded
})
case valueTypeTimestampISO8601:
timestampNeeded, ok := tryParseTimestampISO8601(value)
if !ok {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
timestamp := unmarshalTimestampISO8601(valuesEncoded[idx])
return timestamp == timestampNeeded
})
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func matchColumnByExactValue(br *blockResult, bm *bitmap, c *blockResultColumn, value string) {
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
return values[idx] == value
})
}
func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fe.fieldName fieldName := fe.fieldName
value := fe.value value := fe.value
@ -121,10 +251,12 @@ func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, val
func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string) { func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if v == value { if v == value {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)

View file

@ -23,6 +23,10 @@ func (fep *filterExactPrefix) String() string {
return fmt.Sprintf("%sexact(%s*)", quoteFieldNameIfNeeded(fep.fieldName), quoteTokenIfNeeded(fep.prefix)) return fmt.Sprintf("%sexact(%s*)", quoteFieldNameIfNeeded(fep.fieldName), quoteTokenIfNeeded(fep.prefix))
} }
func (fep *filterExactPrefix) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fep.fieldName)
}
func (fep *filterExactPrefix) getTokens() []string { func (fep *filterExactPrefix) getTokens() []string {
fep.tokensOnce.Do(fep.initTokens) fep.tokensOnce.Do(fep.initTokens)
return fep.tokens return fep.tokens
@ -32,7 +36,11 @@ func (fep *filterExactPrefix) initTokens() {
fep.tokens = getTokensSkipLast(fep.prefix) fep.tokens = getTokensSkipLast(fep.prefix)
} }
func (fep *filterExactPrefix) apply(bs *blockSearch, bm *bitmap) { func (fep *filterExactPrefix) applyToBlockResult(br *blockResult, bm *bitmap) {
applyToBlockResultGeneric(br, bm, fep.fieldName, fep.prefix, matchExactPrefix)
}
func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fep.fieldName fieldName := fep.fieldName
prefix := fep.prefix prefix := fep.prefix
@ -91,7 +99,7 @@ func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *b
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v) s := toTimestampISO8601String(bs, bb, v)
return matchExactPrefix(s, prefix) return matchExactPrefix(s, prefix)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -108,7 +116,7 @@ func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefi
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v) s := toIPv4String(bs, bb, v)
return matchExactPrefix(s, prefix) return matchExactPrefix(s, prefix)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -126,7 +134,7 @@ func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pr
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v) s := toFloat64String(bs, bb, v)
return matchExactPrefix(s, prefix) return matchExactPrefix(s, prefix)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -134,10 +142,12 @@ func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pr
func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchExactPrefix(v, prefix) { if matchExactPrefix(v, prefix) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)

View file

@ -18,6 +18,15 @@ type filterIn struct {
fieldName string fieldName string
values []string values []string
// needExecuteQuery is set to true if q must be executed for populating values before filter execution.
needExecuteQuery bool
// If q is non-nil, then values must be populated from q before filter execution.
q *Query
// qFieldName must be set to field name for obtaining values from if q is non-nil.
qFieldName string
tokenSetsOnce sync.Once tokenSetsOnce sync.Once
tokenSets [][]string tokenSets [][]string
@ -47,12 +56,22 @@ type filterIn struct {
} }
func (fi *filterIn) String() string { func (fi *filterIn) String() string {
values := fi.values args := ""
a := make([]string, len(values)) if fi.q != nil {
for i, value := range values { args = fi.q.String()
a[i] = quoteTokenIfNeeded(value) } else {
values := fi.values
a := make([]string, len(values))
for i, value := range values {
a[i] = quoteTokenIfNeeded(value)
}
args = strings.Join(a, ",")
} }
return fmt.Sprintf("%sin(%s)", quoteFieldNameIfNeeded(fi.fieldName), strings.Join(a, ",")) return fmt.Sprintf("%sin(%s)", quoteFieldNameIfNeeded(fi.fieldName), args)
}
func (fi *filterIn) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fi.fieldName)
} }
func (fi *filterIn) getTokenSets() [][]string { func (fi *filterIn) getTokenSets() [][]string {
@ -249,7 +268,95 @@ func (fi *filterIn) initTimestampISO8601Values() {
fi.timestampISO8601Values = m fi.timestampISO8601Values = m
} }
func (fi *filterIn) apply(bs *blockSearch, bm *bitmap) { func (fi *filterIn) applyToBlockResult(br *blockResult, bm *bitmap) {
if len(fi.values) == 0 {
bm.resetBits()
return
}
c := br.getColumnByName(fi.fieldName)
if c.isConst {
stringValues := fi.getStringValues()
v := c.valuesEncoded[0]
if _, ok := stringValues[v]; !ok {
bm.resetBits()
}
return
}
if c.isTime {
fi.matchColumnByStringValues(br, bm, c)
return
}
switch c.valueType {
case valueTypeString:
fi.matchColumnByStringValues(br, bm, c)
case valueTypeDict:
stringValues := fi.getStringValues()
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if _, ok := stringValues[v]; ok {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
binValues := fi.getUint8Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeUint16:
binValues := fi.getUint16Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeUint32:
binValues := fi.getUint32Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeUint64:
binValues := fi.getUint64Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeFloat64:
binValues := fi.getFloat64Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeIPv4:
binValues := fi.getIPv4Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeTimestampISO8601:
binValues := fi.getTimestampISO8601Values()
matchColumnByBinValues(br, bm, c, binValues)
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func (fi *filterIn) matchColumnByStringValues(br *blockResult, bm *bitmap, c *blockResultColumn) {
stringValues := fi.getStringValues()
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
_, ok := stringValues[v]
return ok
})
}
func matchColumnByBinValues(br *blockResult, bm *bitmap, c *blockResultColumn, binValues map[string]struct{}) {
if len(binValues) == 0 {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
_, ok := binValues[v]
return ok
})
}
func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fi.fieldName fieldName := fi.fieldName
if len(fi.values) == 0 { if len(fi.values) == 0 {
@ -314,6 +421,10 @@ func (fi *filterIn) apply(bs *blockSearch, bm *bitmap) {
} }
func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, tokenSets [][]string) { func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, tokenSets [][]string) {
if len(values) == 0 {
bm.resetBits()
return
}
if !matchBloomFilterAnyTokenSet(bs, ch, tokenSets) { if !matchBloomFilterAnyTokenSet(bs, ch, tokenSets) {
bm.resetBits() bm.resetBits()
return return
@ -344,10 +455,12 @@ func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, tokenSets []
func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) { func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if _, ok := values[v]; ok { if _, ok := values[v]; ok {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)

View file

@ -3,8 +3,6 @@ package logstorage
import ( import (
"fmt" "fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
) )
@ -18,12 +16,83 @@ type filterIPv4Range struct {
} }
func (fr *filterIPv4Range) String() string { func (fr *filterIPv4Range) String() string {
minValue := string(encoding.MarshalUint32(nil, fr.minValue)) minValue := marshalIPv4String(nil, fr.minValue)
maxValue := string(encoding.MarshalUint32(nil, fr.maxValue)) maxValue := marshalIPv4String(nil, fr.maxValue)
return fmt.Sprintf("%sipv4_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), toIPv4String(nil, minValue), toIPv4String(nil, maxValue)) return fmt.Sprintf("%sipv4_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), minValue, maxValue)
} }
func (fr *filterIPv4Range) apply(bs *blockSearch, bm *bitmap) { func (fr *filterIPv4Range) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterIPv4Range) applyToBlockResult(br *blockResult, bm *bitmap) {
minValue := fr.minValue
maxValue := fr.maxValue
if minValue > maxValue {
bm.resetBits()
return
}
c := br.getColumnByName(fr.fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if !matchIPv4Range(v, minValue, maxValue) {
bm.resetBits()
}
return
}
if c.isTime {
bm.resetBits()
return
}
switch c.valueType {
case valueTypeString:
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return matchIPv4Range(v, minValue, maxValue)
})
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if matchIPv4Range(v, minValue, maxValue) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
bm.resetBits()
case valueTypeUint16:
bm.resetBits()
case valueTypeUint32:
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
ip := unmarshalIPv4(valuesEncoded[idx])
return ip >= minValue && ip <= maxValue
})
case valueTypeTimestampISO8601:
bm.resetBits()
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func (fr *filterIPv4Range) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName fieldName := fr.fieldName
minValue := fr.minValue minValue := fr.minValue
maxValue := fr.maxValue maxValue := fr.maxValue
@ -75,10 +144,12 @@ func (fr *filterIPv4Range) apply(bs *blockSearch, bm *bitmap) {
func matchValuesDictByIPv4Range(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue uint32) { func matchValuesDictByIPv4Range(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue uint32) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchIPv4Range(v, minValue, maxValue) { if matchIPv4Range(v, minValue, maxValue) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)
@ -108,8 +179,7 @@ func matchIPv4ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, m
if len(v) != 4 { if len(v) != 4 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
} }
b := bytesutil.ToUnsafeBytes(v) n := unmarshalIPv4(v)
n := encoding.UnmarshalUint32(b)
return n >= minValue && n <= maxValue return n >= minValue && n <= maxValue
}) })
} }
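matchIPv4ByRange compares the stored 4-byte binary form of each IPv4 value against the numeric bounds instead of formatting it back to a string first. A self-contained sketch of that comparison, assuming a big-endian layout for the binary IPv4 values:

package main

import (
	"encoding/binary"
	"fmt"
)

// ipv4InRange reports whether a 4-byte big-endian IPv4 value lies in
// [minValue, maxValue].
func ipv4InRange(v []byte, minValue, maxValue uint32) bool {
	if len(v) != 4 {
		return false
	}
	n := binary.BigEndian.Uint32(v)
	return n >= minValue && n <= maxValue
}

func main() {
	lo := binary.BigEndian.Uint32([]byte{10, 0, 0, 0})
	hi := binary.BigEndian.Uint32([]byte{10, 0, 0, 255})
	fmt.Println(ipv4InRange([]byte{10, 0, 0, 5}, lo, hi))    // true
	fmt.Println(ipv4InRange([]byte{192, 168, 1, 1}, lo, hi)) // false
}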

View file

@ -3,7 +3,6 @@ package logstorage
import ( import (
"unicode/utf8" "unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
) )
@ -22,7 +21,101 @@ func (fr *filterLenRange) String() string {
return quoteFieldNameIfNeeded(fr.fieldName) + "len_range" + fr.stringRepr return quoteFieldNameIfNeeded(fr.fieldName) + "len_range" + fr.stringRepr
} }
func (fr *filterLenRange) apply(bs *blockSearch, bm *bitmap) { func (fr *filterLenRange) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterLenRange) applyToBlockResult(br *blockResult, bm *bitmap) {
minLen := fr.minLen
maxLen := fr.maxLen
if minLen > maxLen {
bm.resetBits()
return
}
c := br.getColumnByName(fr.fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if !matchLenRange(v, minLen, maxLen) {
bm.resetBits()
}
return
}
if c.isTime {
matchColumnByLenRange(br, bm, c, minLen, maxLen)
return
}
switch c.valueType {
case valueTypeString:
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if matchLenRange(v, minLen, maxLen) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
if minLen > 3 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeUint16:
if minLen > 5 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeUint32:
if minLen > 10 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeUint64:
if minLen > 20 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeFloat64:
if minLen > 24 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeIPv4:
if minLen > uint64(len("255.255.255.255")) || maxLen < uint64(len("0.0.0.0")) {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeTimestampISO8601:
matchTimestampISO8601ByLenRange(bm, minLen, maxLen)
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
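The per-type cut-offs used above (3, 5, 10, 20 digits, plus the "0.0.0.0" / "255.255.255.255" string lengths for IPv4) are simply the shortest and longest possible textual representations of each value type, so a len_range that cannot overlap them rejects the whole block without decoding any rows. A quick check of the integer bounds:

package main

import (
	"fmt"
	"strconv"
)

// maxDecimalLen returns the number of decimal digits in the largest value of
// an unsigned integer type with the given bit width: 3 for uint8, 5 for
// uint16, 10 for uint32 and 20 for uint64.
func maxDecimalLen(bits uint) int {
	maxVal := ^uint64(0) // MaxUint64
	if bits < 64 {
		maxVal = uint64(1)<<bits - 1
	}
	return len(strconv.FormatUint(maxVal, 10))
}

func main() {
	for _, bits := range []uint{8, 16, 32, 64} {
		fmt.Printf("uint%d: at most %d decimal digits\n", bits, maxDecimalLen(bits))
	}
}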
func matchColumnByLenRange(br *blockResult, bm *bitmap, c *blockResultColumn, minLen, maxLen uint64) {
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return matchLenRange(v, minLen, maxLen)
})
}
func (fr *filterLenRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName fieldName := fr.fieldName
minLen := fr.minLen minLen := fr.minLen
maxLen := fr.maxLen maxLen := fr.maxLen
@ -89,7 +182,7 @@ func matchIPv4ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen,
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v) s := toIPv4String(bs, bb, v)
return matchLenRange(s, minLen, maxLen) return matchLenRange(s, minLen, maxLen)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -103,7 +196,7 @@ func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLe
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v) s := toFloat64String(bs, bb, v)
return matchLenRange(s, minLen, maxLen) return matchLenRange(s, minLen, maxLen)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -111,10 +204,12 @@ func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLe
func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchLenRange(v, minLen, maxLen) { if matchLenRange(v, minLen, maxLen) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)
@ -127,6 +222,10 @@ func matchStringByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
} }
func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
if minLen > 3 || maxLen == 0 {
bm.resetBits()
return
}
if !matchMinMaxValueLen(ch, minLen, maxLen) { if !matchMinMaxValueLen(ch, minLen, maxLen) {
bm.resetBits() bm.resetBits()
return return
@ -141,6 +240,10 @@ func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen,
} }
func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
if minLen > 5 || maxLen == 0 {
bm.resetBits()
return
}
if !matchMinMaxValueLen(ch, minLen, maxLen) { if !matchMinMaxValueLen(ch, minLen, maxLen) {
bm.resetBits() bm.resetBits()
return return
@ -155,6 +258,10 @@ func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
} }
func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
if minLen > 10 || maxLen == 0 {
bm.resetBits()
return
}
if !matchMinMaxValueLen(ch, minLen, maxLen) { if !matchMinMaxValueLen(ch, minLen, maxLen) {
bm.resetBits() bm.resetBits()
return return
@ -169,6 +276,10 @@ func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
} }
func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
if minLen > 20 || maxLen == 0 {
bm.resetBits()
return
}
if !matchMinMaxValueLen(ch, minLen, maxLen) { if !matchMinMaxValueLen(ch, minLen, maxLen) {
bm.resetBits() bm.resetBits()
return return
@ -191,12 +302,10 @@ func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool {
bb := bbPool.Get() bb := bbPool.Get()
defer bbPool.Put(bb) defer bbPool.Put(bb)
bb.B = marshalUint64(bb.B[:0], ch.minValue) bb.B = marshalUint64String(bb.B[:0], ch.minValue)
s := bytesutil.ToUnsafeString(bb.B) if maxLen < uint64(len(bb.B)) {
if maxLen < uint64(len(s)) {
return false return false
} }
bb.B = marshalUint64(bb.B[:0], ch.maxValue) bb.B = marshalUint64String(bb.B[:0], ch.maxValue)
s = bytesutil.ToUnsafeString(bb.B) return minLen <= uint64(len(bb.B))
return minLen <= uint64(len(s))
} }

View file

@ -8,6 +8,14 @@ func (fn *filterNoop) String() string {
return "" return ""
} }
func (fn *filterNoop) apply(_ *blockSearch, _ *bitmap) { func (fn *filterNoop) updateNeededFields(_ fieldsSet) {
// nothing to do
}
func (fn *filterNoop) applyToBlockResult(_ *blockResult, _ *bitmap) {
// nothing to do
}
func (fn *filterNoop) applyToBlockSearch(_ *blockSearch, _ *bitmap) {
// nothing to do // nothing to do
} }

View file

@ -16,12 +16,26 @@ func (fn *filterNot) String() string {
return "!" + s return "!" + s
} }
func (fn *filterNot) apply(bs *blockSearch, bm *bitmap) { func (fn *filterNot) updateNeededFields(neededFields fieldsSet) {
fn.f.updateNeededFields(neededFields)
}
func (fn *filterNot) applyToBlockResult(br *blockResult, bm *bitmap) {
// Minimize the number of rows to check by the filter by applying it // Minimize the number of rows to check by the filter by applying it
// only to the rows, which match the bm, e.g. they may change the bm result. // only to the rows, which match the bm, e.g. they may change the bm result.
bmTmp := getBitmap(bm.bitsLen) bmTmp := getBitmap(bm.bitsLen)
bmTmp.copyFrom(bm) bmTmp.copyFrom(bm)
fn.f.apply(bs, bmTmp) fn.f.applyToBlockResult(br, bmTmp)
bm.andNot(bmTmp)
putBitmap(bmTmp)
}
func (fn *filterNot) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
// Minimize the number of rows to check by the filter by applying it
// only to the rows, which match the bm, e.g. they may change the bm result.
bmTmp := getBitmap(bm.bitsLen)
bmTmp.copyFrom(bm)
fn.f.applyToBlockSearch(bs, bmTmp)
bm.andNot(bmTmp) bm.andNot(bmTmp)
putBitmap(bmTmp) putBitmap(bmTmp)
} }
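Both applyToBlockResult and applyToBlockSearch implement NOT the same way: the inner filter runs on a copy of the caller's bitmap, so it only inspects rows the caller still cares about, and the rows it matched are then cleared from the original. The idea, sketched with a plain bool slice standing in for the real bitmap type:

package main

import "fmt"

// applyNot applies the inner filter to a copy of the current row set and then
// clears the matched rows from the result (the andNot step).
func applyNot(bm []bool, innerFilter func([]bool)) {
	tmp := make([]bool, len(bm))
	copy(tmp, bm)
	innerFilter(tmp) // tmp keeps only rows matched by the inner filter
	for i := range bm {
		if tmp[i] {
			bm[i] = false // bm.andNot(tmp)
		}
	}
}

func main() {
	bm := []bool{true, true, true, false}
	// inner filter: keep only rows with an even index
	applyNot(bm, func(b []bool) {
		for i := range b {
			if i%2 != 0 {
				b[i] = false
			}
		}
	})
	fmt.Println(bm) // [false true false false]
}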

View file

@ -21,7 +21,13 @@ func (fo *filterOr) String() string {
return strings.Join(a, " or ") return strings.Join(a, " or ")
} }
func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) { func (fo *filterOr) updateNeededFields(neededFields fieldsSet) {
for _, f := range fo.filters {
f.updateNeededFields(neededFields)
}
}
func (fo *filterOr) applyToBlockResult(br *blockResult, bm *bitmap) {
bmResult := getBitmap(bm.bitsLen) bmResult := getBitmap(bm.bitsLen)
bmTmp := getBitmap(bm.bitsLen) bmTmp := getBitmap(bm.bitsLen)
for _, f := range fo.filters { for _, f := range fo.filters {
@ -36,7 +42,30 @@ func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) {
// since the result already matches all the values from the block. // since the result already matches all the values from the block.
break break
} }
f.apply(bs, bmTmp) f.applyToBlockResult(br, bmTmp)
bmResult.or(bmTmp)
}
putBitmap(bmTmp)
bm.copyFrom(bmResult)
putBitmap(bmResult)
}
func (fo *filterOr) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
bmResult := getBitmap(bm.bitsLen)
bmTmp := getBitmap(bm.bitsLen)
for _, f := range fo.filters {
// Minimize the number of rows to check by the filter by checking only
// the rows, which may change the output bm:
// - bm matches them, e.g. the caller wants to get them
// - bmResult doesn't match them, e.g. all the previous OR filters didn't match them
bmTmp.copyFrom(bm)
bmTmp.andNot(bmResult)
if bmTmp.isZero() {
// Shortcut - there is no need in applying the remaining filters,
// since the result already matches all the values from the block.
break
}
f.applyToBlockSearch(bs, bmTmp)
bmResult.or(bmTmp) bmResult.or(bmTmp)
} }
putBitmap(bmTmp) putBitmap(bmTmp)
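The OR evaluation hands each sub-filter only the rows that the caller requested and that no earlier sub-filter has already matched, and stops as soon as every requested row is covered. A simplified sketch of that loop over a bool-slice bitmap:

package main

import "fmt"

// applyOr ORs the results of several row filters. Each filter sees only the
// rows that are still requested (bm) and not yet matched (result); once result
// covers bm, the remaining filters are skipped.
func applyOr(bm []bool, filters []func([]bool)) {
	result := make([]bool, len(bm))
	tmp := make([]bool, len(bm))
	for _, f := range filters {
		pending := false
		for i := range bm {
			tmp[i] = bm[i] && !result[i]
			pending = pending || tmp[i]
		}
		if !pending {
			break // shortcut: every requested row already matched
		}
		f(tmp)
		for i := range tmp {
			result[i] = result[i] || tmp[i]
		}
	}
	copy(bm, result)
}

func main() {
	bm := []bool{true, true, true, true}
	keepEven := func(b []bool) {
		for i := range b {
			b[i] = b[i] && i%2 == 0
		}
	}
	keepIdx1 := func(b []bool) {
		for i := range b {
			b[i] = b[i] && i == 1
		}
	}
	applyOr(bm, []func([]bool){keepEven, keepIdx1})
	fmt.Println(bm) // [true true true false]
}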

View file

@ -32,6 +32,10 @@ func (fp *filterPhrase) String() string {
return quoteFieldNameIfNeeded(fp.fieldName) + quoteTokenIfNeeded(fp.phrase) return quoteFieldNameIfNeeded(fp.fieldName) + quoteTokenIfNeeded(fp.phrase)
} }
func (fp *filterPhrase) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterPhrase) getTokens() []string { func (fp *filterPhrase) getTokens() []string {
fp.tokensOnce.Do(fp.initTokens) fp.tokensOnce.Do(fp.initTokens)
return fp.tokens return fp.tokens
@ -41,7 +45,11 @@ func (fp *filterPhrase) initTokens() {
fp.tokens = tokenizeStrings(nil, []string{fp.phrase}) fp.tokens = tokenizeStrings(nil, []string{fp.phrase})
} }
func (fp *filterPhrase) apply(bs *blockSearch, bm *bitmap) { func (fp *filterPhrase) applyToBlockResult(br *blockResult, bm *bitmap) {
applyToBlockResultGeneric(br, bm, fp.fieldName, fp.phrase, matchPhrase)
}
func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fp.fieldName fieldName := fp.fieldName
phrase := fp.phrase phrase := fp.phrase
@ -107,7 +115,7 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v) s := toTimestampISO8601String(bs, bb, v)
return matchPhrase(s, phrase) return matchPhrase(s, phrase)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -131,7 +139,7 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v) s := toIPv4String(bs, bb, v)
return matchPhrase(s, phrase) return matchPhrase(s, phrase)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -160,7 +168,7 @@ func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v) s := toFloat64String(bs, bb, v)
return matchPhrase(s, phrase) return matchPhrase(s, phrase)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -168,10 +176,12 @@ func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string) { func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchPhrase(v, phrase) { if matchPhrase(v, phrase) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)
@ -249,7 +259,7 @@ func getPhrasePos(s, phrase string) int {
} }
func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encodedValues []byte) { func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encodedValues []byte) {
if len(encodedValues) == 0 { if bytes.IndexByte(encodedValues, 1) < 0 {
// Fast path - the phrase is missing in the valuesDict // Fast path - the phrase is missing in the valuesDict
bm.resetBits() bm.resetBits()
return return
@ -259,8 +269,11 @@ func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encod
if len(v) != 1 { if len(v) != 1 {
logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(v))
} }
n := bytes.IndexByte(encodedValues, v[0]) idx := v[0]
return n >= 0 if int(idx) >= len(encodedValues) {
logger.Panicf("FATAL: %s: too big index for dict value; got %d; must be smaller than %d", bs.partPath(), idx, len(encodedValues))
}
return encodedValues[idx] == 1
}) })
} }
@ -294,26 +307,107 @@ func isMsgFieldName(fieldName string) bool {
return fieldName == "" || fieldName == "_msg" return fieldName == "" || fieldName == "_msg"
} }
func toFloat64StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { func toFloat64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
if len(v) != 8 { if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v))
} }
bb.B = toFloat64String(bb.B[:0], v) f := unmarshalFloat64(v)
bb.B = marshalFloat64String(bb.B[:0], f)
return bytesutil.ToUnsafeString(bb.B) return bytesutil.ToUnsafeString(bb.B)
} }
func toIPv4StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { func toIPv4String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
if len(v) != 4 { if len(v) != 4 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
} }
bb.B = toIPv4String(bb.B[:0], v) ip := unmarshalIPv4(v)
bb.B = marshalIPv4String(bb.B[:0], ip)
return bytesutil.ToUnsafeString(bb.B) return bytesutil.ToUnsafeString(bb.B)
} }
func toTimestampISO8601StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { func toTimestampISO8601String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
if len(v) != 8 { if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of ISO8601 timestamp: got %d; want 8", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of ISO8601 timestamp: got %d; want 8", bs.partPath(), len(v))
} }
bb.B = toTimestampISO8601String(bb.B[:0], v) timestamp := unmarshalTimestampISO8601(v)
bb.B = marshalTimestampISO8601String(bb.B[:0], timestamp)
return bytesutil.ToUnsafeString(bb.B) return bytesutil.ToUnsafeString(bb.B)
} }
func applyToBlockResultGeneric(br *blockResult, bm *bitmap, fieldName, phrase string, matchFunc func(v, phrase string) bool) {
c := br.getColumnByName(fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if !matchFunc(v, phrase) {
bm.resetBits()
}
return
}
if c.isTime {
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
return
}
switch c.valueType {
case valueTypeString:
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if matchFunc(v, phrase) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
n, ok := tryParseUint64(phrase)
if !ok || n >= (1<<8) {
bm.resetBits()
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeUint16:
n, ok := tryParseUint64(phrase)
if !ok || n >= (1<<16) {
bm.resetBits()
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeUint32:
n, ok := tryParseUint64(phrase)
if !ok || n >= (1<<32) {
bm.resetBits()
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeUint64:
_, ok := tryParseUint64(phrase)
if !ok {
bm.resetBits()
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeFloat64:
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeIPv4:
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeTimestampISO8601:
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func matchColumnByPhraseGeneric(br *blockResult, bm *bitmap, c *blockResultColumn, phrase string, matchFunc func(v, phrase string) bool) {
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
return matchFunc(values[idx], phrase)
})
}
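applyToBlockResultGeneric prescreens numeric columns: a phrase that does not parse as an integer, or does not fit the column's type, cannot match any row, so the whole block is rejected before a single value is decoded. Roughly, using strconv as a stand-in for tryParseUint64:

package main

import (
	"fmt"
	"strconv"
)

// canMatchUintColumn reports whether a phrase can possibly match a value stored
// in an unsigned integer column of the given bit width.
func canMatchUintColumn(phrase string, bits uint) bool {
	n, err := strconv.ParseUint(phrase, 10, 64)
	if err != nil {
		return false
	}
	return bits >= 64 || n < uint64(1)<<bits
}

func main() {
	fmt.Println(canMatchUintColumn("300", 8))  // false: 300 does not fit uint8
	fmt.Println(canMatchUintColumn("42", 8))   // true
	fmt.Println(canMatchUintColumn("foo", 32)) // false: not a number
}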

View file

@ -7,7 +7,6 @@ import (
"unicode/utf8" "unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
) )
@ -31,6 +30,10 @@ func (fp *filterPrefix) String() string {
return fmt.Sprintf("%s%s*", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix)) return fmt.Sprintf("%s%s*", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix))
} }
func (fp *filterPrefix) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterPrefix) getTokens() []string { func (fp *filterPrefix) getTokens() []string {
fp.tokensOnce.Do(fp.initTokens) fp.tokensOnce.Do(fp.initTokens)
return fp.tokens return fp.tokens
@ -40,7 +43,11 @@ func (fp *filterPrefix) initTokens() {
fp.tokens = getTokensSkipLast(fp.prefix) fp.tokens = getTokensSkipLast(fp.prefix)
} }
func (fp *filterPrefix) apply(bs *blockSearch, bm *bitmap) { func (fp *filterPrefix) applyToBlockResult(bs *blockResult, bm *bitmap) {
applyToBlockResultGeneric(bs, bm, fp.fieldName, fp.prefix, matchPrefix)
}
func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fp.fieldName fieldName := fp.fieldName
prefix := fp.prefix prefix := fp.prefix
@ -102,7 +109,7 @@ func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v) s := toTimestampISO8601String(bs, bb, v)
return matchPrefix(s, prefix) return matchPrefix(s, prefix)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -123,7 +130,7 @@ func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix str
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v) s := toIPv4String(bs, bb, v)
return matchPrefix(s, prefix) return matchPrefix(s, prefix)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -151,7 +158,7 @@ func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v) s := toFloat64String(bs, bb, v)
return matchPrefix(s, prefix) return matchPrefix(s, prefix)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -159,10 +166,12 @@ func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix
func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchPrefix(v, prefix) { if matchPrefix(v, prefix) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)
@ -321,8 +330,8 @@ func toUint8String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
if len(v) != 1 { if len(v) != 1 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v))
} }
n := uint64(v[0]) n := unmarshalUint8(v)
bb.B = marshalUint64(bb.B[:0], n) bb.B = marshalUint8String(bb.B[:0], n)
return bytesutil.ToUnsafeString(bb.B) return bytesutil.ToUnsafeString(bb.B)
} }
@ -330,9 +339,8 @@ func toUint16String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
if len(v) != 2 { if len(v) != 2 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v))
} }
b := bytesutil.ToUnsafeBytes(v) n := unmarshalUint16(v)
n := uint64(encoding.UnmarshalUint16(b)) bb.B = marshalUint16String(bb.B[:0], n)
bb.B = marshalUint64(bb.B[:0], n)
return bytesutil.ToUnsafeString(bb.B) return bytesutil.ToUnsafeString(bb.B)
} }
@ -340,9 +348,8 @@ func toUint32String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
if len(v) != 4 { if len(v) != 4 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint32 number: got %d; want 4", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of uint32 number: got %d; want 4", bs.partPath(), len(v))
} }
b := bytesutil.ToUnsafeBytes(v) n := unmarshalUint32(v)
n := uint64(encoding.UnmarshalUint32(b)) bb.B = marshalUint32String(bb.B[:0], n)
bb.B = marshalUint64(bb.B[:0], n)
return bytesutil.ToUnsafeString(bb.B) return bytesutil.ToUnsafeString(bb.B)
} }
@ -350,8 +357,7 @@ func toUint64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
if len(v) != 8 { if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint64 number: got %d; want 8", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of uint64 number: got %d; want 8", bs.partPath(), len(v))
} }
b := bytesutil.ToUnsafeBytes(v) n := unmarshalUint64(v)
n := encoding.UnmarshalUint64(b) bb.B = marshalUint64String(bb.B[:0], n)
bb.B = marshalUint64(bb.B[:0], n)
return bytesutil.ToUnsafeString(bb.B) return bytesutil.ToUnsafeString(bb.B)
} }

View file

@ -3,8 +3,6 @@ package logstorage
import ( import (
"math" "math"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
) )
@ -13,17 +11,132 @@ import (
// Example LogsQL: `fieldName:range(minValue, maxValue]` // Example LogsQL: `fieldName:range(minValue, maxValue]`
type filterRange struct { type filterRange struct {
fieldName string fieldName string
minValue float64
maxValue float64 minValue float64
maxValue float64
stringRepr string stringRepr string
} }
func (fr *filterRange) String() string { func (fr *filterRange) String() string {
return quoteFieldNameIfNeeded(fr.fieldName) + "range" + fr.stringRepr return quoteFieldNameIfNeeded(fr.fieldName) + fr.stringRepr
} }
func (fr *filterRange) apply(bs *blockSearch, bm *bitmap) { func (fr *filterRange) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterRange) applyToBlockResult(br *blockResult, bm *bitmap) {
minValue := fr.minValue
maxValue := fr.maxValue
if minValue > maxValue {
bm.resetBits()
return
}
c := br.getColumnByName(fr.fieldName)
if c.isConst {
v := c.valuesEncoded[0]
if !matchRange(v, minValue, maxValue) {
bm.resetBits()
}
return
}
if c.isTime {
bm.resetBits()
return
}
switch c.valueType {
case valueTypeString:
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return matchRange(v, minValue, maxValue)
})
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if matchRange(v, minValue, maxValue) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
n := uint64(unmarshalUint8(v))
return n >= minValueUint && n <= maxValueUint
})
case valueTypeUint16:
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
n := uint64(unmarshalUint16(v))
return n >= minValueUint && n <= maxValueUint
})
case valueTypeUint32:
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
n := uint64(unmarshalUint32(v))
return n >= minValueUint && n <= maxValueUint
})
case valueTypeUint64:
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
n := unmarshalUint64(v)
return n >= minValueUint && n <= maxValueUint
})
case valueTypeFloat64:
if minValue > math.Float64frombits(c.maxValue) || maxValue < math.Float64frombits(c.minValue) {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
f := unmarshalFloat64(v)
return f >= minValue && f <= maxValue
})
case valueTypeTimestampISO8601:
bm.resetBits()
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
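For the integer column types the float range is first converted into inclusive uint64 bounds (the role of toUint64Range) and cross-checked against the column's min/max metadata before any row is decoded. A rough sketch of such a conversion, as an assumption about what that helper does rather than its exact definition:

package main

import (
	"fmt"
	"math"
)

// floatRangeToUint converts a float range into inclusive uint64 bounds for
// comparing against integer-encoded values. Ranges entirely below zero become
// an empty range.
func floatRangeToUint(minValue, maxValue float64) (uint64, uint64) {
	if maxValue < 0 {
		return 1, 0 // empty: nothing can match
	}
	minU := uint64(0)
	if minValue > 0 {
		minU = uint64(math.Ceil(minValue))
	}
	maxU := uint64(math.MaxUint64)
	if maxValue < float64(math.MaxUint64) {
		maxU = uint64(math.Floor(maxValue))
	}
	return minU, maxU
}

func main() {
	lo, hi := floatRangeToUint(1.5, 9.7)
	fmt.Println(lo, hi) // 2 9
}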
func (fr *filterRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName fieldName := fr.fieldName
minValue := fr.minValue minValue := fr.minValue
maxValue := fr.maxValue maxValue := fr.maxValue
@ -83,19 +196,19 @@ func matchFloat64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue
if len(v) != 8 { if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v))
} }
b := bytesutil.ToUnsafeBytes(v) f := unmarshalFloat64(v)
n := encoding.UnmarshalUint64(b)
f := math.Float64frombits(n)
return f >= minValue && f <= maxValue return f >= minValue && f <= maxValue
}) })
} }
func matchValuesDictByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) { func matchValuesDictByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchRange(v, minValue, maxValue) { if matchRange(v, minValue, maxValue) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)
@ -118,7 +231,7 @@ func matchUint8ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
if len(v) != 1 { if len(v) != 1 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v))
} }
n := uint64(v[0]) n := uint64(unmarshalUint8(v))
return n >= minValueUint && n <= maxValueUint return n >= minValueUint && n <= maxValueUint
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -135,8 +248,7 @@ func matchUint16ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
if len(v) != 2 { if len(v) != 2 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v))
} }
b := bytesutil.ToUnsafeBytes(v) n := uint64(unmarshalUint16(v))
n := uint64(encoding.UnmarshalUint16(b))
return n >= minValueUint && n <= maxValueUint return n >= minValueUint && n <= maxValueUint
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -153,8 +265,7 @@ func matchUint32ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
if len(v) != 4 { if len(v) != 4 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint32 number: got %d; want 4", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of uint32 number: got %d; want 4", bs.partPath(), len(v))
} }
b := bytesutil.ToUnsafeBytes(v) n := uint64(unmarshalUint32(v))
n := uint64(encoding.UnmarshalUint32(b))
return n >= minValueUint && n <= maxValueUint return n >= minValueUint && n <= maxValueUint
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -171,8 +282,7 @@ func matchUint64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
if len(v) != 8 { if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint64 number: got %d; want 8", bs.partPath(), len(v)) logger.Panicf("FATAL: %s: unexpected length for binary representation of uint64 number: got %d; want 8", bs.partPath(), len(v))
} }
b := bytesutil.ToUnsafeBytes(v) n := unmarshalUint64(v)
n := encoding.UnmarshalUint64(b)
return n >= minValueUint && n <= maxValueUint return n >= minValueUint && n <= maxValueUint
}) })
bbPool.Put(bb) bbPool.Put(bb)

View file

@ -19,7 +19,18 @@ func (fr *filterRegexp) String() string {
return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String()) return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String())
} }
func (fr *filterRegexp) apply(bs *blockSearch, bm *bitmap) { func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterRegexp) applyToBlockResult(br *blockResult, bm *bitmap) {
re := fr.re
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
return re.MatchString(v)
})
}
func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName fieldName := fr.fieldName
re := fr.re re := fr.re
@ -69,7 +80,7 @@ func (fr *filterRegexp) apply(bs *blockSearch, bm *bitmap) {
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v) s := toTimestampISO8601String(bs, bb, v)
return re.MatchString(s) return re.MatchString(s)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -78,7 +89,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v) s := toIPv4String(bs, bb, v)
return re.MatchString(s) return re.MatchString(s)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -87,7 +98,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v) s := toFloat64String(bs, bb, v)
return re.MatchString(s) return re.MatchString(s)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -95,10 +106,12 @@ func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *reg
func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if re.MatchString(v) { if re.MatchString(v) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)

View file

@ -31,6 +31,10 @@ func (fs *filterSequence) String() string {
return fmt.Sprintf("%sseq(%s)", quoteFieldNameIfNeeded(fs.fieldName), strings.Join(a, ",")) return fmt.Sprintf("%sseq(%s)", quoteFieldNameIfNeeded(fs.fieldName), strings.Join(a, ","))
} }
func (fs *filterSequence) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fs.fieldName)
}
func (fs *filterSequence) getTokens() []string { func (fs *filterSequence) getTokens() []string {
fs.tokensOnce.Do(fs.initTokens) fs.tokensOnce.Do(fs.initTokens)
return fs.tokens return fs.tokens
@ -58,7 +62,18 @@ func (fs *filterSequence) initNonEmptyPhrases() {
fs.nonEmptyPhrases = result fs.nonEmptyPhrases = result
} }
func (fs *filterSequence) apply(bs *blockSearch, bm *bitmap) { func (fs *filterSequence) applyToBlockResult(br *blockResult, bm *bitmap) {
phrases := fs.getNonEmptyPhrases()
if len(phrases) == 0 {
return
}
applyToBlockResultGeneric(br, bm, fs.fieldName, "", func(v, _ string) bool {
return matchSequence(v, phrases)
})
}
func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fs.fieldName fieldName := fs.fieldName
phrases := fs.getNonEmptyPhrases() phrases := fs.getNonEmptyPhrases()
@ -124,7 +139,7 @@ func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitm
// Slow path - phrases contain incomplete timestamp. Search over string representation of the timestamp. // Slow path - phrases contain incomplete timestamp. Search over string representation of the timestamp.
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v) s := toTimestampISO8601String(bs, bb, v)
return matchSequence(s, phrases) return matchSequence(s, phrases)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -145,7 +160,7 @@ func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases,
// the ip to string before searching for prefix there. // the ip to string before searching for prefix there.
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v) s := toIPv4String(bs, bb, v)
return matchSequence(s, phrases) return matchSequence(s, phrases)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -163,7 +178,7 @@ func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phras
// of floating-point numbers :( // of floating-point numbers :(
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v) s := toFloat64String(bs, bb, v)
return matchSequence(s, phrases) return matchSequence(s, phrases)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -171,10 +186,12 @@ func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phras
func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string) { func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchSequence(v, phrases) { if matchSequence(v, phrases) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)

View file

@ -2,6 +2,8 @@ package logstorage
import ( import (
"sync" "sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
) )
// filterStream is the filter for `_stream:{...}` // filterStream is the filter for `_stream:{...}`
@ -27,6 +29,10 @@ func (fs *filterStream) String() string {
return "_stream:" + s return "_stream:" + s
} }
func (fs *filterStream) updateNeededFields(neededFields fieldsSet) {
neededFields.add("_stream")
}
func (fs *filterStream) getStreamIDs() map[streamID]struct{} { func (fs *filterStream) getStreamIDs() map[streamID]struct{} {
fs.streamIDsOnce.Do(fs.initStreamIDs) fs.streamIDsOnce.Do(fs.initStreamIDs)
return fs.streamIDs return fs.streamIDs
@ -41,7 +47,66 @@ func (fs *filterStream) initStreamIDs() {
fs.streamIDs = m fs.streamIDs = m
} }
func (fs *filterStream) apply(bs *blockSearch, bm *bitmap) { func (fs *filterStream) applyToBlockResult(br *blockResult, bm *bitmap) {
if fs.f.isEmpty() {
return
}
c := br.getColumnByName("_stream")
if c.isConst {
v := c.valuesEncoded[0]
if !fs.f.matchStreamName(v) {
bm.resetBits()
}
return
}
if c.isTime {
bm.resetBits()
return
}
switch c.valueType {
case valueTypeString:
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return fs.f.matchStreamName(v)
})
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if fs.f.matchStreamName(v) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
bm.resetBits()
case valueTypeUint16:
bm.resetBits()
case valueTypeUint32:
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:
bm.resetBits()
case valueTypeTimestampISO8601:
bm.resetBits()
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func (fs *filterStream) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
if fs.f.isEmpty() { if fs.f.isEmpty() {
return return
} }

View file

@ -22,7 +22,25 @@ func (fr *filterStringRange) String() string {
return fmt.Sprintf("%sstring_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), quoteTokenIfNeeded(fr.minValue), quoteTokenIfNeeded(fr.maxValue)) return fmt.Sprintf("%sstring_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), quoteTokenIfNeeded(fr.minValue), quoteTokenIfNeeded(fr.maxValue))
} }
func (fr *filterStringRange) apply(bs *blockSearch, bm *bitmap) { func (fr *filterStringRange) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterStringRange) applyToBlockResult(br *blockResult, bm *bitmap) {
minValue := fr.minValue
maxValue := fr.maxValue
if minValue > maxValue {
bm.resetBits()
return
}
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
return matchStringRange(v, minValue, maxValue)
})
}
func (fr *filterStringRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
fieldName := fr.fieldName fieldName := fr.fieldName
minValue := fr.minValue minValue := fr.minValue
maxValue := fr.maxValue maxValue := fr.maxValue
@ -81,7 +99,7 @@ func matchTimestampISO8601ByStringRange(bs *blockSearch, ch *columnHeader, bm *b
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601StringExt(bs, bb, v) s := toTimestampISO8601String(bs, bb, v)
return matchStringRange(s, minValue, maxValue) return matchStringRange(s, minValue, maxValue)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -95,7 +113,7 @@ func matchIPv4ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minVa
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4StringExt(bs, bb, v) s := toIPv4String(bs, bb, v)
return matchStringRange(s, minValue, maxValue) return matchStringRange(s, minValue, maxValue)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -109,7 +127,7 @@ func matchFloat64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, mi
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64StringExt(bs, bb, v) s := toFloat64String(bs, bb, v)
return matchStringRange(s, minValue, maxValue) return matchStringRange(s, minValue, maxValue)
}) })
bbPool.Put(bb) bbPool.Put(bb)
@ -117,10 +135,12 @@ func matchFloat64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, mi
func matchValuesDictByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue string) { func matchValuesDictByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue string) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0)
if matchStringRange(v, minValue, maxValue) { if matchStringRange(v, minValue, maxValue) {
bb.B = append(bb.B, byte(i)) c = 1
} }
bb.B = append(bb.B, c)
} }
matchEncodedValuesDict(bs, ch, bm, bb.B) matchEncodedValuesDict(bs, ch, bm, bb.B)
bbPool.Put(bb) bbPool.Put(bb)

View file

@ -197,11 +197,6 @@ func testFilterMatchForStorage(t *testing.T, s *Storage, tenantID TenantID, f fi
} }
workersCount := 3 workersCount := 3
s.search(workersCount, so, nil, func(_ uint, br *blockResult) { s.search(workersCount, so, nil, func(_ uint, br *blockResult) {
// Verify tenantID
if !br.streamID.tenantID.equal(&tenantID) {
t.Fatalf("unexpected tenantID in blockResult; got %s; want %s", &br.streamID.tenantID, &tenantID)
}
// Verify columns // Verify columns
cs := br.getColumns() cs := br.getColumns()
if len(cs) != 1 { if len(cs) != 1 {

View file

@ -1,8 +1,12 @@
package logstorage package logstorage
import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// filterTime filters by time. // filterTime filters by time.
// //
// It is expressed as `_time:(start, end]` in LogsQL. // It is expressed as `_time:[start, end]` in LogsQL.
type filterTime struct { type filterTime struct {
// minTimestamp is the minimum timestamp in nanoseconds to find // minTimestamp is the minimum timestamp in nanoseconds to find
minTimestamp int64 minTimestamp int64
@ -18,7 +22,95 @@ func (ft *filterTime) String() string {
return "_time:" + ft.stringRepr return "_time:" + ft.stringRepr
} }
func (ft *filterTime) apply(bs *blockSearch, bm *bitmap) { func (ft *filterTime) updateNeededFields(neededFields fieldsSet) {
neededFields.add("_time")
}
func (ft *filterTime) applyToBlockResult(br *blockResult, bm *bitmap) {
minTimestamp := ft.minTimestamp
maxTimestamp := ft.maxTimestamp
if minTimestamp > maxTimestamp {
bm.resetBits()
return
}
c := br.getColumnByName("_time")
if c.isConst {
v := c.valuesEncoded[0]
if !ft.matchTimestampString(v) {
bm.resetBits()
}
return
}
if c.isTime {
timestamps := br.timestamps
bm.forEachSetBit(func(idx int) bool {
timestamp := timestamps[idx]
return ft.matchTimestampValue(timestamp)
})
return
}
switch c.valueType {
case valueTypeString:
values := c.getValues(br)
bm.forEachSetBit(func(idx int) bool {
v := values[idx]
return ft.matchTimestampString(v)
})
case valueTypeDict:
bb := bbPool.Get()
for _, v := range c.dictValues {
c := byte(0)
if ft.matchTimestampString(v) {
c = 1
}
bb.B = append(bb.B, c)
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := valuesEncoded[idx][0]
return bb.B[n] == 1
})
bbPool.Put(bb)
case valueTypeUint8:
bm.resetBits()
case valueTypeUint16:
bm.resetBits()
case valueTypeUint32:
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:
bm.resetBits()
case valueTypeTimestampISO8601:
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
timestamp := unmarshalTimestampISO8601(v)
return ft.matchTimestampValue(timestamp)
})
default:
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
}
}
func (ft *filterTime) matchTimestampString(v string) bool {
timestamp, ok := tryParseTimestampRFC3339Nano(v)
if !ok {
return false
}
return ft.matchTimestampValue(timestamp)
}
func (ft *filterTime) matchTimestampValue(timestamp int64) bool {
return timestamp >= ft.minTimestamp && timestamp <= ft.maxTimestamp
}
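When _time arrives as strings (const or string-typed columns), each value is parsed as an RFC3339 timestamp and compared in Unix nanoseconds; values that fail to parse simply do not match. The same check in a self-contained form, using the standard library parser as a stand-in for tryParseTimestampRFC3339Nano:

package main

import (
	"fmt"
	"time"
)

// timestampInRange reports whether an RFC3339(Nano) timestamp string falls
// into [minTs, maxTs], both given in Unix nanoseconds.
func timestampInRange(v string, minTs, maxTs int64) bool {
	t, err := time.Parse(time.RFC3339Nano, v)
	if err != nil {
		return false
	}
	ts := t.UnixNano()
	return ts >= minTs && ts <= maxTs
}

func main() {
	lo := time.Date(2024, 5, 20, 0, 0, 0, 0, time.UTC).UnixNano()
	hi := time.Date(2024, 5, 21, 0, 0, 0, 0, time.UTC).UnixNano()
	fmt.Println(timestampInRange("2024-05-20T12:00:00Z", lo, hi)) // true
	fmt.Println(timestampInRange("not-a-time", lo, hi))           // false
}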
func (ft *filterTime) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
minTimestamp := ft.minTimestamp minTimestamp := ft.minTimestamp
maxTimestamp := ft.maxTimestamp maxTimestamp := ft.maxTimestamp

View file

@ -268,7 +268,7 @@ func (is *indexSearch) getStreamIDsForTagFilter(tenantID TenantID, tf *streamTag
} }
return ids return ids
case "=~": case "=~":
re := tf.getRegexp() re := tf.regexp
if re.MatchString("") { if re.MatchString("") {
// (field=~"|re") => (field="" or field=~"re") // (field=~"|re") => (field="" or field=~"re")
ids := is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName) ids := is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName)
@ -280,7 +280,7 @@ func (is *indexSearch) getStreamIDsForTagFilter(tenantID TenantID, tf *streamTag
} }
return is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re) return is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
case "!~": case "!~":
re := tf.getRegexp() re := tf.regexp
if re.MatchString("") { if re.MatchString("") {
// (field!~"|re") => (field!="" and not field=~"re") // (field!~"|re") => (field!="" and not field=~"re")
ids := is.getStreamIDsForTagName(tenantID, tf.tagName) ids := is.getStreamIDsForTagName(tenantID, tf.tagName)

View file

@ -50,7 +50,7 @@ func TestStorageSearchStreamIDs(t *testing.T) {
f := func(filterStream string, expectedStreamIDs []streamID) { f := func(filterStream string, expectedStreamIDs []streamID) {
t.Helper() t.Helper()
sf := mustNewStreamFilter(filterStream) sf := mustNewTestStreamFilter(filterStream)
if expectedStreamIDs == nil { if expectedStreamIDs == nil {
expectedStreamIDs = []streamID{} expectedStreamIDs = []streamID{}
} }
@ -68,7 +68,7 @@ func TestStorageSearchStreamIDs(t *testing.T) {
AccountID: 1, AccountID: 1,
ProjectID: 2, ProjectID: 2,
} }
sf := mustNewStreamFilter(`{job="job-0",instance="instance-0"}`) sf := mustNewTestStreamFilter(`{job="job-0",instance="instance-0"}`)
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {
streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf) streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf)
if len(streamIDs) > 0 { if len(streamIDs) > 0 {

View file

@ -1,4 +1,4 @@
package logjson package logstorage
import ( import (
"fmt" "fmt"
@ -6,21 +6,20 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/valyala/fastjson" "github.com/valyala/fastjson"
) )
// Parser parses a single JSON log message into Fields. // JSONParser parses a single JSON log message into Fields.
// //
// See https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model // See https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model
// //
// Use GetParser() for obtaining the parser. // Use GetParser() for obtaining the parser.
type Parser struct { type JSONParser struct {
// Fields contains the parsed JSON line after Parse() call // Fields contains the parsed JSON line after Parse() call
// //
// The Fields are valid until the next call to ParseLogMessage() // The Fields are valid until the next call to ParseLogMessage()
// or until the parser is returned to the pool with PutParser() call. // or until the parser is returned to the pool with PutParser() call.
Fields []logstorage.Field Fields []Field
// p is used for fast JSON parsing // p is used for fast JSON parsing
p fastjson.Parser p fastjson.Parser
@ -33,59 +32,79 @@ type Parser struct {
prefixBuf []byte prefixBuf []byte
} }
func (p *Parser) reset() { func (p *JSONParser) reset() {
fields := p.Fields p.resetNobuf()
for i := range fields {
lf := &fields[i]
lf.Name = ""
lf.Value = ""
}
p.Fields = fields[:0]
p.buf = p.buf[:0] p.buf = p.buf[:0]
}
func (p *JSONParser) resetNobuf() {
clear(p.Fields)
p.Fields = p.Fields[:0]
p.prefixBuf = p.prefixBuf[:0] p.prefixBuf = p.prefixBuf[:0]
} }
// GetParser returns Parser ready to parse JSON lines. // GetJSONParser returns JSONParser ready to parse JSON lines.
// //
// Return the parser to the pool when it is no longer needed by calling PutParser(). // Return the parser to the pool when it is no longer needed by calling PutJSONParser().
func GetParser() *Parser { func GetJSONParser() *JSONParser {
v := parserPool.Get() v := parserPool.Get()
if v == nil { if v == nil {
return &Parser{} return &JSONParser{}
} }
return v.(*Parser) return v.(*JSONParser)
} }
// PutParser returns the parser to the pool. // PutJSONParser returns the parser to the pool.
// //
// The parser cannot be used after returning to the pool. // The parser cannot be used after returning to the pool.
func PutParser(p *Parser) { func PutJSONParser(p *JSONParser) {
p.reset() p.reset()
parserPool.Put(p) parserPool.Put(p)
} }
var parserPool sync.Pool var parserPool sync.Pool
// ParseLogMessageNoResetBuf parses the given JSON log message msg into p.Fields.
//
// It adds the given prefix to all the parsed field names.
//
// The p.Fields remains valid until the next call to PutJSONParser().
func (p *JSONParser) ParseLogMessageNoResetBuf(msg, prefix string) error {
return p.parseLogMessage(msg, prefix, false)
}
// ParseLogMessage parses the given JSON log message msg into p.Fields. // ParseLogMessage parses the given JSON log message msg into p.Fields.
// //
// The p.Fields remains valid until the next call to ParseLogMessage() or PutParser(). // It adds the given prefix to all the parsed field names.
func (p *Parser) ParseLogMessage(msg []byte) error { //
s := bytesutil.ToUnsafeString(msg) // The p.Fields remains valid until the next call to ParseLogMessage() or PutJSONParser().
v, err := p.p.Parse(s) func (p *JSONParser) ParseLogMessage(msg []byte, prefix string) error {
msgStr := bytesutil.ToUnsafeString(msg)
return p.parseLogMessage(msgStr, prefix, true)
}
func (p *JSONParser) parseLogMessage(msg, prefix string, resetBuf bool) error {
v, err := p.p.Parse(msg)
if err != nil { if err != nil {
return fmt.Errorf("cannot parse json: %w", err) return fmt.Errorf("cannot parse json: %w", err)
} }
if t := v.Type(); t != fastjson.TypeObject { if t := v.Type(); t != fastjson.TypeObject {
return fmt.Errorf("expecting json dictionary; got %s", t) return fmt.Errorf("expecting json dictionary; got %s", t)
} }
p.reset() if resetBuf {
p.reset()
} else {
p.resetNobuf()
}
p.prefixBuf = append(p.prefixBuf[:0], prefix...)
p.Fields, p.buf, p.prefixBuf = appendLogFields(p.Fields, p.buf, p.prefixBuf, v) p.Fields, p.buf, p.prefixBuf = appendLogFields(p.Fields, p.buf, p.prefixBuf, v)
return nil return nil
} }
// RenameField renames field with the oldName to newName in p.Fields // RenameField renames field with the oldName to newName in p.Fields
func (p *Parser) RenameField(oldName, newName string) { func (p *JSONParser) RenameField(oldName, newName string) {
if oldName == "" { if oldName == "" {
return return
} }
@ -99,7 +118,7 @@ func (p *Parser) RenameField(oldName, newName string) {
} }
} }
func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]logstorage.Field, []byte, []byte) { func appendLogFields(dst []Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]Field, []byte, []byte) {
o := v.GetObject() o := v.GetObject()
o.Visit(func(k []byte, v *fastjson.Value) { o.Visit(func(k []byte, v *fastjson.Value) {
t := v.Type() t := v.Type()
@ -133,13 +152,13 @@ func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjs
return dst, dstBuf, prefixBuf return dst, dstBuf, prefixBuf
} }
func appendLogField(dst []logstorage.Field, dstBuf, prefixBuf, k, value []byte) ([]logstorage.Field, []byte) { func appendLogField(dst []Field, dstBuf, prefixBuf, k, value []byte) ([]Field, []byte) {
dstBufLen := len(dstBuf) dstBufLen := len(dstBuf)
dstBuf = append(dstBuf, prefixBuf...) dstBuf = append(dstBuf, prefixBuf...)
dstBuf = append(dstBuf, k...) dstBuf = append(dstBuf, k...)
name := dstBuf[dstBufLen:] name := dstBuf[dstBufLen:]
dst = append(dst, logstorage.Field{ dst = append(dst, Field{
Name: bytesutil.ToUnsafeString(name), Name: bytesutil.ToUnsafeString(name),
Value: bytesutil.ToUnsafeString(value), Value: bytesutil.ToUnsafeString(value),
}) })

@ -0,0 +1,97 @@
package logstorage
import (
"reflect"
"testing"
)
func TestJSONParserFailure(t *testing.T) {
f := func(data string) {
t.Helper()
p := GetJSONParser()
err := p.ParseLogMessage([]byte(data), "")
if err == nil {
t.Fatalf("expecting non-nil error")
}
PutJSONParser(p)
}
f("")
f("{foo")
f("[1,2,3]")
f(`{"foo",}`)
}
func TestJSONParserSuccess(t *testing.T) {
f := func(data, prefix string, fieldsExpected []Field) {
t.Helper()
p := GetJSONParser()
err := p.ParseLogMessage([]byte(data), prefix)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(p.Fields, fieldsExpected) {
t.Fatalf("unexpected fields;\ngot\n%s\nwant\n%s", p.Fields, fieldsExpected)
}
PutJSONParser(p)
}
f("{}", "", nil)
f(`{"foo":"bar"}`, "", []Field{
{
Name: "foo",
Value: "bar",
},
})
f(`{"foo":"bar"}`, "prefix_", []Field{
{
Name: "prefix_foo",
Value: "bar",
},
})
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, "", []Field{
{
Name: "foo.bar",
Value: "baz",
},
{
Name: "a",
Value: "1",
},
{
Name: "b",
Value: "true",
},
{
Name: "c",
Value: "[1,2]",
},
{
Name: "d",
Value: "false",
},
})
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, "prefix_", []Field{
{
Name: "prefix_foo.bar",
Value: "baz",
},
{
Name: "prefix_a",
Value: "1",
},
{
Name: "prefix_b",
Value: "true",
},
{
Name: "prefix_c",
Value: "[1,2]",
},
{
Name: "prefix_d",
Value: "false",
},
})
}

@ -10,8 +10,8 @@ import (
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
) )
type lexer struct { type lexer struct {
@ -39,6 +39,20 @@ type lexer struct {
currentTimestamp int64 currentTimestamp int64
} }
type lexerState struct {
lex lexer
}
func (lex *lexer) backupState() *lexerState {
return &lexerState{
lex: *lex,
}
}
func (lex *lexer) restoreState(ls *lexerState) {
*lex = ls.lex
}
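A brief sketch of the backtracking pattern these helpers enable (hypothetical in-package code; the particular combination of parse functions is contrived for illustration — see parseFilterIn for the real usage):

	// parseWithBacktracking shows the intended save/try/restore flow.
	func parseWithBacktracking(lex *lexer) (filter, error) {
		state := lex.backupState()
		if f, err := parseGenericFilter(lex, ""); err == nil {
			return f, nil
		}
		// The failed attempt may have consumed tokens, so roll the whole
		// lexer back before trying an alternative grammar production.
		lex.restoreState(state)
		return parseParensFilter(lex, "")
	}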
// newLexer returns new lexer for the given s. // newLexer returns new lexer for the given s.
// //
// The lex.token points to the first token in s. // The lex.token points to the first token in s.
@ -206,10 +220,45 @@ func (q *Query) String() string {
return s return s
} }
// AddCountByTimePipe adds '| stats by (_time:step offset off, field1, ..., fieldN) count() hits' to the end of q.
func (q *Query) AddCountByTimePipe(step, off int64, fields []string) {
{
// add 'stats by (_time:step offset off, fields) count() hits'
stepStr := string(marshalDuration(nil, step))
offsetStr := string(marshalDuration(nil, off))
byFieldsStr := "_time:" + stepStr + " offset " + offsetStr
for _, f := range fields {
byFieldsStr += ", " + quoteTokenIfNeeded(f)
}
s := fmt.Sprintf("stats by (%s) count() hits", byFieldsStr)
lex := newLexer(s)
ps, err := parsePipeStats(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing %q: %s", s, err)
}
q.pipes = append(q.pipes, ps)
}
{
// Add 'sort by (_time, fields)' in order to get consistent order of the results.
sortFieldsStr := "_time"
for _, f := range fields {
sortFieldsStr += ", " + quoteTokenIfNeeded(f)
}
s := fmt.Sprintf("sort by (%s)", sortFieldsStr)
lex := newLexer(s)
ps, err := parsePipeSort(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing %q: %s", s, err)
}
q.pipes = append(q.pipes, ps)
}
}
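A hedged caller-side sketch of AddCountByTimePipe (the query text, the "host" field and the exact String() rendering are assumptions for illustration):

	package main

	import (
		"fmt"
		"time"

		"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
	)

	func main() {
		q, err := logstorage.ParseQuery(`error`)
		if err != nil {
			panic(err)
		}
		// Count hits in 1-hour buckets, additionally grouped by the "host" field.
		q.AddCountByTimePipe(int64(time.Hour), 0, []string{"host"})
		// Expected to print something like:
		//   error | stats by (_time:1h offset 0s, host) count(*) as hits | sort by (_time, host)
		fmt.Println(q)
	}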
// AddTimeFilter adds global filter _time:[start ... end] to q. // AddTimeFilter adds global filter _time:[start ... end] to q.
func (q *Query) AddTimeFilter(start, end int64) { func (q *Query) AddTimeFilter(start, end int64) {
startStr := marshalTimestampRFC3339Nano(nil, start) startStr := marshalTimestampRFC3339NanoString(nil, start)
endStr := marshalTimestampRFC3339Nano(nil, end) endStr := marshalTimestampRFC3339NanoString(nil, end)
ft := &filterTime{ ft := &filterTime{
minTimestamp: start, minTimestamp: start,
maxTimestamp: end, maxTimestamp: end,
@ -234,7 +283,7 @@ func (q *Query) AddTimeFilter(start, end int64) {
// See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe // See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe
func (q *Query) AddPipeLimit(n uint64) { func (q *Query) AddPipeLimit(n uint64) {
q.pipes = append(q.pipes, &pipeLimit{ q.pipes = append(q.pipes, &pipeLimit{
n: n, limit: n,
}) })
} }
@ -242,6 +291,68 @@ func (q *Query) AddPipeLimit(n uint64) {
func (q *Query) Optimize() { func (q *Query) Optimize() {
q.pipes = optimizeSortOffsetPipes(q.pipes) q.pipes = optimizeSortOffsetPipes(q.pipes)
q.pipes = optimizeSortLimitPipes(q.pipes) q.pipes = optimizeSortLimitPipes(q.pipes)
q.pipes = optimizeUniqLimitPipes(q.pipes)
q.pipes = optimizeFilterPipes(q.pipes)
// Merge `q | filter ...` into q.
if len(q.pipes) > 0 {
pf, ok := q.pipes[0].(*pipeFilter)
if ok {
q.f = mergeFiltersAnd(q.f, pf.f)
q.pipes = append(q.pipes[:0], q.pipes[1:]...)
}
}
// Optimize `q | field_names ...` by marking pipeFieldNames as first pipe.
if len(q.pipes) > 0 {
pf, ok := q.pipes[0].(*pipeFieldNames)
if ok {
pf.isFirstPipe = true
}
}
// Substitute '*' prefixFilter with filterNoop in order to avoid reading _msg data.
q.f = removeStarFilters(q.f)
// Call Optimize for queries from 'in(query)' filters.
optimizeFilterIn(q.f)
for _, p := range q.pipes {
switch t := p.(type) {
case *pipeStats:
for _, f := range t.funcs {
if f.iff != nil {
optimizeFilterIn(f.iff)
}
}
}
}
}
func removeStarFilters(f filter) filter {
visitFunc := func(f filter) bool {
fp, ok := f.(*filterPrefix)
return ok && isMsgFieldName(fp.fieldName) && fp.prefix == ""
}
copyFunc := func(_ filter) (filter, error) {
fn := &filterNoop{}
return fn, nil
}
f, err := copyFilter(f, visitFunc, copyFunc)
if err != nil {
logger.Fatalf("BUG: unexpected error: %s", err)
}
return f
}
func optimizeFilterIn(f filter) {
visitFunc := func(f filter) bool {
fi, ok := f.(*filterIn)
if ok && fi.q != nil {
fi.q.Optimize()
}
return false
}
_ = visitFilter(f, visitFunc)
} }
func optimizeSortOffsetPipes(pipes []pipe) []pipe { func optimizeSortOffsetPipes(pipes []pipe) []pipe {
@ -259,7 +370,7 @@ func optimizeSortOffsetPipes(pipes []pipe) []pipe {
continue continue
} }
if ps.offset == 0 && ps.limit == 0 { if ps.offset == 0 && ps.limit == 0 {
ps.offset = po.n ps.offset = po.offset
} }
pipes = append(pipes[:i], pipes[i+1:]...) pipes = append(pipes[:i], pipes[i+1:]...)
} }
@ -280,14 +391,78 @@ func optimizeSortLimitPipes(pipes []pipe) []pipe {
i++ i++
continue continue
} }
if ps.limit == 0 || pl.n < ps.limit { if ps.limit == 0 || pl.limit < ps.limit {
ps.limit = pl.n ps.limit = pl.limit
} }
pipes = append(pipes[:i], pipes[i+1:]...) pipes = append(pipes[:i], pipes[i+1:]...)
} }
return pipes return pipes
} }
func optimizeUniqLimitPipes(pipes []pipe) []pipe {
// Merge 'uniq ... | limit ...' into 'uniq ... limit ...'
i := 1
for i < len(pipes) {
pl, ok := pipes[i].(*pipeLimit)
if !ok {
i++
continue
}
pu, ok := pipes[i-1].(*pipeUniq)
if !ok {
i++
continue
}
if pu.limit == 0 || pl.limit < pu.limit {
pu.limit = pl.limit
}
pipes = append(pipes[:i], pipes[i+1:]...)
}
return pipes
}
func optimizeFilterPipes(pipes []pipe) []pipe {
// Merge multiple `| filter ...` pipes into a single `filter ...` pipe
i := 1
for i < len(pipes) {
pf1, ok := pipes[i-1].(*pipeFilter)
if !ok {
i++
continue
}
pf2, ok := pipes[i].(*pipeFilter)
if !ok {
i++
continue
}
pf1.f = mergeFiltersAnd(pf1.f, pf2.f)
pipes = append(pipes[:i], pipes[i+1:]...)
}
return pipes
}
func mergeFiltersAnd(f1, f2 filter) filter {
fa1, ok := f1.(*filterAnd)
if ok {
fa1.filters = append(fa1.filters, f2)
return fa1
}
fa2, ok := f2.(*filterAnd)
if ok {
filters := make([]filter, len(fa2.filters)+1)
filters[0] = f1
copy(filters[1:], fa2.filters)
fa2.filters = filters
return fa2
}
return &filterAnd{
filters: []filter{f1, f2},
}
}
func (q *Query) getNeededColumns() ([]string, []string) { func (q *Query) getNeededColumns() ([]string, []string) {
neededFields := newFieldsSet() neededFields := newFieldsSet()
neededFields.add("*") neededFields.add("*")
@ -304,7 +479,17 @@ func (q *Query) getNeededColumns() ([]string, []string) {
// ParseQuery parses s. // ParseQuery parses s.
func ParseQuery(s string) (*Query, error) { func ParseQuery(s string) (*Query, error) {
lex := newLexer(s) lex := newLexer(s)
q, err := parseQuery(lex)
if err != nil {
return nil, err
}
if !lex.isEnd() {
return nil, fmt.Errorf("unexpected unparsed tail after [%s]; context: [%s]; tail: [%s]", q, lex.context(), lex.s)
}
return q, nil
}
func parseQuery(lex *lexer) (*Query, error) {
f, err := parseFilter(lex) f, err := parseFilter(lex)
if err != nil { if err != nil {
return nil, fmt.Errorf("%w; context: [%s]", err, lex.context()) return nil, fmt.Errorf("%w; context: [%s]", err, lex.context())
@ -319,10 +504,6 @@ func ParseQuery(s string) (*Query, error) {
} }
q.pipes = pipes q.pipes = pipes
if !lex.isEnd() {
return nil, fmt.Errorf("unexpected unparsed tail; context: [%s]; tail: [%s]", lex.context(), lex.s)
}
return q, nil return q, nil
} }
@ -407,6 +588,10 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
return nil, fmt.Errorf("missing whitespace before the search word %q", lex.prevToken) return nil, fmt.Errorf("missing whitespace before the search word %q", lex.prevToken)
} }
return parseParensFilter(lex, fieldName) return parseParensFilter(lex, fieldName)
case lex.isKeyword(">"):
return parseFilterGT(lex, fieldName)
case lex.isKeyword("<"):
return parseFilterLT(lex, fieldName)
case lex.isKeyword("not", "!"): case lex.isKeyword("not", "!"):
return parseFilterNot(lex, fieldName) return parseFilterNot(lex, fieldName)
case lex.isKeyword("exact"): case lex.isKeyword("exact"):
@ -432,19 +617,27 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
case lex.isKeyword(",", ")", "[", "]"): case lex.isKeyword(",", ")", "[", "]"):
return nil, fmt.Errorf("unexpected token %q", lex.token) return nil, fmt.Errorf("unexpected token %q", lex.token)
} }
phrase := getCompoundPhrase(lex, fieldName != "") phrase, err := getCompoundPhrase(lex, fieldName != "")
if err != nil {
return nil, err
}
return parseFilterForPhrase(lex, phrase, fieldName) return parseFilterForPhrase(lex, phrase, fieldName)
} }
func getCompoundPhrase(lex *lexer, allowColon bool) string { func getCompoundPhrase(lex *lexer, allowColon bool) (string, error) {
stopTokens := []string{"*", ",", "(", ")", "[", "]", "|", ""}
if lex.isKeyword(stopTokens...) {
return "", fmt.Errorf("compound phrase cannot start with '%s'", lex.token)
}
phrase := lex.token phrase := lex.token
rawPhrase := lex.rawToken rawPhrase := lex.rawToken
lex.nextToken() lex.nextToken()
suffix := getCompoundSuffix(lex, allowColon) suffix := getCompoundSuffix(lex, allowColon)
if suffix == "" { if suffix == "" {
return phrase return phrase, nil
} }
return rawPhrase + suffix return rawPhrase + suffix, nil
} }
func getCompoundSuffix(lex *lexer, allowColon bool) string { func getCompoundSuffix(lex *lexer, allowColon bool) string {
@ -460,19 +653,24 @@ func getCompoundSuffix(lex *lexer, allowColon bool) string {
return s return s
} }
func getCompoundToken(lex *lexer) string { func getCompoundToken(lex *lexer) (string, error) {
stopTokens := []string{",", "(", ")", "[", "]", "|", ""}
if lex.isKeyword(stopTokens...) {
return "", fmt.Errorf("compound token cannot start with '%s'", lex.token)
}
s := lex.token s := lex.token
rawS := lex.rawToken rawS := lex.rawToken
lex.nextToken() lex.nextToken()
suffix := "" suffix := ""
for !lex.isSkippedSpace && !lex.isKeyword(",", "(", ")", "[", "]", "|", "") { for !lex.isSkippedSpace && !lex.isKeyword(stopTokens...) {
s += lex.token s += lex.token
lex.nextToken() lex.nextToken()
} }
if suffix == "" { if suffix == "" {
return s return s, nil
} }
return rawS + suffix return rawS + suffix, nil
} }
func getCompoundFuncArg(lex *lexer) string { func getCompoundFuncArg(lex *lexer) string {
@ -483,7 +681,7 @@ func getCompoundFuncArg(lex *lexer) string {
rawArg := lex.rawToken rawArg := lex.rawToken
lex.nextToken() lex.nextToken()
suffix := "" suffix := ""
for !lex.isSkippedSpace && !lex.isKeyword("*", ",", ")", "") { for !lex.isSkippedSpace && !lex.isKeyword("*", ",", "(", ")", "|", "") {
suffix += lex.rawToken suffix += lex.rawToken
lex.nextToken() lex.nextToken()
} }
@ -704,13 +902,72 @@ func tryParseIPv4CIDR(s string) (uint32, uint32, bool) {
} }
func parseFilterIn(lex *lexer, fieldName string) (filter, error) { func parseFilterIn(lex *lexer, fieldName string) (filter, error) {
return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) { if !lex.isKeyword("in") {
f := &filterIn{ return nil, fmt.Errorf("expecting 'in' keyword")
}
	// First, try parsing the in(arg1, ..., argN) form
lexState := lex.backupState()
fi, err := parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
fi := &filterIn{
fieldName: fieldName, fieldName: fieldName,
values: args, values: args,
} }
return f, nil return fi, nil
}) })
if err == nil {
return fi, nil
}
	// Fall back to parsing the in(query | fields someField) form
lex.restoreState(lexState)
lex.nextToken()
if !lex.isKeyword("(") {
return nil, fmt.Errorf("missing '(' after 'in'")
}
lex.nextToken()
q, err := parseQuery(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse query inside 'in(...)': %w", err)
}
if !lex.isKeyword(")") {
return nil, fmt.Errorf("missing ')' after 'in(%s)'", q)
}
lex.nextToken()
qFieldName, err := getFieldNameFromPipes(q.pipes)
if err != nil {
return nil, fmt.Errorf("cannot determine field name for values in 'in(%s)': %w", q, err)
}
fi = &filterIn{
fieldName: fieldName,
needExecuteQuery: true,
q: q,
qFieldName: qFieldName,
}
return fi, nil
}
func getFieldNameFromPipes(pipes []pipe) (string, error) {
if len(pipes) == 0 {
return "", fmt.Errorf("missing 'fields' or 'uniq' pipes at the end of query")
}
switch t := pipes[len(pipes)-1].(type) {
case *pipeFields:
if t.containsStar || len(t.fields) != 1 {
return "", fmt.Errorf("'%s' pipe must contain only a single non-star field name", t)
}
return t.fields[0], nil
case *pipeUniq:
if len(t.byFields) != 1 {
return "", fmt.Errorf("'%s' pipe must contain only a single non-star field name", t)
}
return t.byFields[0], nil
default:
return "", fmt.Errorf("missing 'fields' or 'uniq' pipe at the end of query")
}
} }
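// Hypothetical LogsQL examples of the subquery form handled above
// (field and filter names are illustrative):
//
//	user_id:in(_time:5m error | fields user_id)
//	ip:in(_time:1h status:500 | uniq by (ip))
//
// The inner query must end with a 'fields' pipe selecting exactly one field
// or a 'uniq' pipe over exactly one field, as enforced by getFieldNameFromPipes.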
func parseFilterSequence(lex *lexer, fieldName string) (filter, error) { func parseFilterSequence(lex *lexer, fieldName string) (filter, error) {
@ -755,6 +1012,70 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
}) })
} }
func parseFilterGT(lex *lexer, fieldName string) (filter, error) {
if fieldName == "" {
return nil, fmt.Errorf("'>' and '>=' must be prefixed with the field name")
}
lex.nextToken()
includeMinValue := false
op := ">"
if lex.isKeyword("=") {
lex.nextToken()
includeMinValue = true
op = ">="
}
minValue, fStr, err := parseFloat64(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse number after '%s': %w", op, err)
}
if !includeMinValue {
minValue = nextafter(minValue, inf)
}
fr := &filterRange{
fieldName: fieldName,
minValue: minValue,
maxValue: inf,
stringRepr: op + fStr,
}
return fr, nil
}
func parseFilterLT(lex *lexer, fieldName string) (filter, error) {
if fieldName == "" {
return nil, fmt.Errorf("'<' and '<=' must be prefixed with the field name")
}
lex.nextToken()
includeMaxValue := false
op := "<"
if lex.isKeyword("=") {
lex.nextToken()
includeMaxValue = true
op = "<="
}
maxValue, fStr, err := parseFloat64(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse number after '%s': %w", op, err)
}
if !includeMaxValue {
maxValue = nextafter(maxValue, -inf)
}
fr := &filterRange{
fieldName: fieldName,
minValue: -inf,
maxValue: maxValue,
stringRepr: op + fStr,
}
return fr, nil
}
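// Hedged LogsQL examples of the comparison filters parsed above
// (field names and values are illustrative):
//
//	response_size:>10.5M
//	response_size:>=10.5M
//	request_duration:<=1.5
//
// Each comparison becomes a filterRange with a single open bound, nudged via
// nextafter() when the bound is exclusive.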
func parseFilterRange(lex *lexer, fieldName string) (filter, error) { func parseFilterRange(lex *lexer, fieldName string) (filter, error) {
funcName := lex.token funcName := lex.token
lex.nextToken() lex.nextToken()
@ -802,19 +1123,19 @@ func parseFilterRange(lex *lexer, fieldName string) (filter, error) {
} }
lex.nextToken() lex.nextToken()
stringRepr := "" stringRepr := "range"
if includeMinValue { if includeMinValue {
stringRepr += "[" stringRepr += "["
} else { } else {
stringRepr += "(" stringRepr += "("
minValue = math.Nextafter(minValue, inf) minValue = nextafter(minValue, inf)
} }
stringRepr += minValueStr + ", " + maxValueStr stringRepr += minValueStr + ", " + maxValueStr
if includeMaxValue { if includeMaxValue {
stringRepr += "]" stringRepr += "]"
} else { } else {
stringRepr += ")" stringRepr += ")"
maxValue = math.Nextafter(maxValue, -inf) maxValue = nextafter(maxValue, -inf)
} }
fr := &filterRange{ fr := &filterRange{
@ -828,7 +1149,10 @@ func parseFilterRange(lex *lexer, fieldName string) (filter, error) {
} }
func parseFloat64(lex *lexer) (float64, string, error) { func parseFloat64(lex *lexer) (float64, string, error) {
s := getCompoundToken(lex) s, err := getCompoundToken(lex)
if err != nil {
return 0, "", fmt.Errorf("cannot parse float64: %w", err)
}
f, err := strconv.ParseFloat(s, 64) f, err := strconv.ParseFloat(s, 64)
if err == nil { if err == nil {
return f, s, nil return f, s, nil
@ -868,6 +1192,9 @@ func parseFuncArgs(lex *lexer, fieldName string, callback func(args []string) (f
if lex.isKeyword(",") { if lex.isKeyword(",") {
return nil, fmt.Errorf("unexpected ',' - missing arg in %s()", funcName) return nil, fmt.Errorf("unexpected ',' - missing arg in %s()", funcName)
} }
if lex.isKeyword("(") {
return nil, fmt.Errorf("unexpected '(' - missing arg in %s()", funcName)
}
arg := getCompoundFuncArg(lex) arg := getCompoundFuncArg(lex)
args = append(args, arg) args = append(args, arg)
if lex.isKeyword(")") { if lex.isKeyword(")") {
@ -912,13 +1239,14 @@ func parseFilterTimeWithOffset(lex *lexer) (*filterTime, error) {
if !lex.isKeyword("offset") { if !lex.isKeyword("offset") {
return ft, nil return ft, nil
} }
if !lex.mustNextToken() { lex.nextToken()
return nil, fmt.Errorf("missing offset for _time filter %s", ft) s, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse offset in _time filter: %w", err)
} }
s := getCompoundToken(lex)
d, ok := tryParseDuration(s) d, ok := tryParseDuration(s)
if !ok { if !ok {
return nil, fmt.Errorf("cannot parse offset %q for _time filter %s: %w", s, ft, err) return nil, fmt.Errorf("cannot parse offset %q for _time filter %s", s, ft)
} }
offset := int64(d) offset := int64(d)
ft.minTimestamp -= offset ft.minTimestamp -= offset
@ -935,7 +1263,10 @@ func parseFilterTime(lex *lexer) (*filterTime, error) {
case lex.isKeyword("("): case lex.isKeyword("("):
startTimeInclude = false startTimeInclude = false
default: default:
s := getCompoundToken(lex) s, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse _time filter: %w", err)
}
sLower := strings.ToLower(s) sLower := strings.ToLower(s)
if sLower == "now" || startsWithYear(s) { if sLower == "now" || startsWithYear(s) {
// Parse '_time:YYYY-MM-DD', which transforms to '_time:[YYYY-MM-DD, YYYY-MM-DD+1)' // Parse '_time:YYYY-MM-DD', which transforms to '_time:[YYYY-MM-DD, YYYY-MM-DD+1)'
@ -1076,113 +1407,21 @@ func stripTimezoneSuffix(s string) string {
} }
func parseFilterStream(lex *lexer) (*filterStream, error) { func parseFilterStream(lex *lexer) (*filterStream, error) {
if !lex.isKeyword("{") { sf, err := parseStreamFilter(lex)
return nil, fmt.Errorf("unexpected token %q instead of '{' in _stream filter", lex.token)
}
if !lex.mustNextToken() {
return nil, fmt.Errorf("incomplete _stream filter after '{'")
}
var filters []*andStreamFilter
for {
f, err := parseAndStreamFilter(lex)
if err != nil {
return nil, err
}
filters = append(filters, f)
switch {
case lex.isKeyword("}"):
lex.nextToken()
fs := &filterStream{
f: &StreamFilter{
orFilters: filters,
},
}
return fs, nil
case lex.isKeyword("or"):
if !lex.mustNextToken() {
return nil, fmt.Errorf("incomplete _stream filter after 'or'")
}
if lex.isKeyword("}") {
return nil, fmt.Errorf("unexpected '}' after 'or' in _stream filter")
}
default:
return nil, fmt.Errorf("unexpected token in _stream filter: %q; want '}' or 'or'", lex.token)
}
}
}
func newStreamFilter(s string) (*StreamFilter, error) {
lex := newLexer(s)
fs, err := parseFilterStream(lex)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return fs.f, nil fs := &filterStream{
} f: sf,
func parseAndStreamFilter(lex *lexer) (*andStreamFilter, error) {
var filters []*streamTagFilter
for {
if lex.isKeyword("}") {
asf := &andStreamFilter{
tagFilters: filters,
}
return asf, nil
}
f, err := parseStreamTagFilter(lex)
if err != nil {
return nil, err
}
filters = append(filters, f)
switch {
case lex.isKeyword("or", "}"):
asf := &andStreamFilter{
tagFilters: filters,
}
return asf, nil
case lex.isKeyword(","):
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing stream filter after ','")
}
default:
return nil, fmt.Errorf("unexpected token %q in _stream filter; want 'or', 'and', '}' or ','", lex.token)
}
} }
} return fs, nil
func parseStreamTagFilter(lex *lexer) (*streamTagFilter, error) {
tagName := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing operation in _stream filter for %q field", tagName)
}
if !lex.isKeyword("=", "!=", "=~", "!~") {
return nil, fmt.Errorf("unsupported operation %q in _steam filter for %q field; supported operations: =, !=, =~, !~", lex.token, tagName)
}
op := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing _stream filter value for %q field", tagName)
}
value := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing token after %q%s%q filter", tagName, op, value)
}
stf := &streamTagFilter{
tagName: tagName,
op: op,
value: value,
}
if op == "=~" || op == "!~" {
re, err := regexutil.NewPromRegex(value)
if err != nil {
return nil, fmt.Errorf("invalid regexp %q for stream filter: %w", value, err)
}
stf.regexp = re
}
return stf, nil
} }
func parseTime(lex *lexer) (int64, string, error) { func parseTime(lex *lexer) (int64, string, error) {
s := getCompoundToken(lex) s, err := getCompoundToken(lex)
if err != nil {
return 0, "", err
}
t, err := promutils.ParseTimeAt(s, float64(lex.currentTimestamp)/1e9) t, err := promutils.ParseTimeAt(s, float64(lex.currentTimestamp)/1e9)
if err != nil { if err != nil {
return 0, "", err return 0, "", err
@ -1312,3 +1551,10 @@ func parseInt(s string) (int64, error) {
} }
return nn, nil return nn, nil
} }
func nextafter(f, xInf float64) float64 {
if math.IsInf(f, 0) {
return f
}
return math.Nextafter(f, xInf)
}

@ -1,7 +1,6 @@
package logstorage package logstorage
import ( import (
"math"
"reflect" "reflect"
"strings" "strings"
"testing" "testing"
@ -34,51 +33,6 @@ func TestLexer(t *testing.T) {
[]string{"_stream", ":", "{", "foo", "=", "bar", ",", "a", "=~", "baz", ",", "b", "!=", "cd", ",", "d,}a", "!~", "abc", "}"}) []string{"_stream", ":", "{", "foo", "=", "bar", ",", "a", "=~", "baz", ",", "b", "!=", "cd", ",", "d,}a", "!~", "abc", "}"})
} }
func TestNewStreamFilterSuccess(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
sf, err := newStreamFilter(s)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
result := sf.String()
if result != resultExpected {
t.Fatalf("unexpected StreamFilter; got %s; want %s", result, resultExpected)
}
}
f("{}", "{}")
f(`{foo="bar"}`, `{foo="bar"}`)
f(`{ "foo" =~ "bar.+" , baz!="a" or x="y"}`, `{foo=~"bar.+",baz!="a" or x="y"}`)
f(`{"a b"='c}"d' OR de="aaa"}`, `{"a b"="c}\"d" or de="aaa"}`)
f(`{a="b", c="d" or x="y"}`, `{a="b",c="d" or x="y"}`)
}
func TestNewStreamFilterFailure(t *testing.T) {
f := func(s string) {
t.Helper()
sf, err := newStreamFilter(s)
if err == nil {
t.Fatalf("expecting non-nil error")
}
if sf != nil {
t.Fatalf("expecting nil sf; got %v", sf)
}
}
f("")
f("}")
f("{")
f("{foo")
f("{foo}")
f("{'foo")
f("{foo=")
f("{foo or bar}")
f("{foo=bar")
f("{foo=bar baz}")
f("{foo='bar' baz='x'}")
}
func TestParseTimeDuration(t *testing.T) { func TestParseTimeDuration(t *testing.T) {
f := func(s string, durationExpected time.Duration) { f := func(s string, durationExpected time.Duration) {
t.Helper() t.Helper()
@ -323,6 +277,10 @@ func TestParseFilterIn(t *testing.T) {
f(`:in("foo bar,baz")`, ``, []string{"foo bar,baz"}) f(`:in("foo bar,baz")`, ``, []string{"foo bar,baz"})
f(`ip:in(1.2.3.4, 5.6.7.8, 9.10.11.12)`, `ip`, []string{"1.2.3.4", "5.6.7.8", "9.10.11.12"}) f(`ip:in(1.2.3.4, 5.6.7.8, 9.10.11.12)`, `ip`, []string{"1.2.3.4", "5.6.7.8", "9.10.11.12"})
f(`foo-bar:in(foo,bar-baz.aa"bb","c,)d")`, `foo-bar`, []string{"foo", `bar-baz.aa"bb"`, "c,)d"}) f(`foo-bar:in(foo,bar-baz.aa"bb","c,)d")`, `foo-bar`, []string{"foo", `bar-baz.aa"bb"`, "c,)d"})
// verify `in(query)` - it shouldn't set values
f(`in(x|fields foo)`, ``, nil)
f(`a:in(* | fields bar)`, `a`, nil)
} }
func TestParseFilterIPv4Range(t *testing.T) { func TestParseFilterIPv4Range(t *testing.T) {
@ -537,15 +495,25 @@ func TestParseRangeFilter(t *testing.T) {
f(`range:range["-1.234e5", "-2e-5"]`, `range`, -1.234e5, -2e-5) f(`range:range["-1.234e5", "-2e-5"]`, `range`, -1.234e5, -2e-5)
f(`_msg:range[1, 2]`, `_msg`, 1, 2) f(`_msg:range[1, 2]`, `_msg`, 1, 2)
f(`:range(1, 2)`, ``, math.Nextafter(1, inf), math.Nextafter(2, -inf)) f(`:range(1, 2)`, ``, nextafter(1, inf), nextafter(2, -inf))
f(`range[1, 2)`, ``, 1, math.Nextafter(2, -inf)) f(`range[1, 2)`, ``, 1, nextafter(2, -inf))
f(`range("1", 2]`, ``, math.Nextafter(1, inf), 2) f(`range("1", 2]`, ``, nextafter(1, inf), 2)
f(`response_size:range[1KB, 10MiB]`, `response_size`, 1_000, 10*(1<<20)) f(`response_size:range[1KB, 10MiB]`, `response_size`, 1_000, 10*(1<<20))
f(`response_size:range[1G, 10Ti]`, `response_size`, 1_000_000_000, 10*(1<<40)) f(`response_size:range[1G, 10Ti]`, `response_size`, 1_000_000_000, 10*(1<<40))
f(`response_size:range[10, inf]`, `response_size`, 10, inf) f(`response_size:range[10, inf]`, `response_size`, 10, inf)
f(`duration:range[100ns, 1y2w2.5m3s5ms]`, `duration`, 100, 1*nsecsPerYear+2*nsecsPerWeek+2.5*nsecsPerMinute+3*nsecsPerSecond+5*nsecsPerMillisecond) f(`duration:range[100ns, 1y2w2.5m3s5ms]`, `duration`, 100, 1*nsecsPerYear+2*nsecsPerWeek+2.5*nsecsPerMinute+3*nsecsPerSecond+5*nsecsPerMillisecond)
f(`foo:>10.43`, `foo`, nextafter(10.43, inf), inf)
f(`foo: > -10.43`, `foo`, nextafter(-10.43, inf), inf)
f(`foo:>=10.43`, `foo`, 10.43, inf)
f(`foo: >= -10.43`, `foo`, -10.43, inf)
f(`foo:<10.43`, `foo`, -inf, nextafter(10.43, -inf))
f(`foo: < -10.43`, `foo`, -inf, nextafter(-10.43, -inf))
f(`foo:<=10.43`, `foo`, -inf, 10.43)
f(`foo: <= 10.43`, `foo`, -inf, 10.43)
} }
func TestParseQuerySuccess(t *testing.T) { func TestParseQuerySuccess(t *testing.T) {
@ -723,8 +691,8 @@ func TestParseQuerySuccess(t *testing.T) {
f("exact(foo*)", `exact(foo*)`) f("exact(foo*)", `exact(foo*)`)
f("exact('foo bar),|baz')", `exact("foo bar),|baz")`) f("exact('foo bar),|baz')", `exact("foo bar),|baz")`)
f("exact('foo bar),|baz'*)", `exact("foo bar),|baz"*)`) f("exact('foo bar),|baz'*)", `exact("foo bar),|baz"*)`)
f(`exact(foo|b:ar)`, `exact("foo|b:ar")`) f(`exact(foo/b:ar)`, `exact("foo/b:ar")`)
f(`foo:exact(foo|b:ar*)`, `foo:exact("foo|b:ar"*)`) f(`foo:exact(foo/b:ar*)`, `foo:exact("foo/b:ar"*)`)
// i filter // i filter
f("i(foo)", `i(foo)`) f("i(foo)", `i(foo)`)
@ -732,14 +700,21 @@ func TestParseQuerySuccess(t *testing.T) {
f("i(`foo`* )", `i(foo*)`) f("i(`foo`* )", `i(foo*)`)
f("i(' foo ) bar')", `i(" foo ) bar")`) f("i(' foo ) bar')", `i(" foo ) bar")`)
f("i('foo bar'*)", `i("foo bar"*)`) f("i('foo bar'*)", `i("foo bar"*)`)
f(`foo:i(foo:bar-baz|aa+bb)`, `foo:i("foo:bar-baz|aa+bb")`) f(`foo:i(foo:bar-baz/aa+bb)`, `foo:i("foo:bar-baz/aa+bb")`)
// in filter // in filter with values
f(`in()`, `in()`) f(`in()`, `in()`)
f(`in(foo)`, `in(foo)`) f(`in(foo)`, `in(foo)`)
f(`in(foo, bar)`, `in(foo,bar)`) f(`in(foo, bar)`, `in(foo,bar)`)
f(`in("foo bar", baz)`, `in("foo bar",baz)`) f(`in("foo bar", baz)`, `in("foo bar",baz)`)
f(`foo:in(foo-bar|baz)`, `foo:in("foo-bar|baz")`) f(`foo:in(foo-bar/baz)`, `foo:in("foo-bar/baz")`)
// in filter with query
f(`in(err|fields x)`, `in(err | fields x)`)
f(`ip:in(foo and user:in(admin, moderator)|fields ip)`, `ip:in(foo user:in(admin,moderator) | fields ip)`)
f(`x:in(_time:5m y:in(*|fields z) | stats by (q) count() rows|fields q)`, `x:in(_time:5m y:in(* | fields z) | stats by (q) count(*) as rows | fields q)`)
f(`in(bar:in(1,2,3) | uniq (x)) | stats count() rows`, `in(bar:in(1,2,3) | uniq by (x)) | stats count(*) as rows`)
f(`in((1) | fields z) | stats count() rows`, `in(1 | fields z) | stats count(*) as rows`)
// ipv4_range filter // ipv4_range filter
f(`ipv4_range(1.2.3.4, "5.6.7.8")`, `ipv4_range(1.2.3.4, 5.6.7.8)`) f(`ipv4_range(1.2.3.4, "5.6.7.8")`, `ipv4_range(1.2.3.4, 5.6.7.8)`)
@ -768,11 +743,18 @@ func TestParseQuerySuccess(t *testing.T) {
f(`range(0x1ff, inf)`, `range(0x1ff, inf)`) f(`range(0x1ff, inf)`, `range(0x1ff, inf)`)
f(`range(-INF,+inF)`, `range(-INF, +inF)`) f(`range(-INF,+inF)`, `range(-INF, +inF)`)
f(`range(1.5K, 22.5GiB)`, `range(1.5K, 22.5GiB)`) f(`range(1.5K, 22.5GiB)`, `range(1.5K, 22.5GiB)`)
f(`foo:range(5,inf)`, `foo:range(5, inf)`)
// >, >=, < and <= filter
f(`foo: > 10.5M`, `foo:>10.5M`)
f(`foo: >= 10.5M`, `foo:>=10.5M`)
f(`foo: < 10.5M`, `foo:<10.5M`)
f(`foo: <= 10.5M`, `foo:<=10.5M`)
// re filter // re filter
f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`) f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`)
f("re(foo)", `re("foo")`) f("re(foo)", `re("foo")`)
f(`foo:re(foo-bar|baz.)`, `foo:re("foo-bar|baz.")`) f(`foo:re(foo-bar/baz.)`, `foo:re("foo-bar/baz.")`)
// seq filter // seq filter
f(`seq()`, `seq()`) f(`seq()`, `seq()`)
@ -829,6 +811,10 @@ func TestParseQuerySuccess(t *testing.T) {
// multiple fields pipes // multiple fields pipes
f(`foo | fields bar | fields baz, abc`, `foo | fields bar | fields baz, abc`) f(`foo | fields bar | fields baz, abc`, `foo | fields bar | fields baz, abc`)
// field_names pipe
f(`foo | field_names as x`, `foo | field_names as x`)
f(`foo | field_names y`, `foo | field_names as y`)
// copy and cp pipe // copy and cp pipe
f(`* | copy foo as bar`, `* | copy foo as bar`) f(`* | copy foo as bar`, `* | copy foo as bar`)
f(`* | cp foo bar`, `* | copy foo as bar`) f(`* | cp foo bar`, `* | copy foo as bar`)
@ -966,6 +952,16 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | stats by (_time:week) count() foo`, `* | stats by (_time:week) count(*) as foo`) f(`* | stats by (_time:week) count() foo`, `* | stats by (_time:week) count(*) as foo`)
f(`* | stats by (_time:month) count() foo`, `* | stats by (_time:month) count(*) as foo`) f(`* | stats by (_time:month) count() foo`, `* | stats by (_time:month) count(*) as foo`)
f(`* | stats by (_time:year offset 6.5h) count() foo`, `* | stats by (_time:year offset 6.5h) count(*) as foo`) f(`* | stats by (_time:year offset 6.5h) count() foo`, `* | stats by (_time:year offset 6.5h) count(*) as foo`)
f(`* | stats (_time:year offset 6.5h) count() foo`, `* | stats by (_time:year offset 6.5h) count(*) as foo`)
// stats pipe with per-func filters
f(`* | stats count() if (foo bar) rows`, `* | stats count(*) if (foo bar) as rows`)
f(`* | stats by (_time:1d offset -2h, f2)
count() if (is_admin:true or _msg:"foo bar"*) as foo,
sum(duration) if (host:in('foo.com', 'bar.com') and path:/foobar) as bar`,
`* | stats by (_time:1d offset -2h, f2) count(*) if (is_admin:true or "foo bar"*) as foo, sum(duration) if (host:in(foo.com,bar.com) path:"/foobar") as bar`)
f(`* | stats count(x) if (error ip:in(_time:1d | fields ip)) rows`, `* | stats count(x) if (error ip:in(_time:1d | fields ip)) as rows`)
f(`* | stats count() if () rows`, `* | stats count(*) if () as rows`)
// sort pipe // sort pipe
f(`* | sort`, `* | sort`) f(`* | sort`, `* | sort`)
@ -983,6 +979,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | sort by (foo desc, bar) desc limit 10`, `* | sort by (foo desc, bar) desc limit 10`) f(`* | sort by (foo desc, bar) desc limit 10`, `* | sort by (foo desc, bar) desc limit 10`)
f(`* | sort by (foo desc, bar) desc OFFSET 30 limit 10`, `* | sort by (foo desc, bar) desc offset 30 limit 10`) f(`* | sort by (foo desc, bar) desc OFFSET 30 limit 10`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
f(`* | sort by (foo desc, bar) desc limit 10 OFFSET 30`, `* | sort by (foo desc, bar) desc offset 30 limit 10`) f(`* | sort by (foo desc, bar) desc limit 10 OFFSET 30`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
f(`* | sort (foo desc, bar) desc limit 10 OFFSET 30`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
// uniq pipe // uniq pipe
f(`* | uniq`, `* | uniq`) f(`* | uniq`, `* | uniq`)
@ -991,8 +988,32 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | uniq by(foo,*,bar)`, `* | uniq`) f(`* | uniq by(foo,*,bar)`, `* | uniq`)
f(`* | uniq by(f1,f2)`, `* | uniq by (f1, f2)`) f(`* | uniq by(f1,f2)`, `* | uniq by (f1, f2)`)
f(`* | uniq by(f1,f2) limit 10`, `* | uniq by (f1, f2) limit 10`) f(`* | uniq by(f1,f2) limit 10`, `* | uniq by (f1, f2) limit 10`)
f(`* | uniq (f1,f2) limit 10`, `* | uniq by (f1, f2) limit 10`)
f(`* | uniq limit 10`, `* | uniq limit 10`) f(`* | uniq limit 10`, `* | uniq limit 10`)
// filter pipe
f(`* | filter error ip:12.3.4.5 or warn`, `* | filter error ip:12.3.4.5 or warn`)
f(`foo | stats by (host) count() logs | filter logs:>50 | sort by (logs desc) | limit 10`, `foo | stats by (host) count(*) as logs | filter logs:>50 | sort by (logs desc) | limit 10`)
// extract pipe
f(`* | extract "foo<bar>baz"`, `* | extract "foo<bar>baz"`)
f(`* | extract from _msg "foo<bar>baz"`, `* | extract "foo<bar>baz"`)
f(`* | extract from '' 'foo<bar>baz'`, `* | extract "foo<bar>baz"`)
f("* | extract from x `foo<bar>baz`", `* | extract from x "foo<bar>baz"`)
f("* | extract from x foo<bar>baz", `* | extract from x "foo<bar>baz"`)
// unpack_json pipe
f(`* | unpack_json`, `* | unpack_json`)
f(`* | unpack_json result_prefix y`, `* | unpack_json result_prefix y`)
f(`* | unpack_json from x`, `* | unpack_json from x`)
f(`* | unpack_json from x result_prefix y`, `* | unpack_json from x result_prefix y`)
// unpack_logfmt pipe
f(`* | unpack_logfmt`, `* | unpack_logfmt`)
f(`* | unpack_logfmt result_prefix y`, `* | unpack_logfmt result_prefix y`)
f(`* | unpack_logfmt from x`, `* | unpack_logfmt from x`)
f(`* | unpack_logfmt from x result_prefix y`, `* | unpack_logfmt from x result_prefix y`)
// multiple different pipes // multiple different pipes
f(`* | fields foo, bar | limit 100 | stats by(foo,bar) count(baz) as qwert`, `* | fields foo, bar | limit 100 | stats by (foo, bar) count(baz) as qwert`) f(`* | fields foo, bar | limit 100 | stats by(foo,bar) count(baz) as qwert`, `* | fields foo, bar | limit 100 | stats by (foo, bar) count(baz) as qwert`)
f(`* | skip 100 | head 20 | skip 10`, `* | offset 100 | limit 20 | offset 10`) f(`* | skip 100 | head 20 | skip 10`, `* | offset 100 | limit 20 | offset 10`)
@ -1130,6 +1151,10 @@ func TestParseQueryFailure(t *testing.T) {
f(`in(foo, "bar baz"*, abc)`) f(`in(foo, "bar baz"*, abc)`)
f(`in(foo bar)`) f(`in(foo bar)`)
f(`in(foo, bar`) f(`in(foo, bar`)
f(`in(foo|bar)`)
f(`in(|foo`)
f(`in(x | limit 10)`)
f(`in(x | fields a,b)`)
// invalid ipv4_range // invalid ipv4_range
f(`ipv4_range(`) f(`ipv4_range(`)
@ -1208,6 +1233,18 @@ func TestParseQueryFailure(t *testing.T) {
f(`foo | fields bar,`) f(`foo | fields bar,`)
f(`foo | fields bar,,`) f(`foo | fields bar,,`)
// invalid field_names
f(`foo | field_names`)
f(`foo | field_names |`)
f(`foo | field_names (`)
f(`foo | field_names )`)
f(`foo | field_names ,`)
f(`foo | field_names ()`)
f(`foo | field_names (x)`)
f(`foo | field_names (x,y)`)
f(`foo | field_names x y`)
f(`foo | field_names x, y`)
// invalid copy and cp pipe // invalid copy and cp pipe
f(`foo | copy`) f(`foo | copy`)
f(`foo | cp`) f(`foo | cp`)
@ -1359,6 +1396,39 @@ func TestParseQueryFailure(t *testing.T) {
f(`foo | uniq by(a) bar`) f(`foo | uniq by(a) bar`)
f(`foo | uniq by(a) limit -10`) f(`foo | uniq by(a) limit -10`)
f(`foo | uniq by(a) limit foo`) f(`foo | uniq by(a) limit foo`)
// invalid filter pipe
f(`foo | filter`)
f(`foo | filter | sort by (x)`)
f(`foo | filter (`)
f(`foo | filter )`)
// invalid extract pipe
f(`foo | extract`)
f(`foo | extract bar`)
f(`foo | extract "xy"`)
f(`foo | extract "<>"`)
f(`foo | extract "foo<>foo"`)
f(`foo | extract "foo<>foo<_>bar<*>asdf"`)
f(`foo | extract from`)
f(`foo | extract from x`)
f(`foo | extract from x "abc"`)
f(`foo | extract from x "<abc`)
f(`foo | extract from x "<abc>" de`)
// invalid unpack_json pipe
f(`foo | unpack_json bar`)
f(`foo | unpack_json from`)
f(`foo | unpack_json result_prefix`)
f(`foo | unpack_json result_prefix x from y`)
f(`foo | unpack_json from x result_prefix`)
// invalid unpack_logfmt pipe
f(`foo | unpack_logfmt bar`)
f(`foo | unpack_logfmt from`)
f(`foo | unpack_logfmt result_prefix`)
f(`foo | unpack_logfmt result_prefix x from y`)
f(`foo | unpack_logfmt from x result_prefix`)
} }
func TestQueryGetNeededColumns(t *testing.T) { func TestQueryGetNeededColumns(t *testing.T) {
@ -1367,8 +1437,9 @@ func TestQueryGetNeededColumns(t *testing.T) {
q, err := ParseQuery(s) q, err := ParseQuery(s)
if err != nil { if err != nil {
t.Fatalf("cannot parse query %s: %s", s, err) t.Fatalf("cannot parse query [%s]: %s", s, err)
} }
q.Optimize()
needed, unneeded := q.getNeededColumns() needed, unneeded := q.getNeededColumns()
neededColumns := strings.Join(needed, ",") neededColumns := strings.Join(needed, ",")
@ -1454,11 +1525,10 @@ func TestQueryGetNeededColumns(t *testing.T) {
f(`* | sort by (f1) | sort by (f2,f3 desc) desc | fields f4 | rm f1,f2,f5`, `f1,f2,f3,f4`, ``) f(`* | sort by (f1) | sort by (f2,f3 desc) desc | fields f4 | rm f1,f2,f5`, `f1,f2,f3,f4`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2`, `f1,f2,f3,f4`, ``) f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2`, `f1,f2,f3,f4`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f5,f6`, ``, ``) f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f5,f6`, `f1`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f1,f5`, `f1`, ``) f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f1,f5`, `f1`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields r1`, `f1,f2`, ``) f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields r1`, `f1,f2`, ``)
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields r2,r3`, `f1,f3,f4`, ``) f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields r2,r3`, `f1,f3,f4`, ``)
f(`_time:5m | stats by(_time:day) count() r1 | stats values(_time) r2`, `_time`, ``)
f(`* | stats count(f1) r1 | stats count() r1`, ``, ``) f(`* | stats count(f1) r1 | stats count() r1`, ``, ``)
f(`* | stats count(f1) r1 | stats count() r2`, ``, ``) f(`* | stats count(f1) r1 | stats count() r2`, ``, ``)
f(`* | stats count(f1) r1 | stats count(r1) r2`, `f1`, ``) f(`* | stats count(f1) r1 | stats count(r1) r2`, `f1`, ``)
@ -1470,12 +1540,60 @@ func TestQueryGetNeededColumns(t *testing.T) {
f(`* | stats by(f3,f4) count(f1,f2) r1 | stats count(f2) r1, count(r1) r2 | fields r2`, `f1,f2,f3,f4`, ``) f(`* | stats by(f3,f4) count(f1,f2) r1 | stats count(f2) r1, count(r1) r2 | fields r2`, `f1,f2,f3,f4`, ``)
f(`* | stats by(f3,f4) count(f1,f2) r1 | stats count(f3) r1, count(r1) r2 | fields r1`, `f3,f4`, ``) f(`* | stats by(f3,f4) count(f1,f2) r1 | stats count(f3) r1, count(r1) r2 | fields r1`, `f3,f4`, ``)
f(`_time:5m | stats by(_time:day) count() r1 | stats values(_time) r2`, `_time`, ``)
f(`_time:1y | stats (_time:1w) count() r1 | stats count() r2`, `_time`, ``)
f(`* | uniq`, `*`, ``) f(`* | uniq`, `*`, ``)
f(`* | uniq by (f1,f2)`, `f1,f2`, ``) f(`* | uniq by (f1,f2)`, `f1,f2`, ``)
f(`* | uniq by (f1,f2) | fields f1,f3`, `f1,f2`, ``) f(`* | uniq by (f1,f2) | fields f1,f3`, `f1,f2`, ``)
f(`* | uniq by (f1,f2) | rm f1,f3`, `f1,f2`, ``) f(`* | uniq by (f1,f2) | rm f1,f3`, `f1,f2`, ``)
f(`* | uniq by (f1,f2) | fields f3`, `f1,f2`, ``) f(`* | uniq by (f1,f2) | fields f3`, `f1,f2`, ``)
f(`* | filter foo f1:bar`, `*`, ``)
f(`* | filter foo f1:bar | fields f2`, `f2`, ``)
f(`* | limit 10 | filter foo f1:bar | fields f2`, `_msg,f1,f2`, ``)
f(`* | filter foo f1:bar | fields f1`, `f1`, ``)
f(`* | filter foo f1:bar | rm f1`, `*`, `f1`)
f(`* | limit 10 | filter foo f1:bar | rm f1`, `*`, ``)
f(`* | filter foo f1:bar | rm f2`, `*`, `f2`)
f(`* | limit 10 | filter foo f1:bar | rm f2`, `*`, `f2`)
f(`* | fields x | filter foo f1:bar | rm f2`, `x`, ``)
f(`* | fields x,f1 | filter foo f1:bar | rm f2`, `f1,x`, ``)
f(`* | rm x,f1 | filter foo f1:bar`, `*`, `f1,x`)
f(`* | field_names as foo`, `*`, `_time`)
f(`* | field_names foo | fields bar`, `*`, `_time`)
f(`* | field_names foo | fields foo`, `*`, `_time`)
f(`* | field_names foo | rm foo`, `*`, `_time`)
f(`* | field_names foo | rm bar`, `*`, `_time`)
f(`* | field_names foo | rm _time`, `*`, `_time`)
f(`* | fields x,y | field_names as bar | fields baz`, `x,y`, ``)
f(`* | rm x,y | field_names as bar | fields baz`, `*`, `x,y`)
f(`* | extract from s1 "<f1>x<f2>"`, `*`, `f1,f2`)
f(`* | extract from s1 "<f1>x<f2>" | fields foo`, `foo`, ``)
f(`* | extract from s1 "<f1>x<f2>" | fields foo,s1`, `foo,s1`, ``)
f(`* | extract from s1 "<f1>x<f2>" | fields foo,f1`, `foo,s1`, ``)
f(`* | extract from s1 "<f1>x<f2>" | fields foo,f1,f2`, `foo,s1`, ``)
f(`* | extract from s1 "<f1>x<f2>" | rm foo`, `*`, `f1,f2,foo`)
f(`* | extract from s1 "<f1>x<f2>" | rm foo,s1`, `*`, `f1,f2,foo`)
f(`* | extract from s1 "<f1>x<f2>" | rm foo,f1`, `*`, `f1,f2,foo`)
f(`* | extract from s1 "<f1>x<f2>" | rm foo,f1,f2`, `*`, `f1,f2,foo,s1`)
f(`* | unpack_json`, `*`, ``)
f(`* | unpack_json from s1`, `*`, ``)
f(`* | unpack_json from s1 | fields f1`, `f1,s1`, ``)
f(`* | unpack_json from s1 | fields s1,f1`, `f1,s1`, ``)
f(`* | unpack_json from s1 | rm f1`, `*`, `f1`)
f(`* | unpack_json from s1 | rm f1,s1`, `*`, `f1`)
f(`* | unpack_logfmt`, `*`, ``)
f(`* | unpack_logfmt from s1`, `*`, ``)
f(`* | unpack_logfmt from s1 | fields f1`, `f1,s1`, ``)
f(`* | unpack_logfmt from s1 | fields s1,f1`, `f1,s1`, ``)
f(`* | unpack_logfmt from s1 | rm f1`, `*`, `f1`)
f(`* | unpack_logfmt from s1 | rm f1,s1`, `*`, `f1`)
f(`* | rm f1, f2`, `*`, `f1,f2`) f(`* | rm f1, f2`, `*`, `f1,f2`)
f(`* | rm f1, f2 | mv f2 f3`, `*`, `f1,f2,f3`) f(`* | rm f1, f2 | mv f2 f3`, `*`, `f1,f2,f3`)
f(`* | rm f1, f2 | cp f2 f3`, `*`, `f1,f2,f3`) f(`* | rm f1, f2 | cp f2 f3`, `*`, `f1,f2,f3`)

@ -67,67 +67,103 @@ func parsePipes(lex *lexer) ([]pipe, error) {
if !lex.isKeyword("|") { if !lex.isKeyword("|") {
return nil, fmt.Errorf("expecting '|'; got %q", lex.token) return nil, fmt.Errorf("expecting '|'; got %q", lex.token)
} }
if !lex.mustNextToken() { lex.nextToken()
return nil, fmt.Errorf("missing token after '|'") p, err := parsePipe(lex)
} if err != nil {
switch { return nil, err
case lex.isKeyword("stats"):
ps, err := parsePipeStats(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'stats' pipe: %w", err)
}
pipes = append(pipes, ps)
case lex.isKeyword("sort"):
ps, err := parsePipeSort(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'sort' pipe: %w", err)
}
pipes = append(pipes, ps)
case lex.isKeyword("uniq"):
pu, err := parsePipeUniq(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'uniq' pipe: %w", err)
}
pipes = append(pipes, pu)
case lex.isKeyword("limit", "head"):
pl, err := parsePipeLimit(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'limit' pipe: %w", err)
}
pipes = append(pipes, pl)
case lex.isKeyword("offset", "skip"):
ps, err := parsePipeOffset(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err)
}
pipes = append(pipes, ps)
case lex.isKeyword("fields"):
pf, err := parsePipeFields(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'fields' pipe: %w", err)
}
pipes = append(pipes, pf)
case lex.isKeyword("copy", "cp"):
pc, err := parsePipeCopy(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'copy' pipe: %w", err)
}
pipes = append(pipes, pc)
case lex.isKeyword("rename", "mv"):
pr, err := parsePipeRename(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'rename' pipe: %w", err)
}
pipes = append(pipes, pr)
case lex.isKeyword("delete", "del", "rm"):
pd, err := parsePipeDelete(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'delete' pipe: %w", err)
}
pipes = append(pipes, pd)
default:
return nil, fmt.Errorf("unexpected pipe %q", lex.token)
} }
pipes = append(pipes, p)
} }
return pipes, nil return pipes, nil
} }
func parsePipe(lex *lexer) (pipe, error) {
switch {
case lex.isKeyword("copy", "cp"):
pc, err := parsePipeCopy(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'copy' pipe: %w", err)
}
return pc, nil
case lex.isKeyword("delete", "del", "rm"):
pd, err := parsePipeDelete(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'delete' pipe: %w", err)
}
return pd, nil
case lex.isKeyword("extract"):
pe, err := parsePipeExtract(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'extract' pipe: %w", err)
}
return pe, nil
case lex.isKeyword("field_names"):
pf, err := parsePipeFieldNames(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'field_names' pipe: %w", err)
}
return pf, nil
case lex.isKeyword("fields"):
pf, err := parsePipeFields(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'fields' pipe: %w", err)
}
return pf, nil
case lex.isKeyword("filter"):
pf, err := parsePipeFilter(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'filter' pipe: %w", err)
}
return pf, nil
case lex.isKeyword("limit", "head"):
pl, err := parsePipeLimit(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'limit' pipe: %w", err)
}
return pl, nil
case lex.isKeyword("offset", "skip"):
ps, err := parsePipeOffset(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err)
}
return ps, nil
case lex.isKeyword("rename", "mv"):
pr, err := parsePipeRename(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'rename' pipe: %w", err)
}
return pr, nil
case lex.isKeyword("sort"):
ps, err := parsePipeSort(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'sort' pipe: %w", err)
}
return ps, nil
case lex.isKeyword("stats"):
ps, err := parsePipeStats(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'stats' pipe: %w", err)
}
return ps, nil
case lex.isKeyword("uniq"):
pu, err := parsePipeUniq(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'uniq' pipe: %w", err)
}
return pu, nil
case lex.isKeyword("unpack_json"):
pu, err := parsePipeUnpackJSON(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'unpack_json' pipe: %w", err)
}
return pu, nil
case lex.isKeyword("unpack_logfmt"):
pu, err := parsePipeUnpackLogfmt(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'unpack_logfmt' pipe: %w", err)
}
return pu, nil
default:
return nil, fmt.Errorf("unexpected pipe %q", lex.token)
}
}

@ -40,7 +40,7 @@ func (pc *pipeCopy) updateNeededFields(neededFields, unneededFields fieldsSet) {
} }
if neededFields.contains("*") { if neededFields.contains("*") {
// update only unneeded fields // update only unneeded fields
unneededFields.addAll(pc.dstFields) unneededFields.addFields(pc.dstFields)
for i, srcField := range pc.srcFields { for i, srcField := range pc.srcFields {
if neededSrcFields[i] { if neededSrcFields[i] {
unneededFields.remove(srcField) unneededFields.remove(srcField)
@ -48,7 +48,7 @@ func (pc *pipeCopy) updateNeededFields(neededFields, unneededFields fieldsSet) {
} }
} else { } else {
// update only needed fields and reset unneeded fields // update only needed fields and reset unneeded fields
neededFields.removeAll(pc.dstFields) neededFields.removeFields(pc.dstFields)
for i, srcField := range pc.srcFields { for i, srcField := range pc.srcFields {
if neededSrcFields[i] { if neededSrcFields[i] {
neededFields.add(srcField) neededFields.add(srcField)

@ -6,20 +6,9 @@ import (
) )
func TestPipeCopyUpdateNeededFields(t *testing.T) { func TestPipeCopyUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) { f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper() t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeCopy(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
} }
// all the needed fields // all the needed fields
@ -53,6 +42,22 @@ func TestPipeCopyUpdateNeededFields(t *testing.T) {
f("copy s1 d1, s2 d2", "s2,d1,f1,f2", "", "s1,s2,f1,f2", "") f("copy s1 d1, s2 d2", "s2,d1,f1,f2", "", "s1,s2,f1,f2", "")
} }
func expectPipeNeededFields(t *testing.T, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipe(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
}
func assertNeededFields(t *testing.T, nfs, unfs fieldsSet, neededFieldsExpected, unneededFieldsExpected string) { func assertNeededFields(t *testing.T, nfs, unfs fieldsSet, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper() t.Helper()
@ -75,7 +80,7 @@ func assertNeededFields(t *testing.T, nfs, unfs fieldsSet, neededFieldsExpected,
func newTestFieldsSet(fields string) fieldsSet { func newTestFieldsSet(fields string) fieldsSet {
fs := newFieldsSet() fs := newFieldsSet()
if fields != "" { if fields != "" {
fs.addAll(strings.Split(fields, ",")) fs.addFields(strings.Split(fields, ","))
} }
return fs return fs
} }

@ -25,10 +25,10 @@ func (pd *pipeDelete) String() string {
func (pd *pipeDelete) updateNeededFields(neededFields, unneededFields fieldsSet) { func (pd *pipeDelete) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") { if neededFields.contains("*") {
// update only unneeded fields // update only unneeded fields
unneededFields.addAll(pd.fields) unneededFields.addFields(pd.fields)
} else { } else {
// update only needed fields // update only needed fields
neededFields.removeAll(pd.fields) neededFields.removeFields(pd.fields)
} }
} }

@ -7,18 +7,7 @@ import (
func TestPipeDeleteUpdateNeededFields(t *testing.T) { func TestPipeDeleteUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) { f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper() t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeDelete(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
} }
// all the needed fields // all the needed fields

@ -0,0 +1,357 @@
package logstorage
import (
"fmt"
"html"
"strconv"
"strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// pipeExtract processes '| extract from <field> <pattern>' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe
type pipeExtract struct {
fromField string
steps []extractFormatStep
pattern string
}
func (pe *pipeExtract) String() string {
s := "extract"
if !isMsgFieldName(pe.fromField) {
s += " from " + quoteTokenIfNeeded(pe.fromField)
}
s += " " + quoteTokenIfNeeded(pe.pattern)
return s
}
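// A hedged example of the pattern syntax this pipe implements (field names
// are illustrative; see parseExtractFormatSteps below for the exact rules):
//
//	_time:5m | extract "ip=<ip> method=<method> path=<path> "
//
// extracts the 'ip', 'method' and 'path' fields from _msg, while
//
//	* | extract from request_line "<method> <path> HTTP"
//
// reads from the 'request_line' field instead of _msg.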
func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFieldsOrig := unneededFields.clone()
needFromField := false
for _, step := range pe.steps {
if step.field != "" {
if !unneededFieldsOrig.contains(step.field) {
needFromField = true
}
unneededFields.add(step.field)
}
}
if needFromField {
unneededFields.remove(pe.fromField)
} else {
unneededFields.add(pe.fromField)
}
} else {
needFromField := false
for _, step := range pe.steps {
if step.field != "" && neededFields.contains(step.field) {
needFromField = true
neededFields.remove(step.field)
}
}
if needFromField {
neededFields.add(pe.fromField)
}
}
}
func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeExtractProcessorShard, workersCount)
for i := range shards {
ef := newExtractFormat(pe.steps)
rcs := make([]resultColumn, len(ef.fields))
for j := range rcs {
rcs[j].name = ef.fields[j].name
}
shards[i] = pipeExtractProcessorShard{
pipeExtractProcessorShardNopad: pipeExtractProcessorShardNopad{
ef: ef,
rcs: rcs,
},
}
}
pep := &pipeExtractProcessor{
pe: pe,
ppBase: ppBase,
shards: shards,
}
return pep
}
type pipeExtractProcessor struct {
pe *pipeExtract
ppBase pipeProcessor
shards []pipeExtractProcessorShard
}
type pipeExtractProcessorShard struct {
pipeExtractProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeExtractProcessorShardNopad{})%128]byte
}
type pipeExtractProcessorShardNopad struct {
ef *extractFormat
rcs []resultColumn
}
func (pep *pipeExtractProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pep.shards[workerID]
ef := shard.ef
rcs := shard.rcs
c := br.getColumnByName(pep.pe.fromField)
if c.isConst {
v := c.valuesEncoded[0]
ef.apply(v)
for i, f := range ef.fields {
fieldValue := *f.value
rc := &rcs[i]
for range br.timestamps {
rc.addValue(fieldValue)
}
}
} else {
values := c.getValues(br)
for i, v := range values {
if i == 0 || values[i-1] != v {
ef.apply(v)
}
for j, f := range ef.fields {
rcs[j].addValue(*f.value)
}
}
}
br.addResultColumns(rcs)
pep.ppBase.writeBlock(workerID, br)
for i := range rcs {
rcs[i].resetValues()
}
}
func (pep *pipeExtractProcessor) flush() error {
return nil
}
func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
if !lex.isKeyword("extract") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "extract")
}
lex.nextToken()
fromField := "_msg"
if lex.isKeyword("from") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
}
fromField = f
}
pattern, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot read 'pattern': %w", err)
}
steps, err := parseExtractFormatSteps(pattern)
if err != nil {
return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", pattern, err)
}
pe := &pipeExtract{
fromField: fromField,
steps: steps,
pattern: pattern,
}
return pe, nil
}
type extractFormat struct {
// steps contains steps for extracting fields from string
steps []extractFormatStep
// matches contains matches for every step in steps
matches []string
// fields contains matches for non-empty fields
fields []extractField
}
type extractField struct {
name string
value *string
}
type extractFormatStep struct {
prefix string
field string
}
func newExtractFormat(steps []extractFormatStep) *extractFormat {
if len(steps) == 0 {
logger.Panicf("BUG: steps cannot be empty")
}
matches := make([]string, len(steps))
var fields []extractField
for i, step := range steps {
if step.field != "" {
fields = append(fields, extractField{
name: step.field,
value: &matches[i],
})
}
}
if len(fields) == 0 {
logger.Panicf("BUG: fields cannot be empty")
}
ef := &extractFormat{
steps: steps,
matches: matches,
fields: fields,
}
return ef
}
func (ef *extractFormat) apply(s string) {
clear(ef.matches)
steps := ef.steps
if prefix := steps[0].prefix; prefix != "" {
n := strings.Index(s, prefix)
if n < 0 {
// Mismatch
return
}
s = s[n+len(prefix):]
}
matches := ef.matches
for i := range steps {
nextPrefix := ""
if i+1 < len(steps) {
nextPrefix = steps[i+1].prefix
}
us, nOffset := tryUnquoteString(s)
if nOffset >= 0 {
// Matched quoted string
matches[i] = us
s = s[nOffset:]
if !strings.HasPrefix(s, nextPrefix) {
// Mismatch
return
}
s = s[len(nextPrefix):]
} else {
// Match unquoted string until the nextPrefix
if nextPrefix == "" {
matches[i] = s
return
}
n := strings.Index(s, nextPrefix)
if n < 0 {
// Mismatch
return
}
matches[i] = s[:n]
s = s[n+len(nextPrefix):]
}
}
}
func tryUnquoteString(s string) (string, int) {
if len(s) == 0 {
return s, -1
}
if s[0] != '"' && s[0] != '`' {
return s, -1
}
qp, err := strconv.QuotedPrefix(s)
if err != nil {
return s, -1
}
us, err := strconv.Unquote(qp)
if err != nil {
return s, -1
}
return us, len(qp)
}
func parseExtractFormatSteps(s string) ([]extractFormatStep, error) {
var steps []extractFormatStep
hasNamedField := false
n := strings.IndexByte(s, '<')
if n < 0 {
return nil, fmt.Errorf("missing <...> fields")
}
prefix := s[:n]
s = s[n+1:]
for {
n := strings.IndexByte(s, '>')
if n < 0 {
return nil, fmt.Errorf("missing '>' for <%s", s)
}
field := s[:n]
s = s[n+1:]
if field == "_" || field == "*" {
field = ""
}
steps = append(steps, extractFormatStep{
prefix: prefix,
field: field,
})
if !hasNamedField && field != "" {
hasNamedField = true
}
if len(s) == 0 {
break
}
n = strings.IndexByte(s, '<')
if n < 0 {
steps = append(steps, extractFormatStep{
prefix: s,
})
break
}
if n == 0 {
return nil, fmt.Errorf("missing delimiter after <%s>", field)
}
prefix = s[:n]
s = s[n+1:]
}
if !hasNamedField {
return nil, fmt.Errorf("missing named fields like <name>")
}
for i := range steps {
step := &steps[i]
step.prefix = html.UnescapeString(step.prefix)
}
return steps, nil
}
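To make the pattern semantics above easier to follow: a pattern compiles into literal prefixes and <field> placeholders, each placeholder captures text up to the next literal, and values starting with '"' or '`' are unquoted via strconv.QuotedPrefix/strconv.Unquote first. Below is a rough self-contained sketch of that matching loop under those assumptions; it omits HTML-unescaping of prefixes and other details of the real implementation, and every name in it is illustrative:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

type step struct {
	prefix string // literal text expected before the field
	field  string // field name to capture; "" means match and discard
}

// extract applies steps to s and returns the captured fields.
func extract(steps []step, s string) map[string]string {
	m := make(map[string]string)
	n := strings.Index(s, steps[0].prefix)
	if n < 0 {
		return m // the leading literal is not found - mismatch
	}
	s = s[n+len(steps[0].prefix):]
	for i, st := range steps {
		next := ""
		if i+1 < len(steps) {
			next = steps[i+1].prefix
		}
		// Quoted value: take the quoted prefix and unquote it.
		if len(s) > 0 && (s[0] == '"' || s[0] == '`') {
			if qp, err := strconv.QuotedPrefix(s); err == nil {
				if uq, uerr := strconv.Unquote(qp); uerr == nil {
					if st.field != "" {
						m[st.field] = uq
					}
					rest := s[len(qp):]
					if !strings.HasPrefix(rest, next) {
						return m // literal after the quoted value does not match
					}
					s = rest[len(next):]
					continue
				}
			}
		}
		// Unquoted value: capture everything up to the next literal.
		if next == "" {
			if st.field != "" {
				m[st.field] = s
			}
			return m
		}
		k := strings.Index(s, next)
		if k < 0 {
			return m // mismatch
		}
		if st.field != "" {
			m[st.field] = s[:k]
		}
		s = s[k+len(next):]
	}
	return m
}

func main() {
	// Rough equivalent of the pattern "ip=<ip> path=<path> <_>".
	steps := []step{
		{prefix: "ip=", field: "ip"},
		{prefix: " path=", field: "path"},
		{prefix: " ", field: ""},
	}
	fmt.Println(extract(steps, "x=a ip=1.2.3.4 path=/foo/bar some tail"))
	// map[ip:1.2.3.4 path:/foo/bar]
}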

View file

@ -0,0 +1,213 @@
package logstorage
import (
"reflect"
"testing"
)
func TestExtractFormatApply(t *testing.T) {
f := func(pattern, s string, resultsExpected []string) {
t.Helper()
steps, err := parseExtractFormatSteps(pattern)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
ef := newExtractFormat(steps)
ef.apply(s)
if len(ef.fields) != len(resultsExpected) {
t.Fatalf("unexpected number of results; got %d; want %d", len(ef.fields), len(resultsExpected))
}
for i, f := range ef.fields {
if v := *f.value; v != resultsExpected[i] {
t.Fatalf("unexpected value for field %q; got %q; want %q", f.name, v, resultsExpected[i])
}
}
}
f("<foo>", "", []string{""})
f("<foo>", "abc", []string{"abc"})
f("<foo>bar", "", []string{""})
f("<foo>bar", "bar", []string{""})
f("<foo>bar", "bazbar", []string{"baz"})
f("<foo>bar", "a bazbar xdsf", []string{"a baz"})
f("<foo>bar<>", "a bazbar xdsf", []string{"a baz"})
f("<foo>bar<>x", "a bazbar xdsf", []string{"a baz"})
f("foo<bar>", "", []string{""})
f("foo<bar>", "foo", []string{""})
f("foo<bar>", "a foo xdf sdf", []string{" xdf sdf"})
f("foo<bar>", "a foo foobar", []string{" foobar"})
f("foo<bar>baz", "a foo foobar", []string{""})
f("foo<bar>baz", "a foobaz bar", []string{""})
f("foo<bar>baz", "a foo foobar baz", []string{" foobar "})
f("foo<bar>baz", "a foo foobar bazabc", []string{" foobar "})
f("ip=<ip> <> path=<path> ", "x=a, ip=1.2.3.4 method=GET host='abc' path=/foo/bar some tail here", []string{"1.2.3.4", "/foo/bar"})
// escaped pattern
f("ip=&lt;<ip>&gt;", "foo ip=<1.2.3.4> bar", []string{"1.2.3.4"})
f("ip=&lt;<ip>&gt;", "foo ip=<foo&amp;bar> bar", []string{"foo&amp;bar"})
// quoted fields
f(`"msg":<msg>,`, `{"foo":"bar","msg":"foo,b\"ar\n\t","baz":"x"}`, []string{`foo,b"ar` + "\n\t"})
f(`foo=<bar>`, "foo=`bar baz,abc` def", []string{"bar baz,abc"})
f(`foo=<bar> `, "foo=`bar baz,abc` def", []string{"bar baz,abc"})
f(`<foo>`, `"foo,\"bar"`, []string{`foo,"bar`})
f(`<foo>,"bar`, `"foo,\"bar"`, []string{`foo,"bar`})
}
func TestParseExtractFormatStepsSuccess(t *testing.T) {
f := func(s string, stepsExpected []extractFormatStep) {
t.Helper()
steps, err := parseExtractFormatSteps(s)
if err != nil {
t.Fatalf("unexpected error when parsing %q: %s", s, err)
}
if !reflect.DeepEqual(steps, stepsExpected) {
t.Fatalf("unexpected steps for [%s]; got %v; want %v", s, steps, stepsExpected)
}
}
f("<foo>", []extractFormatStep{
{
field: "foo",
},
})
f("<foo>bar", []extractFormatStep{
{
field: "foo",
},
{
prefix: "bar",
},
})
f("<>bar<foo>", []extractFormatStep{
{},
{
prefix: "bar",
field: "foo",
},
})
f("bar<foo>", []extractFormatStep{
{
prefix: "bar",
field: "foo",
},
})
f("bar<foo>abc", []extractFormatStep{
{
prefix: "bar",
field: "foo",
},
{
prefix: "abc",
},
})
f("bar<foo>abc<_>", []extractFormatStep{
{
prefix: "bar",
field: "foo",
},
{
prefix: "abc",
},
})
f("<foo>bar<baz>", []extractFormatStep{
{
field: "foo",
},
{
prefix: "bar",
field: "baz",
},
})
f("bar<foo>baz", []extractFormatStep{
{
prefix: "bar",
field: "foo",
},
{
prefix: "baz",
},
})
f("&lt;<foo>&amp;gt;", []extractFormatStep{
{
prefix: "<",
field: "foo",
},
{
prefix: "&gt;",
},
})
}
func TestParseExtractFormatStepFailure(t *testing.T) {
f := func(s string) {
t.Helper()
_, err := parseExtractFormatSteps(s)
if err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
}
}
// empty string
f("")
// zero fields
f("foobar")
// Zero named fields
f("<>")
f("foo<>")
f("<>foo")
f("foo<_>bar<*>baz<>xxx")
// missing delimiter between fields
f("<foo><bar>")
f("<><bar>")
f("<foo><>")
f("bb<foo><><bar>aa")
f("aa<foo><bar>")
f("aa<foo><bar>bb")
// missing >
f("<foo")
f("foo<bar")
}
func TestPipeExtractUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("extract from x '<foo>'", "*", "", "*", "foo")
// all the needed fields, unneeded fields do not intersect with fromField and output fields
f("extract from x '<foo>'", "*", "f1,f2", "*", "f1,f2,foo")
// all the needed fields, unneeded fields intersect with fromField
f("extract from x '<foo>'", "*", "f2,x", "*", "f2,foo")
// all the needed fields, unneeded fields intersect with output fields
f("extract from x '<foo>x<bar>'", "*", "f2,foo", "*", "bar,f2,foo")
// all the needed fields, unneeded fields intersect with all the output fields
f("extract from x '<foo>x<bar>'", "*", "f2,foo,bar", "*", "bar,f2,foo,x")
// needed fields do not intersect with fromField and output fields
f("extract from x '<foo>x<bar>'", "f1,f2", "", "f1,f2", "")
// needed fields intersect with fromField
f("extract from x '<foo>x<bar>'", "f2,x", "", "f2,x", "")
// needed fields intersect with output fields
f("extract from x '<foo>x<bar>'", "f2,foo", "", "f2,x", "")
// needed fields intersect with fromField and output fields
f("extract from x '<foo>x<bar>'", "f2,foo,x,y", "", "f2,x,y", "")
}

View file

@ -0,0 +1,80 @@
package logstorage
import (
"testing"
)
func BenchmarkExtractFormatApply(b *testing.B) {
a := []string{
`{"level":"error","ts":1716113701.63973,"caller":"gcm/export.go:498","msg":"Failed to export self-observability metrics to Cloud Monitoring","error":"rpc error: code = PermissionDenied desc = Permission monitoring.timeSeries.create denied (or the resource may not exist).","stacktrace":"google3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).startSelfObservability\n\tcloud/kubernetes/metrics/common/gcm/export.go:498","foo":"bar"}`,
`{"level":"error","ts":1716113370.2321634,"caller":"gcm/export.go:434","msg":"Failed to export metrics to Cloud Monitoring","error":"rpc error: code = PermissionDenied desc = Permission monitoring.timeSeries.create denied (or the resource may not exist).","stacktrace":"google3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).exportBuffer\n\tcloud/kubernetes/metrics/common/gcm/export.go:434\ngoogle3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).flush\n\tcloud/kubernetes/metrics/common/gcm/export.go:383\ngoogle3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).Flush\n\tcloud/kubernetes/metrics/common/gcm/export.go:365\ngoogle3/cloud/kubernetes/metrics/components/collector/adapter/adapter.(*adapter).Finalize\n\tcloud/kubernetes/metrics/components/collector/adapter/consume.go:131\ngoogle3/cloud/kubernetes/metrics/components/collector/prometheus/prometheus.(*parser).ParseText\n\tcloud/kubernetes/metrics/components/collector/prometheus/parse.go:158\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.runScrapeLoop\n\tcloud/kubernetes/metrics/components/collector/collector.go:103\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Run\n\tcloud/kubernetes/metrics/components/collector/collector.go:81\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Start.func1\n\tcloud/kubernetes/metrics/components/collector/multi_target_collector.go:45","foo":"bar"}`,
`{"level":"error","ts":1716113127.7496774,"caller":"collector/collector.go:105","msg":"Failed to process metrics","scrape_target":"http://localhost:8093/metrics","error":"failed to finalize exporting: \"2 errors occurred:\\n\\t* failed to export 1 (out of 1) batches of metrics to Cloud Monitoring\\n\\t* failed to export 1 (out of 1) batches of metrics to Cloud Monitoring\\n\\n\"","stacktrace":"google3/cloud/kubernetes/metrics/components/collector/collector.runScrapeLoop\n\tcloud/kubernetes/metrics/components/collector/collector.go:105\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Run\n\tcloud/kubernetes/metrics/components/collector/collector.go:81\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Start.func1\n\tcloud/kubernetes/metrics/components/collector/multi_target_collector.go:45","foo":"bar"}`,
`{"level":"error","ts":1716113547.6429873,"caller":"gcm/export.go:498","msg":"Failed to export self-observability metrics to Cloud Monitoring","error":"rpc error: code = PermissionDenied desc = Permission monitoring.timeSeries.create denied (or the resource may not exist).","stacktrace":"google3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).startSelfObservability\n\tcloud/kubernetes/metrics/common/gcm/export.go:498", "foo":"bar"}`,
`{"level":"error","ts":1716113541.4445803,"caller":"periodicexporter/periodic_exporter.go:180","msg":"Failed to flush metrics to Cloud Monitoring","error":"1 error occurred:\n\t* failed to export 1 (out of 1) batches of metrics to Cloud Monitoring\n\n","stacktrace":"google3/cloud/kubernetes/metrics/common/periodicexporter/periodicexporter.(*Exporter).exportAll\n\tcloud/kubernetes/metrics/common/periodicexporter/periodic_exporter.go:180\ngoogle3/cloud/kubernetes/metrics/common/periodicexporter/periodicexporter.(*Exporter).periodicExporter\n\tcloud/kubernetes/metrics/common/periodicexporter/periodic_exporter.go:157","foo":"bar"}`,
}
b.Run("single-small-field-at-start", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":"<level>"`, a)
})
b.Run("single-small-field-at-start-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":<level>`, a)
})
b.Run("single-small-field-at-end", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"foo":"<foo>"`, a)
})
b.Run("single-small-field-at-end-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"foo":<foo>`, a)
})
b.Run("single-medium-field", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"msg":"<message>"`, a)
})
b.Run("single-medium-field-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"msg":<message>`, a)
})
b.Run("single-large-field", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"stacktrace":"<stacktrace>"`, a)
})
b.Run("single-large-field-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"stacktrace":<stacktrace>`, a)
})
b.Run("two-fields", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":"<level>",<_>"msg":"<msg>"`, a)
})
b.Run("two-fields-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":<level>,<_>"msg":<msg>`, a)
})
b.Run("many-fields", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":"<level>","ts":"<ts>","caller":"<caller>","msg":"<msg>","error":"<error>"`, a)
})
b.Run("many-fields-unquote", func(b *testing.B) {
benchmarkExtractFormatApply(b, `"level":<level>,"ts":<ts>,"caller":<caller>,"msg":<msg>,"error":<error>`, a)
})
}
func benchmarkExtractFormatApply(b *testing.B, pattern string, a []string) {
steps, err := parseExtractFormatSteps(pattern)
if err != nil {
b.Fatalf("unexpected error: %s", err)
}
n := 0
for _, s := range a {
n += len(s)
}
b.ReportAllocs()
b.SetBytes(int64(n))
b.RunParallel(func(pb *testing.PB) {
sink := 0
ef := newExtractFormat(steps)
for pb.Next() {
for _, s := range a {
ef.apply(s)
for _, v := range ef.matches {
sink += len(v)
}
}
}
GlobalSink.Add(uint64(sink))
})
}
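The benchmark sums match lengths into a local sink and then into GlobalSink (presumably a package-level atomic counter defined elsewhere in the test suite) so the compiler cannot eliminate the ef.apply work as dead code. A generic sketch of that sink pattern in a hypothetical _test.go file:

package logwork // any package; place this in a file ending with _test.go and run `go test -bench=.`

import (
	"sync/atomic"
	"testing"
)

// globalSink keeps benchmark results observable so the work is not optimized away.
var globalSink atomic.Uint64

func BenchmarkWork(b *testing.B) {
	data := []string{"foo", "bar", "baz"}
	n := 0
	for _, s := range data {
		n += len(s)
	}
	b.ReportAllocs()
	b.SetBytes(int64(n)) // bytes processed per iteration, for MB/s reporting
	b.RunParallel(func(pb *testing.PB) {
		sink := 0
		for pb.Next() {
			for _, s := range data {
				sink += len(s) // stand-in for the real per-item work
			}
		}
		globalSink.Add(uint64(sink))
	})
}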

View file

@ -0,0 +1,167 @@
package logstorage
import (
"fmt"
"strings"
"unsafe"
)
// pipeFieldNames processes '| field_names' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#field-names-pipe
type pipeFieldNames struct {
// resultName is the name of the column to write results to.
resultName string
// isFirstPipe is set to true if '| field_names' pipe is the first in the query.
//
// This allows skipping loading of _time column.
isFirstPipe bool
}
func (pf *pipeFieldNames) String() string {
return "field_names as " + quoteTokenIfNeeded(pf.resultName)
}
func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fieldsSet) {
neededFields.add("*")
unneededFields.reset()
if pf.isFirstPipe {
unneededFields.add("_time")
}
}
func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeFieldNamesProcessorShard, workersCount)
for i := range shards {
shards[i] = pipeFieldNamesProcessorShard{
pipeFieldNamesProcessorShardNopad: pipeFieldNamesProcessorShardNopad{
m: make(map[string]struct{}),
},
}
}
pfp := &pipeFieldNamesProcessor{
pf: pf,
stopCh: stopCh,
ppBase: ppBase,
shards: shards,
}
return pfp
}
type pipeFieldNamesProcessor struct {
pf *pipeFieldNames
stopCh <-chan struct{}
ppBase pipeProcessor
shards []pipeFieldNamesProcessorShard
}
type pipeFieldNamesProcessorShard struct {
pipeFieldNamesProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeFieldNamesProcessorShardNopad{})%128]byte
}
type pipeFieldNamesProcessorShardNopad struct {
// m holds unique field names.
m map[string]struct{}
}
func (pfp *pipeFieldNamesProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pfp.shards[workerID]
cs := br.getColumns()
for _, c := range cs {
if _, ok := shard.m[c.name]; !ok {
nameCopy := strings.Clone(c.name)
shard.m[nameCopy] = struct{}{}
}
}
}
func (pfp *pipeFieldNamesProcessor) flush() error {
if needStop(pfp.stopCh) {
return nil
}
// merge state across shards
shards := pfp.shards
m := shards[0].m
shards = shards[1:]
for i := range shards {
for k := range shards[i].m {
m[k] = struct{}{}
}
}
if pfp.pf.isFirstPipe {
m["_time"] = struct{}{}
}
// write result
wctx := &pipeFieldNamesWriteContext{
pfp: pfp,
}
wctx.rcs[0].name = pfp.pf.resultName
for k := range m {
wctx.writeRow(k)
}
wctx.flush()
return nil
}
type pipeFieldNamesWriteContext struct {
pfp *pipeFieldNamesProcessor
rcs [1]resultColumn
br blockResult
valuesLen int
}
func (wctx *pipeFieldNamesWriteContext) writeRow(v string) {
wctx.rcs[0].addValue(v)
wctx.valuesLen += len(v)
if wctx.valuesLen >= 1_000_000 {
wctx.flush()
}
}
func (wctx *pipeFieldNamesWriteContext) flush() {
br := &wctx.br
wctx.valuesLen = 0
// Flush rcs to ppBase
br.setResultColumns(wctx.rcs[:1])
wctx.pfp.ppBase.writeBlock(0, br)
br.reset()
wctx.rcs[0].resetValues()
}
func parsePipeFieldNames(lex *lexer) (*pipeFieldNames, error) {
if !lex.isKeyword("field_names") {
return nil, fmt.Errorf("expecting 'field_names'; got %q", lex.token)
}
lex.nextToken()
if lex.isKeyword("as") {
lex.nextToken()
}
resultName, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result name for 'field_names': %w", err)
}
pf := &pipeFieldNames{
resultName: resultName,
}
return pf, nil
}
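Each worker shard above owns a private map of seen column names, so the hot writeBlock path needs no locking; the maps are merged only once in flush. A self-contained sketch of the same per-worker-set-then-merge pattern with plain goroutines (the input blocks and the worker scheduling are made up for illustration):

package main

import (
	"fmt"
	"sort"
	"sync"
)

func main() {
	blocks := [][]string{
		{"_msg", "_time", "level"},
		{"_msg", "ip", "path"},
		{"_msg", "level", "trace_id"},
	}

	workers := 2
	shards := make([]map[string]struct{}, workers)
	for i := range shards {
		shards[i] = make(map[string]struct{})
	}

	var wg sync.WaitGroup
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(workerID int) {
			defer wg.Done()
			// Each worker writes only to its own shard - no locks needed.
			for i := workerID; i < len(blocks); i += workers {
				for _, name := range blocks[i] {
					shards[workerID][name] = struct{}{}
				}
			}
		}(w)
	}
	wg.Wait()

	// Merge the per-worker sets once, after all workers have finished.
	merged := shards[0]
	for _, m := range shards[1:] {
		for name := range m {
			merged[name] = struct{}{}
		}
	}
	names := make([]string, 0, len(merged))
	for name := range merged {
		names = append(names, name)
	}
	sort.Strings(names)
	fmt.Println(names) // [_msg _time ip level path trace_id]
}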

View file

@ -0,0 +1,27 @@
package logstorage
import (
"testing"
)
func TestPipeFieldNamesUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("field_names as f1", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("field_names as f3", "*", "f1,f2", "*", "")
// all the needed fields, unneeded fields intersect with src
f("field_names as f1", "*", "s1,f1,f2", "*", "")
// needed fields do not intersect with src
f("field_names as f3", "f1,f2", "", "*", "")
// needed fields intersect with src
f("field_names as f1", "s1,f1,f2", "", "*", "")
}

View file

@ -32,7 +32,7 @@ func (pf *pipeFields) updateNeededFields(neededFields, unneededFields fieldsSet)
if neededFields.contains("*") { if neededFields.contains("*") {
// subtract unneeded fields from pf.fields // subtract unneeded fields from pf.fields
neededFields.reset() neededFields.reset()
neededFields.addAll(pf.fields) neededFields.addFields(pf.fields)
for _, f := range unneededFields.getAll() { for _, f := range unneededFields.getAll() {
neededFields.remove(f) neededFields.remove(f)
} }

View file

@ -7,18 +7,7 @@ import (
func TestPipeFieldsUpdateNeededFields(t *testing.T) { func TestPipeFieldsUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) { f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper() t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeFields(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
} }
// all the needed fields // all the needed fields

View file

@ -0,0 +1,108 @@
package logstorage
import (
"fmt"
"unsafe"
)
// pipeFilter processes '| filter ...' queries.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe
type pipeFilter struct {
// f is a filter to apply to the written rows.
f filter
}
func (pf *pipeFilter) String() string {
return "filter " + pf.f.String()
}
func (pf *pipeFilter) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
fs := newFieldsSet()
pf.f.updateNeededFields(fs)
for f := range fs {
unneededFields.remove(f)
}
} else {
pf.f.updateNeededFields(neededFields)
}
}
func (pf *pipeFilter) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeFilterProcessorShard, workersCount)
pfp := &pipeFilterProcessor{
pf: pf,
ppBase: ppBase,
shards: shards,
}
return pfp
}
type pipeFilterProcessor struct {
pf *pipeFilter
ppBase pipeProcessor
shards []pipeFilterProcessorShard
}
type pipeFilterProcessorShard struct {
pipeFilterProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeFilterProcessorShardNopad{})%128]byte
}
type pipeFilterProcessorShardNopad struct {
br blockResult
bm bitmap
}
func (pfp *pipeFilterProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pfp.shards[workerID]
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
pfp.pf.f.applyToBlockResult(br, bm)
if bm.areAllBitsSet() {
// Fast path - the filter didn't filter out anything - send br to the base pipe as is.
pfp.ppBase.writeBlock(workerID, br)
return
}
if bm.isZero() {
// Nothing to send
return
}
// Slow path - copy the remaining rows from br to shard.br before sending them to base pipe.
shard.br.initFromFilterAllColumns(br, bm)
pfp.ppBase.writeBlock(workerID, &shard.br)
}
func (pfp *pipeFilterProcessor) flush() error {
return nil
}
func parsePipeFilter(lex *lexer) (*pipeFilter, error) {
if !lex.isKeyword("filter") {
return nil, fmt.Errorf("expecting 'filter'; got %q", lex.token)
}
lex.nextToken()
f, err := parseFilter(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'filter': %w", err)
}
pf := &pipeFilter{
f: f,
}
return pf, nil
}
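The filter pipe marks surviving rows in a per-shard bitmap: all bits are set first, the filter clears the bits of non-matching rows, and two fast paths skip copying when every row or no row survives. A standalone sketch of that select-by-bitmap idea, using a plain []bool in place of the real bitmap type:

package main

import (
	"fmt"
	"strings"
)

// filterRows keeps only the rows for which keep reports true,
// with fast paths for the all-kept and none-kept cases.
func filterRows(rows []string, keep func(string) bool) []string {
	bm := make([]bool, len(rows))
	kept := 0
	for i, r := range rows {
		if keep(r) {
			bm[i] = true
			kept++
		}
	}
	if kept == len(rows) {
		return rows // fast path: nothing filtered out, reuse the input as-is
	}
	if kept == 0 {
		return nil // fast path: nothing to emit
	}
	out := make([]string, 0, kept)
	for i, ok := range bm {
		if ok {
			out = append(out, rows[i])
		}
	}
	return out
}

func main() {
	rows := []string{"GET /", "POST /login", "GET /health"}
	fmt.Println(filterRows(rows, func(r string) bool { return strings.HasPrefix(r, "GET") }))
	// [GET / GET /health]
}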

View file

@ -0,0 +1,27 @@
package logstorage
import (
"testing"
)
func TestPipeFilterUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("filter foo f1:bar", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("filter foo f3:bar", "*", "f1,f2", "*", "f1,f2")
// all the needed fields, unneeded fields intersect with src
f("filter foo f1:bar", "*", "s1,f1,f2", "*", "s1,f2")
// needed fields do not intersect with src
f("filter foo f3:bar", "f1,f2", "", "_msg,f1,f2,f3", "")
// needed fields intersect with src
f("filter foo f1:bar", "s1,f1,f2", "", "_msg,f1,f2,s1", "")
}

View file

@ -9,18 +9,18 @@ import (
// //
// See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe // See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe
type pipeLimit struct { type pipeLimit struct {
n uint64 limit uint64
} }
func (pl *pipeLimit) String() string { func (pl *pipeLimit) String() string {
return fmt.Sprintf("limit %d", pl.n) return fmt.Sprintf("limit %d", pl.limit)
} }
func (pl *pipeLimit) updateNeededFields(_, _ fieldsSet) { func (pl *pipeLimit) updateNeededFields(_, _ fieldsSet) {
} }
func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor { func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor {
if pl.n == 0 { if pl.limit == 0 {
// Special case - notify the caller to stop writing data to the returned pipeLimitProcessor // Special case - notify the caller to stop writing data to the returned pipeLimitProcessor
cancel() cancel()
} }
@ -45,7 +45,7 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
} }
rowsProcessed := plp.rowsProcessed.Add(uint64(len(br.timestamps))) rowsProcessed := plp.rowsProcessed.Add(uint64(len(br.timestamps)))
if rowsProcessed <= plp.pl.n { if rowsProcessed <= plp.pl.limit {
// Fast path - write all the rows to ppBase. // Fast path - write all the rows to ppBase.
plp.ppBase.writeBlock(workerID, br) plp.ppBase.writeBlock(workerID, br)
return return
@ -53,13 +53,13 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
// Slow path - overflow. Write the remaining rows if needed. // Slow path - overflow. Write the remaining rows if needed.
rowsProcessed -= uint64(len(br.timestamps)) rowsProcessed -= uint64(len(br.timestamps))
if rowsProcessed >= plp.pl.n { if rowsProcessed >= plp.pl.limit {
// Nothing to write. There is no need to call cancel(), since it has been called by another goroutine. return return
return return
} }
// Write remaining rows. // Write remaining rows.
keepRows := plp.pl.n - rowsProcessed keepRows := plp.pl.limit - rowsProcessed
br.truncateRows(int(keepRows)) br.truncateRows(int(keepRows))
plp.ppBase.writeBlock(workerID, br) plp.ppBase.writeBlock(workerID, br)
@ -83,7 +83,7 @@ func parsePipeLimit(lex *lexer) (*pipeLimit, error) {
} }
lex.nextToken() lex.nextToken()
pl := &pipeLimit{ pl := &pipeLimit{
n: n, limit: n,
} }
return pl, nil return pl, nil
} }
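The limit pipe (and the symmetric offset pipe further below) coordinates concurrent workers through a single atomic row counter: the counter value before and after each Add decides whether a block passes through untouched, gets truncated, or is dropped. A rough sketch of that accounting with simplified, hypothetical names:

package main

import (
	"fmt"
	"sync/atomic"
)

type limiter struct {
	limit         uint64
	rowsProcessed atomic.Uint64
}

// take returns how many of n incoming rows may still be emitted.
func (l *limiter) take(n uint64) uint64 {
	after := l.rowsProcessed.Add(n)
	if after <= l.limit {
		return n // fast path: the whole block fits under the limit
	}
	before := after - n
	if before >= l.limit {
		return 0 // the limit was already reached by other workers
	}
	return l.limit - before // emit only the first rows of this block
}

func main() {
	l := &limiter{limit: 10}
	fmt.Println(l.take(4), l.take(4), l.take(4), l.take(4)) // 4 4 2 0
}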

View file

@ -0,0 +1,21 @@
package logstorage
import (
"testing"
)
func TestPipeLimitUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("limit 10", "*", "", "*", "")
// all the needed fields, plus unneeded fields
f("limit 10", "*", "f1,f2", "*", "f1,f2")
// needed fields
f("limit 10", "f1,f2", "", "f1,f2", "")
}

View file

@ -9,11 +9,11 @@ import (
// //
// See https://docs.victoriametrics.com/victorialogs/logsql/#offset-pipe // See https://docs.victoriametrics.com/victorialogs/logsql/#offset-pipe
type pipeOffset struct { type pipeOffset struct {
n uint64 offset uint64
} }
func (po *pipeOffset) String() string { func (po *pipeOffset) String() string {
return fmt.Sprintf("offset %d", po.n) return fmt.Sprintf("offset %d", po.offset)
} }
func (po *pipeOffset) updateNeededFields(_, _ fieldsSet) { func (po *pipeOffset) updateNeededFields(_, _ fieldsSet) {
@ -39,17 +39,17 @@ func (pop *pipeOffsetProcessor) writeBlock(workerID uint, br *blockResult) {
} }
rowsProcessed := pop.rowsProcessed.Add(uint64(len(br.timestamps))) rowsProcessed := pop.rowsProcessed.Add(uint64(len(br.timestamps)))
if rowsProcessed <= pop.po.n { if rowsProcessed <= pop.po.offset {
return return
} }
rowsProcessed -= uint64(len(br.timestamps)) rowsProcessed -= uint64(len(br.timestamps))
if rowsProcessed >= pop.po.n { if rowsProcessed >= pop.po.offset {
pop.ppBase.writeBlock(workerID, br) pop.ppBase.writeBlock(workerID, br)
return return
} }
rowsSkip := pop.po.n - rowsProcessed rowsSkip := pop.po.offset - rowsProcessed
br.skipRows(int(rowsSkip)) br.skipRows(int(rowsSkip))
pop.ppBase.writeBlock(workerID, br) pop.ppBase.writeBlock(workerID, br)
} }
@ -70,7 +70,7 @@ func parsePipeOffset(lex *lexer) (*pipeOffset, error) {
} }
lex.nextToken() lex.nextToken()
po := &pipeOffset{ po := &pipeOffset{
n: n, offset: n,
} }
return po, nil return po, nil
} }

View file

@ -0,0 +1,21 @@
package logstorage
import (
"testing"
)
func TestPipeOffsetUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("offset 10", "*", "", "*", "")
// all the needed fields, plus unneeded fields
f("offset 10", "*", "f1,f2", "*", "f1,f2")
// needed fields
f("offset 10", "f1,f2", "", "f1,f2", "")
}

View file

@ -40,7 +40,7 @@ func (pr *pipeRename) updateNeededFields(neededFields, unneededFields fieldsSet)
} }
if neededFields.contains("*") { if neededFields.contains("*") {
// update only unneeded fields // update only unneeded fields
unneededFields.addAll(pr.dstFields) unneededFields.addFields(pr.dstFields)
for i, srcField := range pr.srcFields { for i, srcField := range pr.srcFields {
if neededSrcFields[i] { if neededSrcFields[i] {
unneededFields.remove(srcField) unneededFields.remove(srcField)
@ -50,7 +50,7 @@ func (pr *pipeRename) updateNeededFields(neededFields, unneededFields fieldsSet)
} }
} else { } else {
// update only needed fields and reset unneeded fields // update only needed fields and reset unneeded fields
neededFields.removeAll(pr.dstFields) neededFields.removeFields(pr.dstFields)
for i, srcField := range pr.srcFields { for i, srcField := range pr.srcFields {
if neededSrcFields[i] { if neededSrcFields[i] {
neededFields.add(srcField) neededFields.add(srcField)

View file

@ -7,18 +7,7 @@ import (
func TestPipeRenameUpdateNeededFields(t *testing.T) { func TestPipeRenameUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) { f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper() t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeRename(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
} }
// all the needed fields // all the needed fields

View file

@ -11,7 +11,6 @@ import (
"sync/atomic" "sync/atomic"
"unsafe" "unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory" "github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
) )
@ -80,9 +79,12 @@ func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
shards := make([]pipeSortProcessorShard, workersCount) shards := make([]pipeSortProcessorShard, workersCount)
for i := range shards { for i := range shards {
shard := &shards[i] shards[i] = pipeSortProcessorShard{
shard.ps = ps pipeSortProcessorShardNopad: pipeSortProcessorShardNopad{
shard.stateSizeBudget = stateSizeBudgetChunk ps: ps,
stateSizeBudget: stateSizeBudgetChunk,
},
}
maxStateSize -= stateSizeBudgetChunk maxStateSize -= stateSizeBudgetChunk
} }
@ -202,12 +204,14 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
columnValues := shard.columnValues[:0] columnValues := shard.columnValues[:0]
for _, c := range cs { for _, c := range cs {
columnValues = append(columnValues, c.getValues(br)) values := c.getValues(br)
columnValues = append(columnValues, values)
} }
shard.columnValues = columnValues shard.columnValues = columnValues
// Generate byColumns // Generate byColumns
var rc resultColumn valuesEncoded := make([]string, len(br.timestamps))
shard.stateSizeBudget -= len(valuesEncoded) * int(unsafe.Sizeof(valuesEncoded[0]))
bb := bbPool.Get() bb := bbPool.Get()
for rowIdx := range br.timestamps { for rowIdx := range br.timestamps {
@ -219,7 +223,12 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
bb.B = marshalJSONKeyValue(bb.B, cs[i].name, v) bb.B = marshalJSONKeyValue(bb.B, cs[i].name, v)
bb.B = append(bb.B, ',') bb.B = append(bb.B, ',')
} }
rc.addValue(bytesutil.ToUnsafeString(bb.B)) if rowIdx > 0 && valuesEncoded[rowIdx-1] == string(bb.B) {
valuesEncoded[rowIdx] = valuesEncoded[rowIdx-1]
} else {
valuesEncoded[rowIdx] = string(bb.B)
shard.stateSizeBudget -= len(bb.B)
}
} }
bbPool.Put(bb) bbPool.Put(bb)
@ -232,13 +241,13 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
{ {
c: &blockResultColumn{ c: &blockResultColumn{
valueType: valueTypeString, valueType: valueTypeString,
encodedValues: rc.values, valuesEncoded: valuesEncoded,
}, },
i64Values: i64Values, i64Values: i64Values,
f64Values: f64Values, f64Values: f64Values,
}, },
} }
shard.stateSizeBudget -= len(rc.buf) + int(unsafe.Sizeof(byColumns[0])+unsafe.Sizeof(*byColumns[0].c)) shard.stateSizeBudget -= int(unsafe.Sizeof(byColumns[0]) + unsafe.Sizeof(*byColumns[0].c))
// Append br to shard.blocks. // Append br to shard.blocks.
shard.blocks = append(shard.blocks, sortBlock{ shard.blocks = append(shard.blocks, sortBlock{
@ -260,8 +269,8 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
continue continue
} }
if c.isConst { if c.isConst {
bc.i64Values = shard.createInt64Values(c.encodedValues) bc.i64Values = shard.createInt64Values(c.valuesEncoded)
bc.f64Values = shard.createFloat64Values(c.encodedValues) bc.f64Values = shard.createFloat64Values(c.valuesEncoded)
continue continue
} }
@ -512,14 +521,10 @@ func (wctx *pipeSortWriteContext) writeNextRow(shard *pipeSortProcessorShard) {
rcs = wctx.rcs[:0] rcs = wctx.rcs[:0]
for _, bf := range byFields { for _, bf := range byFields {
rcs = append(rcs, resultColumn{ rcs = appendResultColumnWithName(rcs, bf.name)
name: bf.name,
})
} }
for _, c := range b.otherColumns { for _, c := range b.otherColumns {
rcs = append(rcs, resultColumn{ rcs = appendResultColumnWithName(rcs, c.name)
name: c.name,
})
} }
wctx.rcs = rcs wctx.rcs = rcs
} }
@ -558,7 +563,7 @@ func (wctx *pipeSortWriteContext) flush() {
wctx.psp.ppBase.writeBlock(0, br) wctx.psp.ppBase.writeBlock(0, br)
br.reset() br.reset()
for i := range rcs { for i := range rcs {
rcs[i].resetKeepName() rcs[i].resetValues()
} }
} }
@ -610,8 +615,8 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
if cA.c.isConst && cB.c.isConst { if cA.c.isConst && cB.c.isConst {
// Fast path - compare const values // Fast path - compare const values
ccA := cA.c.encodedValues[0] ccA := cA.c.valuesEncoded[0]
ccB := cB.c.encodedValues[0] ccB := cB.c.valuesEncoded[0]
if ccA == ccB { if ccA == ccB {
continue continue
} }
@ -689,8 +694,10 @@ func parsePipeSort(lex *lexer) (*pipeSort, error) {
lex.nextToken() lex.nextToken()
var ps pipeSort var ps pipeSort
if lex.isKeyword("by") { if lex.isKeyword("by", "(") {
lex.nextToken() if lex.isKeyword("by") {
lex.nextToken()
}
bfs, err := parseBySortFields(lex) bfs, err := parseBySortFields(lex)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err) return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)

View file

@ -7,18 +7,7 @@ import (
func TestPipeSortUpdateNeededFields(t *testing.T) { func TestPipeSortUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) { f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper() t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeSort(lex)
if err != nil {
t.Fatalf("cannot parse %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
} }
// all the needed fields // all the needed fields

View file

@ -20,23 +20,34 @@ type pipeStats struct {
// byFields contains field names with optional buckets from 'by(...)' clause. // byFields contains field names with optional buckets from 'by(...)' clause.
byFields []*byStatsField byFields []*byStatsField
// resultNames contains names of output results generated by funcs.
resultNames []string
// funcs contains stats functions to execute. // funcs contains stats functions to execute.
funcs []statsFunc funcs []pipeStatsFunc
}
type pipeStatsFunc struct {
// f is stats function to execute
f statsFunc
// neededFieldsForFunc contains needed fields for f execution
neededFieldsForFunc []string
// iff is an additional filter, which is applied to results before executing f on them
iff filter
// resultName is the name of the output generated by f
resultName string
} }
type statsFunc interface { type statsFunc interface {
// String returns string representation of statsFunc // String returns string representation of statsFunc
String() string String() string
// neededFields returns the needed fields for calculating the given stats // updateNeededFields updates neededFields with the fields needed for calculating the given stats
neededFields() []string updateNeededFields(neededFields fieldsSet)
// newStatsProcessor must create new statsProcessor for calculating stats for the given statsFunc. // newStatsProcessor must create new statsProcessor for calculating stats for the given statsFunc
// //
// It also must return the size in bytes of the returned statsProcessor. // It also must return the size in bytes of the returned statsProcessor
newStatsProcessor() (statsProcessor, int) newStatsProcessor() (statsProcessor, int)
} }
@ -77,7 +88,12 @@ func (ps *pipeStats) String() string {
} }
a := make([]string, len(ps.funcs)) a := make([]string, len(ps.funcs))
for i, f := range ps.funcs { for i, f := range ps.funcs {
a[i] = f.String() + " as " + quoteTokenIfNeeded(ps.resultNames[i]) line := f.f.String()
if f.iff != nil {
line += " if (" + f.iff.String() + ")"
}
line += " as " + quoteTokenIfNeeded(f.resultName)
a[i] = line
} }
s += strings.Join(a, ", ") s += strings.Join(a, ", ")
return s return s
@ -87,22 +103,17 @@ func (ps *pipeStats) updateNeededFields(neededFields, unneededFields fieldsSet)
neededFieldsOrig := neededFields.clone() neededFieldsOrig := neededFields.clone()
neededFields.reset() neededFields.reset()
byFields := make([]string, len(ps.byFields)) // byFields are needed unconditionally, since the output number of rows depends on them.
for i, bf := range ps.byFields { for _, bf := range ps.byFields {
byFields[i] = bf.name neededFields.add(bf.name)
} }
for _, f := range byFields { for _, f := range ps.funcs {
if neededFieldsOrig.contains(f) && !unneededFields.contains(f) { if neededFieldsOrig.contains(f.resultName) && !unneededFields.contains(f.resultName) {
neededFields.addAll(byFields) f.f.updateNeededFields(neededFields)
} if f.iff != nil {
} f.iff.updateNeededFields(neededFields)
}
for i, resultName := range ps.resultNames {
if neededFieldsOrig.contains(resultName) && !unneededFields.contains(resultName) {
funcFields := ps.funcs[i].neededFields()
neededFields.addAll(byFields)
neededFields.addAll(funcFields)
} }
} }
@ -115,11 +126,21 @@ func (ps *pipeStats) newPipeProcessor(workersCount int, stopCh <-chan struct{},
maxStateSize := int64(float64(memory.Allowed()) * 0.3) maxStateSize := int64(float64(memory.Allowed()) * 0.3)
shards := make([]pipeStatsProcessorShard, workersCount) shards := make([]pipeStatsProcessorShard, workersCount)
funcsLen := len(ps.funcs)
for i := range shards { for i := range shards {
shard := &shards[i] shards[i] = pipeStatsProcessorShard{
shard.ps = ps pipeStatsProcessorShardNopad: pipeStatsProcessorShardNopad{
shard.m = make(map[string]*pipeStatsGroup) ps: ps,
shard.stateSizeBudget = stateSizeBudgetChunk
m: make(map[string]*pipeStatsGroup),
bms: make([]bitmap, funcsLen),
brs: make([]*blockResult, funcsLen),
brsBuf: make([]blockResult, funcsLen),
stateSizeBudget: stateSizeBudgetChunk,
},
}
maxStateSize -= stateSizeBudgetChunk maxStateSize -= stateSizeBudgetChunk
} }
@ -159,7 +180,13 @@ type pipeStatsProcessorShard struct {
type pipeStatsProcessorShardNopad struct { type pipeStatsProcessorShardNopad struct {
ps *pipeStats ps *pipeStats
m map[string]*pipeStatsGroup
m map[string]*pipeStatsGroup
// bms, brs and brsBuf are used for applying per-func filters.
bms []bitmap
brs []*blockResult
brsBuf []blockResult
columnValues [][]string columnValues [][]string
keyBuf []byte keyBuf []byte
@ -170,10 +197,14 @@ type pipeStatsProcessorShardNopad struct {
func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) { func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
byFields := shard.ps.byFields byFields := shard.ps.byFields
// Apply per-function filters
brs := shard.applyPerFunctionFilters(br)
// Process stats for the defined functions
if len(byFields) == 0 { if len(byFields) == 0 {
// Fast path - pass all the rows to a single group with empty key. // Fast path - pass all the rows to a single group with empty key.
psg := shard.getPipeStatsGroup(nil) psg := shard.getPipeStatsGroup(nil)
shard.stateSizeBudget -= psg.updateStatsForAllRows(br) shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
return return
} }
if len(byFields) == 1 { if len(byFields) == 1 {
@ -182,19 +213,19 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
c := br.getColumnByName(bf.name) c := br.getColumnByName(bf.name)
if c.isConst { if c.isConst {
// Fast path for column with constant value. // Fast path for column with constant value.
v := br.getBucketedValue(c.encodedValues[0], bf) v := br.getBucketedValue(c.valuesEncoded[0], bf)
shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(v)) shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(v))
psg := shard.getPipeStatsGroup(shard.keyBuf) psg := shard.getPipeStatsGroup(shard.keyBuf)
shard.stateSizeBudget -= psg.updateStatsForAllRows(br) shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
return return
} }
values := c.getBucketedValues(br, bf) values := c.getValuesBucketed(br, bf)
if areConstValues(values) { if areConstValues(values) {
// Fast path for column with constant values. // Fast path for column with constant values.
shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(values[0])) shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(values[0]))
psg := shard.getPipeStatsGroup(shard.keyBuf) psg := shard.getPipeStatsGroup(shard.keyBuf)
shard.stateSizeBudget -= psg.updateStatsForAllRows(br) shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
return return
} }
@ -206,7 +237,7 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
keyBuf = encoding.MarshalBytes(keyBuf[:0], bytesutil.ToUnsafeBytes(values[i])) keyBuf = encoding.MarshalBytes(keyBuf[:0], bytesutil.ToUnsafeBytes(values[i]))
psg = shard.getPipeStatsGroup(keyBuf) psg = shard.getPipeStatsGroup(keyBuf)
} }
shard.stateSizeBudget -= psg.updateStatsForRow(br, i) shard.stateSizeBudget -= psg.updateStatsForRow(brs, i)
} }
shard.keyBuf = keyBuf shard.keyBuf = keyBuf
return return
@ -216,7 +247,7 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
columnValues := shard.columnValues[:0] columnValues := shard.columnValues[:0]
for _, bf := range byFields { for _, bf := range byFields {
c := br.getColumnByName(bf.name) c := br.getColumnByName(bf.name)
values := c.getBucketedValues(br, bf) values := c.getValuesBucketed(br, bf)
columnValues = append(columnValues, values) columnValues = append(columnValues, values)
} }
shard.columnValues = columnValues shard.columnValues = columnValues
@ -236,7 +267,7 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(values[0])) keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(values[0]))
} }
psg := shard.getPipeStatsGroup(keyBuf) psg := shard.getPipeStatsGroup(keyBuf)
shard.stateSizeBudget -= psg.updateStatsForAllRows(br) shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
shard.keyBuf = keyBuf shard.keyBuf = keyBuf
return return
} }
@ -261,11 +292,44 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
} }
psg = shard.getPipeStatsGroup(keyBuf) psg = shard.getPipeStatsGroup(keyBuf)
} }
shard.stateSizeBudget -= psg.updateStatsForRow(br, i) shard.stateSizeBudget -= psg.updateStatsForRow(brs, i)
} }
shard.keyBuf = keyBuf shard.keyBuf = keyBuf
} }
func (shard *pipeStatsProcessorShard) applyPerFunctionFilters(brSrc *blockResult) []*blockResult {
funcs := shard.ps.funcs
brs := shard.brs
for i := range funcs {
iff := funcs[i].iff
if iff == nil {
// Fast path - there are no per-function filters
brs[i] = brSrc
continue
}
bm := &shard.bms[i]
bm.init(len(brSrc.timestamps))
bm.setBits()
iff.applyToBlockResult(brSrc, bm)
if bm.areAllBitsSet() {
// Fast path - per-function filter doesn't filter out rows
brs[i] = brSrc
continue
}
// Store the remaining rows for the fields needed by this function into brDst
brDst := &shard.brsBuf[i]
if bm.isZero() {
brDst.reset()
} else {
brDst.initFromFilterNeededColumns(brSrc, bm, funcs[i].neededFieldsForFunc)
}
brs[i] = brDst
}
return brs
}
func (shard *pipeStatsProcessorShard) getPipeStatsGroup(key []byte) *pipeStatsGroup { func (shard *pipeStatsProcessorShard) getPipeStatsGroup(key []byte) *pipeStatsGroup {
psg := shard.m[string(key)] psg := shard.m[string(key)]
if psg != nil { if psg != nil {
@ -274,7 +338,7 @@ func (shard *pipeStatsProcessorShard) getPipeStatsGroup(key []byte) *pipeStatsGr
sfps := make([]statsProcessor, len(shard.ps.funcs)) sfps := make([]statsProcessor, len(shard.ps.funcs))
for i, f := range shard.ps.funcs { for i, f := range shard.ps.funcs {
sfp, stateSize := f.newStatsProcessor() sfp, stateSize := f.f.newStatsProcessor()
sfps[i] = sfp sfps[i] = sfp
shard.stateSizeBudget -= stateSize shard.stateSizeBudget -= stateSize
} }
@ -291,18 +355,18 @@ type pipeStatsGroup struct {
sfps []statsProcessor sfps []statsProcessor
} }
func (psg *pipeStatsGroup) updateStatsForAllRows(br *blockResult) int { func (psg *pipeStatsGroup) updateStatsForAllRows(brs []*blockResult) int {
n := 0 n := 0
for _, sfp := range psg.sfps { for i, sfp := range psg.sfps {
n += sfp.updateStatsForAllRows(br) n += sfp.updateStatsForAllRows(brs[i])
} }
return n return n
} }
func (psg *pipeStatsGroup) updateStatsForRow(br *blockResult, rowIdx int) int { func (psg *pipeStatsGroup) updateStatsForRow(brs []*blockResult, rowIdx int) int {
n := 0 n := 0
for _, sfp := range psg.sfps { for i, sfp := range psg.sfps {
n += sfp.updateStatsForRow(br, rowIdx) n += sfp.updateStatsForRow(brs[i], rowIdx)
} }
return n return n
} }
@ -368,16 +432,12 @@ func (psp *pipeStatsProcessor) flush() error {
m = shards[0].m m = shards[0].m
} }
rcs := make([]resultColumn, 0, len(byFields)+len(psp.ps.resultNames)) rcs := make([]resultColumn, 0, len(byFields)+len(psp.ps.funcs))
for _, bf := range byFields { for _, bf := range byFields {
rcs = append(rcs, resultColumn{ rcs = appendResultColumnWithName(rcs, bf.name)
name: bf.name,
})
} }
for _, resultName := range psp.ps.resultNames { for _, f := range psp.ps.funcs {
rcs = append(rcs, resultColumn{ rcs = appendResultColumnWithName(rcs, f.resultName)
name: resultName,
})
} }
var br blockResult var br blockResult
@ -423,7 +483,7 @@ func (psp *pipeStatsProcessor) flush() error {
psp.ppBase.writeBlock(0, &br) psp.ppBase.writeBlock(0, &br)
br.reset() br.reset()
for i := range rcs { for i := range rcs {
rcs[i].resetKeepName() rcs[i].resetValues()
} }
valuesLen = 0 valuesLen = 0
} }
@ -443,8 +503,10 @@ func parsePipeStats(lex *lexer) (*pipeStats, error) {
lex.nextToken() lex.nextToken()
var ps pipeStats var ps pipeStats
if lex.isKeyword("by") { if lex.isKeyword("by", "(") {
lex.nextToken() if lex.isKeyword("by") {
lex.nextToken()
}
bfs, err := parseByStatsFields(lex) bfs, err := parseByStatsFields(lex)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err) return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)
@ -452,17 +514,36 @@ func parsePipeStats(lex *lexer) (*pipeStats, error) {
ps.byFields = bfs ps.byFields = bfs
} }
var resultNames []string var funcs []pipeStatsFunc
var funcs []statsFunc
for { for {
sf, resultName, err := parseStatsFunc(lex) var f pipeStatsFunc
sf, err := parseStatsFunc(lex)
if err != nil { if err != nil {
return nil, err return nil, err
} }
resultNames = append(resultNames, resultName) f.f = sf
funcs = append(funcs, sf)
if lex.isKeyword("if") {
iff, err := parseIfFilter(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'if' filter for %s: %w", sf, err)
}
f.iff = iff
neededFields := newFieldsSet()
iff.updateNeededFields(neededFields)
f.neededFieldsForFunc = neededFields.getAll()
}
resultName, err := parseResultName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result name for %s: %w", sf, err)
}
f.resultName = resultName
funcs = append(funcs, f)
if lex.isKeyword("|", ")", "") { if lex.isKeyword("|", ")", "") {
ps.resultNames = resultNames
ps.funcs = funcs ps.funcs = funcs
return &ps, nil return &ps, nil
} }
@ -473,90 +554,107 @@ func parsePipeStats(lex *lexer) (*pipeStats, error) {
} }
} }
func parseStatsFunc(lex *lexer) (statsFunc, string, error) { func parseIfFilter(lex *lexer) (filter, error) {
var sf statsFunc if !lex.isKeyword("if") {
return nil, fmt.Errorf("unexpected keyword %q; expecting 'if'", lex.token)
}
lex.nextToken()
if !lex.isKeyword("(") {
return nil, fmt.Errorf("unexpected token %q after 'if'; expecting '('", lex.token)
}
lex.nextToken()
if lex.isKeyword(")") {
lex.nextToken()
return &filterNoop{}, nil
}
f, err := parseFilter(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'if' filter: %w", err)
}
if !lex.isKeyword(")") {
return nil, fmt.Errorf("unexpected token %q after 'if' filter; expecting ')'", lex.token)
}
lex.nextToken()
return f, nil
}
func parseStatsFunc(lex *lexer) (statsFunc, error) {
switch { switch {
case lex.isKeyword("count"): case lex.isKeyword("count"):
scs, err := parseStatsCount(lex) scs, err := parseStatsCount(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'count' func: %w", err) return nil, fmt.Errorf("cannot parse 'count' func: %w", err)
} }
sf = scs return scs, nil
case lex.isKeyword("count_empty"): case lex.isKeyword("count_empty"):
scs, err := parseStatsCountEmpty(lex) scs, err := parseStatsCountEmpty(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'count_empty' func: %w", err) return nil, fmt.Errorf("cannot parse 'count_empty' func: %w", err)
} }
sf = scs return scs, nil
case lex.isKeyword("count_uniq"): case lex.isKeyword("count_uniq"):
sus, err := parseStatsCountUniq(lex) sus, err := parseStatsCountUniq(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'count_uniq' func: %w", err) return nil, fmt.Errorf("cannot parse 'count_uniq' func: %w", err)
} }
sf = sus return sus, nil
case lex.isKeyword("sum"): case lex.isKeyword("sum"):
sss, err := parseStatsSum(lex) sss, err := parseStatsSum(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'sum' func: %w", err) return nil, fmt.Errorf("cannot parse 'sum' func: %w", err)
} }
sf = sss return sss, nil
case lex.isKeyword("max"): case lex.isKeyword("max"):
sms, err := parseStatsMax(lex) sms, err := parseStatsMax(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'max' func: %w", err) return nil, fmt.Errorf("cannot parse 'max' func: %w", err)
} }
sf = sms return sms, nil
case lex.isKeyword("min"): case lex.isKeyword("min"):
sms, err := parseStatsMin(lex) sms, err := parseStatsMin(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'min' func: %w", err) return nil, fmt.Errorf("cannot parse 'min' func: %w", err)
} }
sf = sms return sms, nil
case lex.isKeyword("avg"): case lex.isKeyword("avg"):
sas, err := parseStatsAvg(lex) sas, err := parseStatsAvg(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'avg' func: %w", err) return nil, fmt.Errorf("cannot parse 'avg' func: %w", err)
} }
sf = sas return sas, nil
case lex.isKeyword("uniq_values"): case lex.isKeyword("uniq_values"):
sus, err := parseStatsUniqValues(lex) sus, err := parseStatsUniqValues(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'uniq_values' func: %w", err) return nil, fmt.Errorf("cannot parse 'uniq_values' func: %w", err)
} }
sf = sus return sus, nil
case lex.isKeyword("values"): case lex.isKeyword("values"):
svs, err := parseStatsValues(lex) svs, err := parseStatsValues(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'values' func: %w", err) return nil, fmt.Errorf("cannot parse 'values' func: %w", err)
} }
sf = svs return svs, nil
case lex.isKeyword("sum_len"): case lex.isKeyword("sum_len"):
sss, err := parseStatsSumLen(lex) sss, err := parseStatsSumLen(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'sum_len' func: %w", err) return nil, fmt.Errorf("cannot parse 'sum_len' func: %w", err)
} }
sf = sss return sss, nil
case lex.isKeyword("quantile"): case lex.isKeyword("quantile"):
sqs, err := parseStatsQuantile(lex) sqs, err := parseStatsQuantile(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'quantile' func: %w", err) return nil, fmt.Errorf("cannot parse 'quantile' func: %w", err)
} }
sf = sqs return sqs, nil
case lex.isKeyword("median"): case lex.isKeyword("median"):
sms, err := parseStatsMedian(lex) sms, err := parseStatsMedian(lex)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cannot parse 'median' func: %w", err) return nil, fmt.Errorf("cannot parse 'median' func: %w", err)
} }
sf = sms return sms, nil
default: default:
return nil, "", fmt.Errorf("unknown stats func %q", lex.token) return nil, fmt.Errorf("unknown stats func %q", lex.token)
} }
resultName, err := parseResultName(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse result name for %s: %w", sf, err)
}
return sf, resultName, nil
} }
func parseResultName(lex *lexer) (string, error) { func parseResultName(lex *lexer) (string, error) {
@ -619,10 +717,11 @@ func parseByStatsFields(lex *lexer) ([]*byStatsField, error) {
lex.nextToken() lex.nextToken()
return bfs, nil return bfs, nil
} }
fieldName, err := parseFieldName(lex) fieldName, err := getCompoundPhrase(lex, false)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse field name: %w", err) return nil, fmt.Errorf("cannot parse field name: %w", err)
} }
fieldName = getCanonicalColumnName(fieldName)
bf := &byStatsField{ bf := &byStatsField{
name: fieldName, name: fieldName,
} }
@ -796,10 +895,10 @@ func parseFieldNamesInParens(lex *lexer) ([]string, error) {
} }
func parseFieldName(lex *lexer) (string, error) { func parseFieldName(lex *lexer) (string, error) {
if lex.isKeyword(",", "(", ")", "[", "]", "|", ":", "") { fieldName, err := getCompoundToken(lex)
return "", fmt.Errorf("unexpected token: %q", lex.token) if err != nil {
return "", fmt.Errorf("cannot parse field name: %w", err)
} }
fieldName := getCompoundPhrase(lex, false)
fieldName = getCanonicalColumnName(fieldName) fieldName = getCanonicalColumnName(fieldName)
return fieldName, nil return fieldName, nil
} }

View file

@ -7,18 +7,7 @@ import (
func TestPipeStatsUpdateNeededFields(t *testing.T) { func TestPipeStatsUpdateNeededFields(t *testing.T) {
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) { f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper() t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
nfs := newTestFieldsSet(neededFields)
unfs := newTestFieldsSet(unneededFields)
lex := newLexer(s)
p, err := parsePipeStats(lex)
if err != nil {
t.Fatalf("unexpected error when parsing %s: %s", s, err)
}
p.updateNeededFields(nfs, unfs)
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
} }
// all the needed fields // all the needed fields
@ -44,7 +33,7 @@ func TestPipeStatsUpdateNeededFields(t *testing.T) {
f("stats count(f1,f2) r1, sum(f3,f4) r2", "*", "r1,r3", "f3,f4", "") f("stats count(f1,f2) r1, sum(f3,f4) r2", "*", "r1,r3", "f3,f4", "")
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2", "b1,b2", "") f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2", "b1,b2", "")
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1", "b1,b2", "") f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1", "b1,b2", "")
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1,b2", "", "") f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1,b2", "b1,b2", "")
f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "*", "r1,r3", "b1,b2,f1,f3", "") f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "*", "r1,r3", "b1,b2,f1,f3", "")
// needed fields do not intersect with stats fields // needed fields do not intersect with stats fields
@ -52,8 +41,8 @@ func TestPipeStatsUpdateNeededFields(t *testing.T) {
f("stats count(*) r1", "r2", "", "", "") f("stats count(*) r1", "r2", "", "", "")
f("stats count(f1,f2) r1", "r2", "", "", "") f("stats count(f1,f2) r1", "r2", "", "", "")
f("stats count(f1,f2) r1, sum(f3,f4) r2", "r3", "", "", "") f("stats count(f1,f2) r1, sum(f3,f4) r2", "r3", "", "", "")
f("stats by (b1,b2) count(f1,f2) r1", "r2", "", "", "") f("stats by (b1,b2) count(f1,f2) r1", "r2", "", "b1,b2", "")
f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "r3", "", "", "") f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "r3", "", "b1,b2", "")
// needed fields intersect with stats fields // needed fields intersect with stats fields
f("stats count() r1", "r1,r2", "", "", "") f("stats count() r1", "r1,r2", "", "", "")

View file

@@ -18,9 +18,12 @@ func newPipeTopkProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
	shards := make([]pipeTopkProcessorShard, workersCount)
	for i := range shards {
-		shard := &shards[i]
-		shard.ps = ps
-		shard.stateSizeBudget = stateSizeBudgetChunk
+		shards[i] = pipeTopkProcessorShard{
+			pipeTopkProcessorShardNopad: pipeTopkProcessorShardNopad{
+				ps:              ps,
+				stateSizeBudget: stateSizeBudgetChunk,
+			},
+		}
		maxStateSize -= stateSizeBudgetChunk
	}
@@ -72,10 +75,11 @@ type pipeTopkProcessorShardNopad struct {
	tmpRow pipeTopkRow

	// these are aux fields for determining whether the next row must be stored in rows.
	byColumnValues [][]string
-	otherColumnValues []pipeTopkOtherColumn
+	csOther []*blockResultColumn
	byColumns []string
-	otherColumns []Field
+	byColumnsIsTime []bool
+	otherColumns []Field

	// stateSizeBudget is the remaining budget for the whole state size for the shard.
	// The per-shard budget is provided in chunks from the parent pipeTopkProcessor.
@@ -83,13 +87,10 @@ type pipeTopkProcessorShardNopad struct {
}

type pipeTopkRow struct {
	byColumns []string
-	otherColumns []Field
-}
-
-type pipeTopkOtherColumn struct {
-	name   string
-	values []string
+	byColumnsIsTime []bool
+	otherColumns []Field
+
+	timestamp int64
}

func (r *pipeTopkRow) clone() *pipeTopkRow {
@@ -98,6 +99,8 @@ func (r *pipeTopkRow) clone() *pipeTopkRow {
		byColumnsCopy[i] = strings.Clone(r.byColumns[i])
	}
+	byColumnsIsTime := append([]bool{}, r.byColumnsIsTime...)
+
	otherColumnsCopy := make([]Field, len(r.otherColumns))
	for i := range otherColumnsCopy {
		src := &r.otherColumns[i]
@@ -107,8 +110,10 @@ func (r *pipeTopkRow) clone() *pipeTopkRow {
	}

	return &pipeTopkRow{
		byColumns: byColumnsCopy,
-		otherColumns: otherColumnsCopy,
+		byColumnsIsTime: byColumnsIsTime,
+		otherColumns: otherColumnsCopy,
+		timestamp: r.timestamp,
	}
}
@@ -120,6 +125,8 @@ func (r *pipeTopkRow) sizeBytes() int {
	}
	n += len(r.byColumns) * int(unsafe.Sizeof(r.byColumns[0]))
+	n += len(r.byColumnsIsTime) * int(unsafe.Sizeof(r.byColumnsIsTime[0]))
+
	for _, f := range r.otherColumns {
		n += len(f.Name) + len(f.Value)
	}
@@ -167,14 +174,15 @@ func (shard *pipeTopkProcessorShard) writeBlock(br *blockResult) {
		byColumnValues := shard.byColumnValues[:0]
		for _, c := range cs {
-			byColumnValues = append(byColumnValues, c.getValues(br))
+			values := c.getValues(br)
+			byColumnValues = append(byColumnValues, values)
		}
		shard.byColumnValues = byColumnValues
		byColumns := shard.byColumns[:0]
-		otherColumns := shard.otherColumns[:0]
+		byColumnsIsTime := shard.byColumnsIsTime[:0]
		bb := bbPool.Get()
-		for rowIdx := range br.timestamps {
+		for rowIdx, timestamp := range br.timestamps {
			byColumns = byColumns[:0]
			bb.B = bb.B[:0]
			for i, values := range byColumnValues {
@@ -183,31 +191,33 @@ func (shard *pipeTopkProcessorShard) writeBlock(br *blockResult) {
				bb.B = append(bb.B, ',')
			}
			byColumns = append(byColumns, bytesutil.ToUnsafeString(bb.B))
+			byColumnsIsTime = append(byColumnsIsTime, false)

-			otherColumns = otherColumns[:0]
-			for i, values := range byColumnValues {
-				otherColumns = append(otherColumns, Field{
-					Name:  cs[i].name,
-					Value: values[rowIdx],
-				})
-			}
-
-			shard.addRow(byColumns, otherColumns)
+			shard.addRow(br, byColumns, byColumnsIsTime, cs, rowIdx, timestamp)
		}
		bbPool.Put(bb)
		shard.byColumns = byColumns
-		shard.otherColumns = otherColumns
+		shard.byColumnsIsTime = byColumnsIsTime
	} else {
		// Sort by byFields
		byColumnValues := shard.byColumnValues[:0]
+		byColumnsIsTime := shard.byColumnsIsTime[:0]
		for _, bf := range byFields {
			c := br.getColumnByName(bf.name)
-			byColumnValues = append(byColumnValues, c.getValues(br))
+			byColumnsIsTime = append(byColumnsIsTime, c.isTime)
+
+			var values []string
+			if !c.isTime {
+				values = c.getValues(br)
+			}
+			byColumnValues = append(byColumnValues, values)
		}
		shard.byColumnValues = byColumnValues
+		shard.byColumnsIsTime = byColumnsIsTime

-		otherColumnValues := shard.otherColumnValues[:0]
+		csOther := shard.csOther[:0]
		for _, c := range cs {
			isByField := false
			for _, bf := range byFields {
@@ -217,42 +227,35 @@ func (shard *pipeTopkProcessorShard) writeBlock(br *blockResult) {
				}
			}
			if !isByField {
-				otherColumnValues = append(otherColumnValues, pipeTopkOtherColumn{
-					name:   c.name,
-					values: c.getValues(br),
-				})
+				csOther = append(csOther, c)
			}
		}
-		shard.otherColumnValues = otherColumnValues
+		shard.csOther = csOther

		// add rows to shard
		byColumns := shard.byColumns[:0]
-		otherColumns := shard.otherColumns[:0]
-		for rowIdx := range br.timestamps {
+		for rowIdx, timestamp := range br.timestamps {
			byColumns = byColumns[:0]
-			for _, values := range byColumnValues {
-				byColumns = append(byColumns, values[rowIdx])
+			for i, values := range byColumnValues {
+				v := ""
+				if !byColumnsIsTime[i] {
+					v = values[rowIdx]
+				}
+				byColumns = append(byColumns, v)
			}

-			otherColumns = otherColumns[:0]
-			for _, ocv := range otherColumnValues {
-				otherColumns = append(otherColumns, Field{
-					Name:  ocv.name,
-					Value: ocv.values[rowIdx],
-				})
-			}
-
-			shard.addRow(byColumns, otherColumns)
+			shard.addRow(br, byColumns, byColumnsIsTime, csOther, rowIdx, timestamp)
		}
		shard.byColumns = byColumns
-		shard.otherColumns = otherColumns
	}
}

-func (shard *pipeTopkProcessorShard) addRow(byColumns []string, otherColumns []Field) {
+func (shard *pipeTopkProcessorShard) addRow(br *blockResult, byColumns []string, byColumnsIsTime []bool, csOther []*blockResultColumn, rowIdx int, timestamp int64) {
	r := &shard.tmpRow
	r.byColumns = byColumns
-	r.otherColumns = otherColumns
+	r.byColumnsIsTime = byColumnsIsTime
+	r.timestamp = timestamp

	rows := shard.rows
	if len(rows) > 0 && !topkLess(shard.ps, r, rows[0]) {
@@ -261,9 +264,25 @@ func (shard *pipeTopkProcessorShard) addRow(byColumns []string, otherColumns []F
	}

	// Slow path - add r to shard.rows.
+
+	// Populate r.otherColumns
+	otherColumns := shard.otherColumns[:0]
+	for _, c := range csOther {
+		v := c.getValueAtRow(br, rowIdx)
+		otherColumns = append(otherColumns, Field{
+			Name:  c.name,
+			Value: v,
+		})
+	}
+	shard.otherColumns = otherColumns
+	r.otherColumns = otherColumns
+
+	// Clone r, so it doesn't refer the original data.
	r = r.clone()
	shard.stateSizeBudget -= r.sizeBytes()
-	if uint64(len(rows)) < shard.ps.limit {
+
+	// Push r to shard.rows.
+	if uint64(len(rows)) < shard.ps.offset+shard.ps.limit {
		heap.Push(shard, r)
		shard.stateSizeBudget -= int(unsafe.Sizeof(r))
	} else {
@@ -443,21 +462,23 @@ func (wctx *pipeTopkWriteContext) writeNextRow(shard *pipeTopkProcessorShard) bo
		rcs = wctx.rcs[:0]
		for _, bf := range byFields {
-			rcs = append(rcs, resultColumn{
-				name: bf.name,
-			})
+			rcs = appendResultColumnWithName(rcs, bf.name)
		}
		for _, c := range r.otherColumns {
-			rcs = append(rcs, resultColumn{
-				name: c.Name,
-			})
+			rcs = appendResultColumnWithName(rcs, c.Name)
		}
		wctx.rcs = rcs
	}

+	var tmpBuf []byte
	byColumns := r.byColumns
+	byColumnsIsTime := r.byColumnsIsTime
	for i := range byFields {
		v := byColumns[i]
+		if byColumnsIsTime[i] {
+			tmpBuf = marshalTimestampRFC3339NanoString(tmpBuf[:0], r.timestamp)
+			v = bytesutil.ToUnsafeString(tmpBuf)
+		}
		rcs[i].addValue(v)
		wctx.valuesLen += len(v)
	}
@@ -490,7 +511,7 @@ func (wctx *pipeTopkWriteContext) flush() {
	wctx.ptp.ppBase.writeBlock(0, br)
	br.reset()
	for i := range rcs {
-		rcs[i].resetKeepName()
+		rcs[i].resetValues()
	}
}
@@ -529,25 +550,79 @@ func topkLess(ps *pipeSort, a, b *pipeTopkRow) bool {
	byFields := ps.byFields
	csA := a.byColumns
+	isTimeA := a.byColumnsIsTime
	csB := b.byColumns
+	isTimeB := b.byColumnsIsTime
-	for k := range csA {
+	for i := range csA {
		isDesc := ps.isDesc
-		if len(byFields) > 0 && byFields[k].isDesc {
+		if len(byFields) > 0 && byFields[i].isDesc {
			isDesc = !isDesc
		}
-		vA := csA[k]
-		vB := csB[k]
+		if isTimeA[i] && isTimeB[i] {
+			// Fast path - compare timestamps
+			if a.timestamp == b.timestamp {
+				continue
+			}
+			if isDesc {
+				return b.timestamp < a.timestamp
+			}
+			return a.timestamp < b.timestamp
+		}
+
+		vA := csA[i]
+		vB := csB[i]
+
+		var bb *bytesutil.ByteBuffer
+		if isTimeA[i] || isTimeB[i] {
+			bb = bbPool.Get()
+		}
+		if isTimeA[i] {
+			bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], a.timestamp)
+			vA = bytesutil.ToUnsafeString(bb.B)
+		} else if isTimeB[i] {
+			bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], a.timestamp)
+			vB = bytesutil.ToUnsafeString(bb.B)
+		}
+
		if vA == vB {
+			if bb != nil {
+				bbPool.Put(bb)
+			}
			continue
		}
		if isDesc {
-			return stringsutil.LessNatural(vB, vA)
+			vA, vB = vB, vA
		}
-		return stringsutil.LessNatural(vA, vB)
+		ok := lessString(vA, vB)
+		if bb != nil {
+			bbPool.Put(bb)
+		}
+		return ok
	}
	return false
}
func lessString(a, b string) bool {
if a == b {
return false
}
nA, okA := tryParseUint64(a)
nB, okB := tryParseUint64(b)
if okA && okB {
return nA < nB
}
fA, okA := tryParseFloat64(a)
fB, okB := tryParseFloat64(b)
if okA && okB {
return fA < fB
}
return stringsutil.LessNatural(a, b)
}
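// Illustrative sketch (not part of the diff above): the ordering that lessString is
// expected to produce, assuming tryParseUint64/tryParseFloat64 accept plain decimal numbers.
//
//	lessString("9", "10")     // true: both parse as uint64, so the comparison is numeric
//	lessString("1.5", "1.25") // false: float comparison, 1.5 > 1.25
//	lessString("abc", "abd")  // true: falls back to stringsutil.LessNatural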

View file

@@ -3,6 +3,7 @@ package logstorage
import (
	"fmt"
	"slices"
+	"strings"
	"sync/atomic"
	"unsafe"
@@ -40,7 +41,7 @@ func (pu *pipeUniq) updateNeededFields(neededFields, unneededFields fieldsSet) {
	if len(pu.byFields) == 0 {
		neededFields.add("*")
	} else {
-		neededFields.addAll(pu.byFields)
+		neededFields.addFields(pu.byFields)
	}
}
@@ -49,10 +50,13 @@ func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, c
	shards := make([]pipeUniqProcessorShard, workersCount)
	for i := range shards {
-		shard := &shards[i]
-		shard.pu = pu
-		shard.m = make(map[string]struct{})
-		shard.stateSizeBudget = stateSizeBudgetChunk
+		shards[i] = pipeUniqProcessorShard{
+			pipeUniqProcessorShardNopad: pipeUniqProcessorShardNopad{
+				pu:              pu,
+				m:               make(map[string]struct{}),
+				stateSizeBudget: stateSizeBudgetChunk,
+			},
+		}
		maxStateSize -= stateSizeBudgetChunk
	}
@@ -116,7 +120,6 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
		return false
	}

-	m := shard.m
	byFields := shard.pu.byFields
	if len(byFields) == 0 {
		// Take into account all the columns in br.
@@ -129,20 +132,41 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
				keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.name))
				keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
			}
-			if _, ok := m[string(keyBuf)]; !ok {
-				m[string(keyBuf)] = struct{}{}
-				shard.stateSizeBudget -= len(keyBuf) + int(unsafe.Sizeof(""))
-			}
+			shard.updateState(bytesutil.ToUnsafeString(keyBuf))
		}
		shard.keyBuf = keyBuf
		return true
	}

+	if len(byFields) == 1 {
+		// Fast path for a single field.
+		c := br.getColumnByName(byFields[0])
+		if c.isConst {
+			v := c.valuesEncoded[0]
+			shard.updateState(v)
+			return true
+		}
+		if c.valueType == valueTypeDict {
+			for _, v := range c.dictValues {
+				shard.updateState(v)
+			}
+			return true
+		}
+
+		values := c.getValues(br)
+		for i, v := range values {
+			if i == 0 || values[i-1] != values[i] {
+				shard.updateState(v)
+			}
+		}
+		return true
+	}
+
	// Take into account only the selected columns.
	columnValues := shard.columnValues[:0]
	for _, f := range byFields {
		c := br.getColumnByName(f)
-		columnValues = append(columnValues, c.getValues(br))
+		values := c.getValues(br)
+		columnValues = append(columnValues, values)
	}
	shard.columnValues = columnValues
@@ -163,16 +187,21 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
		for _, values := range columnValues {
			keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(values[i]))
		}
-		if _, ok := m[string(keyBuf)]; !ok {
-			m[string(keyBuf)] = struct{}{}
-			shard.stateSizeBudget -= len(keyBuf) + int(unsafe.Sizeof(""))
-		}
+		shard.updateState(bytesutil.ToUnsafeString(keyBuf))
	}
	shard.keyBuf = keyBuf

	return true
}

+func (shard *pipeUniqProcessorShard) updateState(v string) {
+	if _, ok := shard.m[v]; !ok {
+		vCopy := strings.Clone(v)
+		shard.m[vCopy] = struct{}{}
+		shard.stateSizeBudget -= len(vCopy) + int(unsafe.Sizeof(vCopy))
+	}
+}
+
func (pup *pipeUniqProcessor) writeBlock(workerID uint, br *blockResult) {
	if len(br.timestamps) == 0 {
		return
@@ -253,6 +282,19 @@ func (pup *pipeUniqProcessor) flush() error {
			}
			wctx.writeRow(rowFields)
		}
+	} else if len(byFields) == 1 {
+		fieldName := byFields[0]
+		for k := range m {
+			if needStop(pup.stopCh) {
+				return nil
+			}
+
+			rowFields = append(rowFields[:0], Field{
+				Name:  fieldName,
+				Value: k,
+			})
+			wctx.writeRow(rowFields)
+		}
	} else {
		for k := range m {
			if needStop(pup.stopCh) {
@@ -317,9 +359,7 @@ func (wctx *pipeUniqWriteContext) writeRow(rowFields []Field) {
		rcs = wctx.rcs[:0]
		for _, f := range rowFields {
-			rcs = append(rcs, resultColumn{
-				name: f.Name,
-			})
+			rcs = appendResultColumnWithName(rcs, f.Name)
		}
		wctx.rcs = rcs
	}
@@ -349,7 +389,7 @@ func (wctx *pipeUniqWriteContext) flush() {
	wctx.pup.ppBase.writeBlock(0, br)
	br.reset()
	for i := range rcs {
-		rcs[i].resetKeepName()
+		rcs[i].resetValues()
	}
}
@@ -360,8 +400,10 @@ func parsePipeUniq(lex *lexer) (*pipeUniq, error) {
	lex.nextToken()

	var pu pipeUniq
-	if lex.isKeyword("by") {
-		lex.nextToken()
+	if lex.isKeyword("by", "(") {
+		if lex.isKeyword("by") {
+			lex.nextToken()
+		}
		bfs, err := parseFieldNamesInParens(lex)
		if err != nil {
			return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)

View file

@@ -7,18 +7,7 @@ import (
func TestPipeUniqUpdateNeededFields(t *testing.T) {
	f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
		t.Helper()
-		nfs := newTestFieldsSet(neededFields)
-		unfs := newTestFieldsSet(unneededFields)
-		lex := newLexer(s)
-		p, err := parsePipeUniq(lex)
-		if err != nil {
-			t.Fatalf("cannot parse %s: %s", s, err)
-		}
-		p.updateNeededFields(nfs, unfs)
-		assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
+		expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
	}

	// all the needed fields

View file

@@ -0,0 +1,147 @@
package logstorage
import (
"fmt"
"unsafe"
)
// pipeUnpackJSON processes '| unpack_json ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe
type pipeUnpackJSON struct {
fromField string
resultPrefix string
}
func (pu *pipeUnpackJSON) String() string {
s := "unpack_json"
if !isMsgFieldName(pu.fromField) {
s += " from " + quoteTokenIfNeeded(pu.fromField)
}
if pu.resultPrefix != "" {
s += " result_prefix " + quoteTokenIfNeeded(pu.resultPrefix)
}
return s
}
func (pu *pipeUnpackJSON) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFields.remove(pu.fromField)
} else {
neededFields.add(pu.fromField)
}
}
func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeUnpackJSONProcessorShard, workersCount)
pup := &pipeUnpackJSONProcessor{
pu: pu,
ppBase: ppBase,
shards: shards,
}
return pup
}
type pipeUnpackJSONProcessor struct {
pu *pipeUnpackJSON
ppBase pipeProcessor
shards []pipeUnpackJSONProcessorShard
}
type pipeUnpackJSONProcessorShard struct {
pipeUnpackJSONProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeUnpackJSONProcessorShardNopad{})%128]byte
}
type pipeUnpackJSONProcessorShardNopad struct {
p JSONParser
wctx pipeUnpackWriteContext
}
func (shard *pipeUnpackJSONProcessorShard) parseJSON(v, resultPrefix string) []Field {
if len(v) == 0 || v[0] != '{' {
// This isn't a JSON object
return nil
}
if err := shard.p.ParseLogMessageNoResetBuf(v, resultPrefix); err != nil {
// Cannot parse v
return nil
}
return shard.p.Fields
}
func (pup *pipeUnpackJSONProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
resultPrefix := pup.pu.resultPrefix
shard := &pup.shards[workerID]
wctx := &shard.wctx
wctx.init(br, pup.ppBase)
c := br.getColumnByName(pup.pu.fromField)
if c.isConst {
v := c.valuesEncoded[0]
extraFields := shard.parseJSON(v, resultPrefix)
for rowIdx := range br.timestamps {
wctx.writeRow(rowIdx, extraFields)
}
} else {
values := c.getValues(br)
var extraFields []Field
for i, v := range values {
if i == 0 || values[i-1] != v {
extraFields = shard.parseJSON(v, resultPrefix)
}
wctx.writeRow(i, extraFields)
}
}
wctx.flush()
shard.p.reset()
}
func (pup *pipeUnpackJSONProcessor) flush() error {
return nil
}
func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) {
if !lex.isKeyword("unpack_json") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unpack_json")
}
lex.nextToken()
fromField := "_msg"
if lex.isKeyword("from") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
}
fromField = f
}
resultPrefix := ""
if lex.isKeyword("result_prefix") {
lex.nextToken()
p, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'result_prefix': %w", err)
}
resultPrefix = p
}
pu := &pipeUnpackJSON{
fromField: fromField,
resultPrefix: resultPrefix,
}
return pu, nil
}
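// Illustrative usage sketch (assumed LogsQL syntax, derived from parsePipeUnpackJSON above
// and the tests below):
//
//	_time:5m | unpack_json from my_field result_prefix p_
//
// For my_field=`{"foo":"bar","y":{"z":"a"}}` this is expected to add the fields
// p_foo="bar" and p_y.z="a", while keeping my_field itself intact.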

View file

@@ -0,0 +1,376 @@
package logstorage
import (
"math/rand"
"slices"
"strings"
"sync"
"testing"
)
func TestPipeUnpackJSON(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// single row, unpack from _msg
f("unpack_json", [][]Field{
{
{"_msg", `{"foo":"bar"}`},
},
}, [][]Field{
{
{"_msg", `{"foo":"bar"}`},
{"foo", "bar"},
},
})
// single row, unpack from _msg into _msg
f("unpack_json", [][]Field{
{
{"_msg", `{"_msg":"bar"}`},
},
}, [][]Field{
{
{"_msg", "bar"},
},
})
// single row, unpack from missing field
f("unpack_json from x", [][]Field{
{
{"_msg", `{"foo":"bar"}`},
},
}, [][]Field{
{
{"_msg", `{"foo":"bar"}`},
},
})
// single row, unpack from non-json field
f("unpack_json from x", [][]Field{
{
{"x", `foobar`},
},
}, [][]Field{
{
{"x", `foobar`},
},
})
// single row, unpack from non-dict json
f("unpack_json from x", [][]Field{
{
{"x", `["foobar"]`},
},
}, [][]Field{
{
{"x", `["foobar"]`},
},
})
f("unpack_json from x", [][]Field{
{
{"x", `1234`},
},
}, [][]Field{
{
{"x", `1234`},
},
})
f("unpack_json from x", [][]Field{
{
{"x", `"xxx"`},
},
}, [][]Field{
{
{"x", `"xxx"`},
},
})
// single row, unpack from named field
f("unpack_json from x", [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz","a":123,"b":["foo","bar"],"x":NaN,"y":{"z":{"a":"b"}}}`},
},
}, [][]Field{
{
{"x", `NaN`},
{"foo", "bar"},
{"baz", "xyz"},
{"a", "123"},
{"b", `["foo","bar"]`},
{"y.z.a", "b"},
},
})
// multiple rows with distinct number of fields
f("unpack_json from x", [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz"}`},
{"y", `abc`},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `{"z":["bar",123]}`},
},
}, [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz"}`},
{"y", "abc"},
{"foo", "bar"},
{"baz", "xyz"},
},
{
{"y", `abc`},
},
{
{"z", `["bar",123]`},
{"x", `{"z":["bar",123]}`},
},
})
// multiple rows with distinct number of fields with result_prefix
f("unpack_json from x result_prefix qwe_", [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz"}`},
{"y", `abc`},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `{"z":["bar",123]}`},
},
}, [][]Field{
{
{"x", `{"foo":"bar","baz":"xyz"}`},
{"y", "abc"},
{"qwe_foo", "bar"},
{"qwe_baz", "xyz"},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `{"z":["bar",123]}`},
{"qwe_z", `["bar",123]`},
},
})
}
func expectPipeResults(t *testing.T, pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
lex := newLexer(pipeStr)
p, err := parsePipe(lex)
if err != nil {
t.Fatalf("unexpected error when parsing %q: %s", pipeStr, err)
}
workersCount := 5
stopCh := make(chan struct{})
cancel := func() {}
ppTest := newTestPipeProcessor()
pp := p.newPipeProcessor(workersCount, stopCh, cancel, ppTest)
brw := newTestBlockResultWriter(workersCount, pp)
for _, row := range rows {
brw.writeRow(row)
}
brw.flush()
ppTest.expectRows(t, rowsExpected)
}
func newTestBlockResultWriter(workersCount int, ppBase pipeProcessor) *testBlockResultWriter {
return &testBlockResultWriter{
workersCount: workersCount,
ppBase: ppBase,
}
}
type testBlockResultWriter struct {
workersCount int
ppBase pipeProcessor
rcs []resultColumn
br blockResult
}
func (brw *testBlockResultWriter) writeRow(row []Field) {
if !brw.areSameFields(row) {
brw.flush()
brw.rcs = brw.rcs[:0]
for _, field := range row {
brw.rcs = appendResultColumnWithName(brw.rcs, field.Name)
}
}
for i, field := range row {
brw.rcs[i].addValue(field.Value)
}
if rand.Intn(5) == 0 {
brw.flush()
}
}
func (brw *testBlockResultWriter) areSameFields(row []Field) bool {
if len(brw.rcs) != len(row) {
return false
}
for i, rc := range brw.rcs {
if rc.name != row[i].Name {
return false
}
}
return true
}
func (brw *testBlockResultWriter) flush() {
brw.br.setResultColumns(brw.rcs)
workerID := rand.Intn(brw.workersCount)
brw.ppBase.writeBlock(uint(workerID), &brw.br)
brw.br.reset()
for i := range brw.rcs {
brw.rcs[i].resetValues()
}
}
func newTestPipeProcessor() *testPipeProcessor {
return &testPipeProcessor{}
}
type testPipeProcessor struct {
resultRowsLock sync.Mutex
resultRows [][]Field
}
func (pp *testPipeProcessor) writeBlock(_ uint, br *blockResult) {
cs := br.getColumns()
var columnValues [][]string
for _, c := range cs {
values := c.getValues(br)
columnValues = append(columnValues, values)
}
for i := range br.timestamps {
row := make([]Field, len(columnValues))
for j, values := range columnValues {
r := &row[j]
r.Name = strings.Clone(cs[j].name)
r.Value = strings.Clone(values[i])
}
pp.resultRowsLock.Lock()
pp.resultRows = append(pp.resultRows, row)
pp.resultRowsLock.Unlock()
}
}
func (pp *testPipeProcessor) flush() error {
return nil
}
func (pp *testPipeProcessor) expectRows(t *testing.T, expectedRows [][]Field) {
t.Helper()
if len(pp.resultRows) != len(expectedRows) {
t.Fatalf("unexpected number of rows; got %d; want %d\nrows got\n%s\nrows expected\n%s",
len(pp.resultRows), len(expectedRows), rowsToString(pp.resultRows), rowsToString(expectedRows))
}
sortTestRows(pp.resultRows)
sortTestRows(expectedRows)
for i, resultRow := range pp.resultRows {
expectedRow := expectedRows[i]
if len(resultRow) != len(expectedRow) {
t.Fatalf("unexpected number of fields at row #%d; got %d; want %d\nrow got\n%s\nrow expected\n%s",
i, len(resultRow), len(expectedRow), rowToString(resultRow), rowToString(expectedRow))
}
for j, resultField := range resultRow {
expectedField := expectedRow[j]
if resultField.Name != expectedField.Name {
t.Fatalf("unexpected field name at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
i, resultField.Name, expectedField.Name, rowToString(resultRow), rowToString(expectedRow))
}
if resultField.Value != expectedField.Value {
t.Fatalf("unexpected value for field %q at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
resultField.Name, i, resultField.Value, expectedField.Value, rowToString(resultRow), rowToString(expectedRow))
}
}
}
}
func sortTestRows(rows [][]Field) {
slices.SortFunc(rows, func(a, b []Field) int {
reverse := -1
if len(a) > len(b) {
reverse = 1
a, b = b, a
}
for i, fA := range a {
fB := b[i]
if fA.Name == fB.Name {
if fA.Value == fB.Value {
continue
}
if fA.Value < fB.Value {
return reverse
}
return -reverse
}
if fA.Name < fB.Name {
return reverse
}
return -reverse
}
if len(a) == len(b) {
return 0
}
return reverse
})
}
func rowsToString(rows [][]Field) string {
a := make([]string, len(rows))
for i, row := range rows {
a[i] = rowToString(row)
}
return strings.Join(a, "\n")
}
func rowToString(row []Field) string {
a := make([]string, len(row))
for i, f := range row {
a[i] = f.String()
}
return "{" + strings.Join(a, ",") + "}"
}
func TestPipeUnpackJSONUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("unpack_json from x", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("unpack_json from x", "*", "f1,f2", "*", "f1,f2")
// all the needed fields, unneeded fields intersect with src
f("unpack_json from x", "*", "f2,x", "*", "f2")
// needed fields do not intersect with src
f("unpack_json from x", "f1,f2", "", "f1,f2,x", "")
// needed fields intersect with src
f("unpack_json from x", "f2,x", "", "f2,x", "")
}

View file

@@ -0,0 +1,289 @@
package logstorage
import (
"fmt"
"strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// pipeUnpackLogfmt processes '| unpack_logfmt ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe
type pipeUnpackLogfmt struct {
fromField string
resultPrefix string
}
func (pu *pipeUnpackLogfmt) String() string {
s := "unpack_logfmt"
if !isMsgFieldName(pu.fromField) {
s += " from " + quoteTokenIfNeeded(pu.fromField)
}
if pu.resultPrefix != "" {
s += " result_prefix " + quoteTokenIfNeeded(pu.resultPrefix)
}
return s
}
func (pu *pipeUnpackLogfmt) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFields.remove(pu.fromField)
} else {
neededFields.add(pu.fromField)
}
}
func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeUnpackLogfmtProcessorShard, workersCount)
pup := &pipeUnpackLogfmtProcessor{
pu: pu,
ppBase: ppBase,
shards: shards,
}
return pup
}
type pipeUnpackLogfmtProcessor struct {
pu *pipeUnpackLogfmt
ppBase pipeProcessor
shards []pipeUnpackLogfmtProcessorShard
}
type pipeUnpackLogfmtProcessorShard struct {
pipeUnpackLogfmtProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeUnpackLogfmtProcessorShardNopad{})%128]byte
}
type pipeUnpackLogfmtProcessorShardNopad struct {
p logfmtParser
wctx pipeUnpackWriteContext
}
func (pup *pipeUnpackLogfmtProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
resultPrefix := pup.pu.resultPrefix
shard := &pup.shards[workerID]
wctx := &shard.wctx
wctx.init(br, pup.ppBase)
c := br.getColumnByName(pup.pu.fromField)
if c.isConst {
v := c.valuesEncoded[0]
extraFields := shard.p.parse(v, resultPrefix)
for rowIdx := range br.timestamps {
wctx.writeRow(rowIdx, extraFields)
}
} else {
values := c.getValues(br)
var extraFields []Field
for i, v := range values {
if i == 0 || values[i-1] != v {
extraFields = shard.p.parse(v, resultPrefix)
}
wctx.writeRow(i, extraFields)
}
}
wctx.flush()
shard.p.reset()
}
func (pup *pipeUnpackLogfmtProcessor) flush() error {
return nil
}
func parsePipeUnpackLogfmt(lex *lexer) (*pipeUnpackLogfmt, error) {
if !lex.isKeyword("unpack_logfmt") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unpack_logfmt")
}
lex.nextToken()
fromField := "_msg"
if lex.isKeyword("from") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
}
fromField = f
}
resultPrefix := ""
if lex.isKeyword("result_prefix") {
lex.nextToken()
p, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'result_prefix': %w", err)
}
resultPrefix = p
}
pu := &pipeUnpackLogfmt{
fromField: fromField,
resultPrefix: resultPrefix,
}
return pu, nil
}
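// Illustrative usage sketch (assumed LogsQL syntax, derived from parsePipeUnpackLogfmt above
// and the tests below):
//
//	_time:5m | unpack_logfmt from my_field result_prefix p_
//
// For my_field=`level=info msg="hello world"` this is expected to add the fields
// p_level="info" and p_msg="hello world".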
type pipeUnpackWriteContext struct {
brSrc *blockResult
csSrc []*blockResultColumn
ppBase pipeProcessor
rcs []resultColumn
br blockResult
valuesLen int
}
func (wctx *pipeUnpackWriteContext) init(brSrc *blockResult, ppBase pipeProcessor) {
wctx.brSrc = brSrc
wctx.csSrc = brSrc.getColumns()
wctx.ppBase = ppBase
}
func (wctx *pipeUnpackWriteContext) writeRow(rowIdx int, extraFields []Field) {
csSrc := wctx.csSrc
rcs := wctx.rcs
areEqualColumns := len(rcs) == len(csSrc)+len(extraFields)
if areEqualColumns {
for i, f := range extraFields {
if rcs[len(csSrc)+i].name != f.Name {
areEqualColumns = false
break
}
}
}
if !areEqualColumns {
// send the current block to ppBase and construct a block with a new set of columns
wctx.flush()
rcs = wctx.rcs[:0]
for _, c := range csSrc {
rcs = appendResultColumnWithName(rcs, c.name)
}
for _, f := range extraFields {
rcs = appendResultColumnWithName(rcs, f.Name)
}
wctx.rcs = rcs
}
brSrc := wctx.brSrc
for i, c := range csSrc {
v := c.getValueAtRow(brSrc, rowIdx)
rcs[i].addValue(v)
wctx.valuesLen += len(v)
}
for i, f := range extraFields {
v := f.Value
rcs[len(csSrc)+i].addValue(v)
wctx.valuesLen += len(v)
}
if wctx.valuesLen >= 1_000_000 {
wctx.flush()
}
}
func (wctx *pipeUnpackWriteContext) flush() {
rcs := wctx.rcs
wctx.valuesLen = 0
if len(rcs) == 0 {
return
}
// Flush rcs to ppBase
br := &wctx.br
br.setResultColumns(rcs)
wctx.ppBase.writeBlock(0, br)
br.reset()
for i := range rcs {
rcs[i].resetValues()
}
}
type logfmtParser struct {
Fields []Field
buf []byte
}
func (p *logfmtParser) reset() {
clear(p.Fields)
p.Fields = p.Fields[:0]
p.buf = p.buf[:0]
}
func (p *logfmtParser) parse(s, resultPrefix string) []Field {
clear(p.Fields)
p.Fields = p.Fields[:0]
for {
// Search for field name
n := strings.IndexByte(s, '=')
if n < 0 {
// field name couldn't be read
return p.Fields
}
name := strings.TrimSpace(s[:n])
s = s[n+1:]
if len(s) == 0 {
p.addField(name, "", resultPrefix)
return p.Fields
}
// Search for field value
value, nOffset := tryUnquoteString(s)
if nOffset >= 0 {
p.addField(name, value, resultPrefix)
s = s[nOffset:]
if len(s) == 0 {
return p.Fields
}
if s[0] != ' ' {
return p.Fields
}
s = s[1:]
} else {
n := strings.IndexByte(s, ' ')
if n < 0 {
p.addField(name, s, resultPrefix)
return p.Fields
}
p.addField(name, s[:n], resultPrefix)
s = s[n+1:]
}
}
}
func (p *logfmtParser) addField(name, value, resultPrefix string) {
if resultPrefix != "" {
buf := p.buf
bufLen := len(buf)
buf = append(buf, resultPrefix...)
buf = append(buf, name...)
p.buf = buf
name = bytesutil.ToUnsafeString(buf[bufLen:])
}
p.Fields = append(p.Fields, Field{
Name: name,
Value: value,
})
}
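// Illustrative sketch of logfmtParser behavior (mirrors the cases in TestPipeUnpackLogfmt below):
//
//	var p logfmtParser
//	fields := p.parse(`foo=bar baz="x y=z" a=`, "")
//	// fields: {Name: "foo", Value: "bar"}, {Name: "baz", Value: "x y=z"}, {Name: "a", Value: ""}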

View file

@@ -0,0 +1,175 @@
package logstorage
import (
"testing"
)
func TestPipeUnpackLogfmt(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// single row, unpack from _msg
f("unpack_logfmt", [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
},
}, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"foo", "bar"},
{"baz", "x y=z"},
{"a", "b"},
},
})
// single row, unpack from _msg into _msg
f("unpack_logfmt", [][]Field{
{
{"_msg", `_msg=bar`},
},
}, [][]Field{
{
{"_msg", "bar"},
},
})
// single row, unpack from missing field
f("unpack_logfmt from x", [][]Field{
{
{"_msg", `foo=bar`},
},
}, [][]Field{
{
{"_msg", `foo=bar`},
},
})
// single row, unpack from non-json field
f("unpack_logfmt from x", [][]Field{
{
{"x", `foobar`},
},
}, [][]Field{
{
{"x", `foobar`},
},
})
// single row, unpack from non-logfmt
f("unpack_logfmt from x", [][]Field{
{
{"x", `foobar`},
},
}, [][]Field{
{
{"x", `foobar`},
},
})
// unpack empty value
f("unpack_logfmt from x", [][]Field{
{
{"x", `foobar=`},
},
}, [][]Field{
{
{"x", `foobar=`},
{"foobar", ""},
},
})
f("unpack_logfmt from x", [][]Field{
{
{"x", `foo="" bar= baz=`},
},
}, [][]Field{
{
{"x", `foo="" bar= baz=`},
{"foo", ""},
{"bar", ""},
{"baz", ""},
},
})
// multiple rows with distinct number of fields
f("unpack_logfmt from x", [][]Field{
{
{"x", `foo=bar baz=xyz`},
{"y", `abc`},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `z=bar`},
},
}, [][]Field{
{
{"x", `foo=bar baz=xyz`},
{"y", "abc"},
{"foo", "bar"},
{"baz", "xyz"},
},
{
{"y", `abc`},
},
{
{"z", `bar`},
{"x", `z=bar`},
},
})
// multiple rows with distinct number of fields, with result_prefix
f("unpack_logfmt from x result_prefix qwe_", [][]Field{
{
{"x", `foo=bar baz=xyz`},
{"y", `abc`},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `z=bar`},
},
}, [][]Field{
{
{"x", `foo=bar baz=xyz`},
{"y", "abc"},
{"qwe_foo", "bar"},
{"qwe_baz", "xyz"},
},
{
{"y", `abc`},
},
{
{"z", `foobar`},
{"x", `z=bar`},
{"qwe_z", `bar`},
},
})
}
func TestPipeUnpackLogfmtUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("unpack_logfmt from x", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("unpack_logfmt from x", "*", "f1,f2", "*", "f1,f2")
// all the needed fields, unneeded fields intersect with src
f("unpack_logfmt from x", "*", "f2,x", "*", "f2")
// needed fields do not intersect with src
f("unpack_logfmt from x", "f1,f2", "", "f1,f2,x", "")
// needed fields intersect with src
f("unpack_logfmt from x", "f2,x", "", "f2,x", "")
}

View file

@@ -1,7 +1,6 @@
package logstorage

import (
-	"math"
	"slices"
	"strconv"
	"unsafe"
@@ -16,8 +15,8 @@ func (sa *statsAvg) String() string {
	return "avg(" + fieldNamesString(sa.fields) + ")"
}

-func (sa *statsAvg) neededFields() []string {
-	return sa.fields
+func (sa *statsAvg) updateNeededFields(neededFields fieldsSet) {
+	neededFields.addFields(sa.fields)
}

func (sa *statsAvg) newStatsProcessor() (statsProcessor, int) {
@@ -58,8 +57,8 @@ func (sap *statsAvgProcessor) updateStatsForRow(br *blockResult, rowIdx int) int
	if sap.sa.containsStar {
		// Scan all the fields for the given row
		for _, c := range br.getColumns() {
-			f := c.getFloatValueAtRow(rowIdx)
-			if !math.IsNaN(f) {
+			f, ok := c.getFloatValueAtRow(br, rowIdx)
+			if ok {
				sap.sum += f
				sap.count++
			}
@@ -68,8 +67,8 @@ func (sap *statsAvgProcessor) updateStatsForRow(br *blockResult, rowIdx int) int
		// Scan only the given fields for the given row
		for _, field := range sap.sa.fields {
			c := br.getColumnByName(field)
-			f := c.getFloatValueAtRow(rowIdx)
-			if !math.IsNaN(f) {
+			f, ok := c.getFloatValueAtRow(br, rowIdx)
+			if ok {
				sap.sum += f
				sap.count++
			}

View file

@@ -17,12 +17,12 @@ func (sc *statsCount) String() string {
	return "count(" + fieldNamesString(sc.fields) + ")"
}

-func (sc *statsCount) neededFields() []string {
+func (sc *statsCount) updateNeededFields(neededFields fieldsSet) {
	if sc.containsStar {
		// There is no need in fetching any columns for count(*) - the number of matching rows can be calculated as len(blockResult.timestamps)
-		return nil
+		return
	}
-	return sc.fields
+	neededFields.addFields(sc.fields)
}

func (sc *statsCount) newStatsProcessor() (statsProcessor, int) {
@@ -49,7 +49,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
		// Fast path for count(single_column)
		c := br.getColumnByName(fields[0])
		if c.isConst {
-			if c.encodedValues[0] != "" {
+			if c.valuesEncoded[0] != "" {
				scp.rowsCount += uint64(len(br.timestamps))
			}
			return 0
@@ -60,7 +60,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
		}
		switch c.valueType {
		case valueTypeString:
-			for _, v := range c.encodedValues {
+			for _, v := range c.getValuesEncoded(br) {
				if v != "" {
					scp.rowsCount++
				}
@@ -72,7 +72,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
				scp.rowsCount += uint64(len(br.timestamps))
				return 0
			}
-			for _, v := range c.encodedValues {
+			for _, v := range c.getValuesEncoded(br) {
				if int(v[0]) != zeroDictIdx {
					scp.rowsCount++
				}
@@ -95,7 +95,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
	for _, f := range fields {
		c := br.getColumnByName(f)
		if c.isConst {
-			if c.encodedValues[0] != "" {
+			if c.valuesEncoded[0] != "" {
				scp.rowsCount += uint64(len(br.timestamps))
				return 0
			}
@@ -105,18 +105,21 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
			scp.rowsCount += uint64(len(br.timestamps))
			return 0
		}
		switch c.valueType {
		case valueTypeString:
+			valuesEncoded := c.getValuesEncoded(br)
			bm.forEachSetBit(func(i int) bool {
-				return c.encodedValues[i] == ""
+				return valuesEncoded[i] == ""
			})
		case valueTypeDict:
			if !slices.Contains(c.dictValues, "") {
				scp.rowsCount += uint64(len(br.timestamps))
				return 0
			}
+			valuesEncoded := c.getValuesEncoded(br)
			bm.forEachSetBit(func(i int) bool {
-				dictIdx := c.encodedValues[i][0]
+				dictIdx := valuesEncoded[i][0]
				return c.dictValues[dictIdx] == ""
			})
		case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
@@ -144,7 +147,7 @@ func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) i
		// Fast path for count(single_column)
		c := br.getColumnByName(fields[0])
		if c.isConst {
-			if c.encodedValues[0] != "" {
+			if c.valuesEncoded[0] != "" {
				scp.rowsCount++
			}
			return 0
@@ -155,12 +158,14 @@ func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) i
		}
		switch c.valueType {
		case valueTypeString:
-			if v := c.encodedValues[rowIdx]; v != "" {
+			valuesEncoded := c.getValuesEncoded(br)
+			if v := valuesEncoded[rowIdx]; v != "" {
				scp.rowsCount++
			}
			return 0
		case valueTypeDict:
-			dictIdx := c.encodedValues[rowIdx][0]
+			valuesEncoded := c.getValuesEncoded(br)
+			dictIdx := valuesEncoded[rowIdx][0]
			if v := c.dictValues[dictIdx]; v != "" {
				scp.rowsCount++
			}

View file

@@ -17,8 +17,8 @@ func (sc *statsCountEmpty) String() string {
	return "count_empty(" + fieldNamesString(sc.fields) + ")"
}

-func (sc *statsCountEmpty) neededFields() []string {
-	return sc.fields
+func (sc *statsCountEmpty) updateNeededFields(neededFields fieldsSet) {
+	neededFields.addFields(sc.fields)
}

func (sc *statsCountEmpty) newStatsProcessor() (statsProcessor, int) {
@@ -53,7 +53,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
		// Fast path for count_empty(single_column)
		c := br.getColumnByName(fields[0])
		if c.isConst {
-			if c.encodedValues[0] == "" {
+			if c.valuesEncoded[0] == "" {
				scp.rowsCount += uint64(len(br.timestamps))
			}
			return 0
@@ -63,7 +63,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
		}
		switch c.valueType {
		case valueTypeString:
-			for _, v := range c.encodedValues {
+			for _, v := range c.getValuesEncoded(br) {
				if v == "" {
					scp.rowsCount++
				}
@@ -74,7 +74,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
			if zeroDictIdx < 0 {
				return 0
			}
-			for _, v := range c.encodedValues {
+			for _, v := range c.getValuesEncoded(br) {
				if int(v[0]) == zeroDictIdx {
					scp.rowsCount++
				}
@@ -96,7 +96,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
	for _, f := range fields {
		c := br.getColumnByName(f)
		if c.isConst {
-			if c.encodedValues[0] == "" {
+			if c.valuesEncoded[0] == "" {
				scp.rowsCount += uint64(len(br.timestamps))
				return 0
			}
@@ -107,15 +107,17 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
		}
		switch c.valueType {
		case valueTypeString:
+			valuesEncoded := c.getValuesEncoded(br)
			bm.forEachSetBit(func(i int) bool {
-				return c.encodedValues[i] == ""
+				return valuesEncoded[i] == ""
			})
		case valueTypeDict:
			if !slices.Contains(c.dictValues, "") {
				return 0
			}
+			valuesEncoded := c.getValuesEncoded(br)
			bm.forEachSetBit(func(i int) bool {
-				dictIdx := c.encodedValues[i][0]
+				dictIdx := valuesEncoded[i][0]
				return c.dictValues[dictIdx] == ""
			})
		case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
@@ -145,7 +147,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForRow(br *blockResult, rowIdx i
		// Fast path for count_empty(single_column)
		c := br.getColumnByName(fields[0])
		if c.isConst {
-			if c.encodedValues[0] == "" {
+			if c.valuesEncoded[0] == "" {
				scp.rowsCount++
			}
			return 0
@@ -155,12 +157,14 @@ func (scp *statsCountEmptyProcessor) updateStatsForRow(br *blockResult, rowIdx i
		}
		switch c.valueType {
		case valueTypeString:
-			if v := c.encodedValues[rowIdx]; v == "" {
+			valuesEncoded := c.getValuesEncoded(br)
+			if v := valuesEncoded[rowIdx]; v == "" {
				scp.rowsCount++
			}
			return 0
		case valueTypeDict:
-			dictIdx := c.encodedValues[rowIdx][0]
+			valuesEncoded := c.getValuesEncoded(br)
+			dictIdx := valuesEncoded[rowIdx][0]
			if v := c.dictValues[dictIdx]; v == "" {
				scp.rowsCount++
			}

View file

@@ -24,8 +24,8 @@ func (su *statsCountUniq) String() string {
	return s
}

-func (su *statsCountUniq) neededFields() []string {
-	return su.fields
+func (su *statsCountUniq) updateNeededFields(neededFields fieldsSet) {
+	neededFields.addFields(su.fields)
}

func (su *statsCountUniq) newStatsProcessor() (statsProcessor, int) {
@@ -122,7 +122,7 @@ func (sup *statsCountUniqProcessor) updateStatsForAllRows(br *blockResult) int {
		}
		if c.isConst {
			// count unique const values
-			v := c.encodedValues[0]
+			v := c.valuesEncoded[0]
			if v == "" {
				// Do not count empty values
				return stateSizeIncrease
@@ -156,7 +156,7 @@ func (sup *statsCountUniqProcessor) updateStatsForAllRows(br *blockResult) int {
			return stateSizeIncrease
		}

-		// Count unique values across encodedValues
+		// Count unique values across values
		values := c.getValues(br)
		keyBuf := sup.keyBuf[:0]
		for i, v := range values {
@@ -278,7 +278,7 @@ func (sup *statsCountUniqProcessor) updateStatsForRow(br *blockResult, rowIdx in
		}
		if c.isConst {
			// count unique const values
-			v := c.encodedValues[0]
+			v := c.valuesEncoded[0]
			if v == "" {
				// Do not count empty values
				return stateSizeIncrease
@@ -295,7 +295,8 @@ func (sup *statsCountUniqProcessor) updateStatsForRow(br *blockResult, rowIdx in
		}
		if c.valueType == valueTypeDict {
			// count unique non-zero c.dictValues
-			dictIdx := c.encodedValues[rowIdx][0]
+			valuesEncoded := c.getValuesEncoded(br)
+			dictIdx := valuesEncoded[rowIdx][0]
			v := c.dictValues[dictIdx]
			if v == "" {
				// Do not count empty values

View file

@@ -3,8 +3,11 @@ package logstorage
import (
	"math"
	"slices"
-	"strconv"
+	"strings"
	"unsafe"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)

type statsMax struct {
@@ -16,14 +19,13 @@ func (sm *statsMax) String() string {
	return "max(" + fieldNamesString(sm.fields) + ")"
}

-func (sm *statsMax) neededFields() []string {
-	return sm.fields
+func (sm *statsMax) updateNeededFields(neededFields fieldsSet) {
+	neededFields.addFields(sm.fields)
}

func (sm *statsMax) newStatsProcessor() (statsProcessor, int) {
	smp := &statsMaxProcessor{
		sm: sm,
-		max: nan,
	}
	return smp, int(unsafe.Sizeof(*smp))
}
@@ -31,62 +33,139 @@ func (sm *statsMax) newStatsProcessor() (statsProcessor, int) {
type statsMaxProcessor struct {
	sm *statsMax

-	max float64
+	max    string
+	hasMax bool
}

func (smp *statsMaxProcessor) updateStatsForAllRows(br *blockResult) int {
+	maxLen := len(smp.max)
+
	if smp.sm.containsStar {
-		// Find the maximum value across all the columns
+		// Find the minimum value across all the columns
		for _, c := range br.getColumns() {
-			f := c.getMaxValue()
-			if f > smp.max || math.IsNaN(smp.max) {
-				smp.max = f
-			}
+			smp.updateStateForColumn(br, c)
		}
	} else {
-		// Find the maximum value across the requested columns
+		// Find the minimum value across the requested columns
		for _, field := range smp.sm.fields {
			c := br.getColumnByName(field)
-			f := c.getMaxValue()
-			if f > smp.max || math.IsNaN(smp.max) {
-				smp.max = f
-			}
+			smp.updateStateForColumn(br, c)
		}
	}
-	return 0
+
+	return len(smp.max) - maxLen
}

func (smp *statsMaxProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
+	maxLen := len(smp.max)
+
	if smp.sm.containsStar {
-		// Find the maximum value across all the fields for the given row
+		// Find the minimum value across all the fields for the given row
		for _, c := range br.getColumns() {
-			f := c.getFloatValueAtRow(rowIdx)
-			if f > smp.max || math.IsNaN(smp.max) {
-				smp.max = f
-			}
+			v := c.getValueAtRow(br, rowIdx)
+			smp.updateStateString(v)
		}
	} else {
-		// Find the maximum value across the requested fields for the given row
+		// Find the minimum value across the requested fields for the given row
		for _, field := range smp.sm.fields {
			c := br.getColumnByName(field)
-			f := c.getFloatValueAtRow(rowIdx)
-			if f > smp.max || math.IsNaN(smp.max) {
-				smp.max = f
-			}
+			v := c.getValueAtRow(br, rowIdx)
+			smp.updateStateString(v)
		}
	}
-	return 0
+
+	return maxLen - len(smp.max)
}

func (smp *statsMaxProcessor) mergeState(sfp statsProcessor) {
	src := sfp.(*statsMaxProcessor)
-	if src.max > smp.max {
-		smp.max = src.max
+	if src.hasMax {
+		smp.updateStateString(src.max)
	}
}

+func (smp *statsMaxProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) {
+	if len(br.timestamps) == 0 {
+		return
+	}
+	if c.isTime {
+		// Special case for time column
+		timestamps := br.timestamps
+		maxTimestamp := timestamps[len(timestamps)-1]
+		for _, timestamp := range timestamps[:len(timestamps)-1] {
+			if timestamp > maxTimestamp {
+				maxTimestamp = timestamp
+			}
+		}
+		bb := bbPool.Get()
+		bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], maxTimestamp)
+		smp.updateStateBytes(bb.B)
+		bbPool.Put(bb)
+		return
+	}
+	if c.isConst {
+		// Special case for const column
+		v := c.valuesEncoded[0]
+		smp.updateStateString(v)
+		return
+	}
+
+	switch c.valueType {
+	case valueTypeString:
+		for _, v := range c.getValuesEncoded(br) {
+			smp.updateStateString(v)
+		}
+	case valueTypeDict:
+		for _, v := range c.dictValues {
+			smp.updateStateString(v)
+		}
+	case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
+		bb := bbPool.Get()
+		bb.B = marshalUint64String(bb.B[:0], c.maxValue)
+		smp.updateStateBytes(bb.B)
+		bbPool.Put(bb)
+	case valueTypeFloat64:
+		f := math.Float64frombits(c.maxValue)
+		bb := bbPool.Get()
+		bb.B = marshalFloat64String(bb.B[:0], f)
+		smp.updateStateBytes(bb.B)
+		bbPool.Put(bb)
+	case valueTypeIPv4:
+		bb := bbPool.Get()
+		bb.B = marshalIPv4String(bb.B[:0], uint32(c.maxValue))
+		smp.updateStateBytes(bb.B)
+		bbPool.Put(bb)
+	case valueTypeTimestampISO8601:
+		bb := bbPool.Get()
+		bb.B = marshalTimestampISO8601String(bb.B[:0], int64(c.maxValue))
+		smp.updateStateBytes(bb.B)
+		bbPool.Put(bb)
+	default:
+		logger.Panicf("BUG: unknown valueType=%d", c.valueType)
+	}
+}
+
+func (smp *statsMaxProcessor) updateStateBytes(b []byte) {
+	v := bytesutil.ToUnsafeString(b)
+	smp.updateStateString(v)
+}
+
+func (smp *statsMaxProcessor) updateStateString(v string) {
+	if smp.hasMax && !lessString(smp.max, v) {
+		return
+	}
+	smp.max = strings.Clone(v)
+	smp.hasMax = true
+}
+
func (smp *statsMaxProcessor) finalizeStats() string {
-	return strconv.FormatFloat(smp.max, 'f', -1, 64)
+	if !smp.hasMax {
+		return "NaN"
+	}
+	return smp.max
}

func parseStatsMax(lex *lexer) (*statsMax, error) {

View file

@@ -14,8 +14,8 @@ func (sm *statsMedian) String() string {
	return "median(" + fieldNamesString(sm.fields) + ")"
}

-func (sm *statsMedian) neededFields() []string {
-	return sm.fields
+func (sm *statsMedian) updateNeededFields(neededFields fieldsSet) {
+	neededFields.addFields(sm.fields)
}

func (sm *statsMedian) newStatsProcessor() (statsProcessor, int) {

View file

@ -3,8 +3,11 @@ package logstorage
import ( import (
"math" "math"
"slices" "slices"
"strconv" "strings"
"unsafe" "unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
) )
type statsMin struct { type statsMin struct {
@ -16,14 +19,13 @@ func (sm *statsMin) String() string {
return "min(" + fieldNamesString(sm.fields) + ")" return "min(" + fieldNamesString(sm.fields) + ")"
} }
func (sm *statsMin) neededFields() []string { func (sm *statsMin) updateNeededFields(neededFields fieldsSet) {
return sm.fields neededFields.addFields(sm.fields)
} }
func (sm *statsMin) newStatsProcessor() (statsProcessor, int) { func (sm *statsMin) newStatsProcessor() (statsProcessor, int) {
smp := &statsMinProcessor{ smp := &statsMinProcessor{
sm: sm, sm: sm,
min: nan,
} }
return smp, int(unsafe.Sizeof(*smp)) return smp, int(unsafe.Sizeof(*smp))
} }
@ -31,62 +33,139 @@ func (sm *statsMin) newStatsProcessor() (statsProcessor, int) {
type statsMinProcessor struct { type statsMinProcessor struct {
sm *statsMin sm *statsMin
min float64 min string
hasMin bool
} }
func (smp *statsMinProcessor) updateStatsForAllRows(br *blockResult) int { func (smp *statsMinProcessor) updateStatsForAllRows(br *blockResult) int {
minLen := len(smp.min)
if smp.sm.containsStar { if smp.sm.containsStar {
// Find the minimum value across all the columns // Find the minimum value across all the columns
for _, c := range br.getColumns() { for _, c := range br.getColumns() {
f := c.getMinValue() smp.updateStateForColumn(br, c)
if f < smp.min || math.IsNaN(smp.min) {
smp.min = f
}
} }
} else { } else {
// Find the minimum value across the requested columns // Find the minimum value across the requested columns
for _, field := range smp.sm.fields { for _, field := range smp.sm.fields {
c := br.getColumnByName(field) c := br.getColumnByName(field)
f := c.getMinValue() smp.updateStateForColumn(br, c)
if f < smp.min || math.IsNaN(smp.min) {
smp.min = f
}
} }
} }
return 0
return len(smp.min) - minLen
} }
func (smp *statsMinProcessor) updateStatsForRow(br *blockResult, rowIdx int) int { func (smp *statsMinProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
minLen := len(smp.min)
if smp.sm.containsStar { if smp.sm.containsStar {
// Find the minimum value across all the fields for the given row // Find the minimum value across all the fields for the given row
for _, c := range br.getColumns() { for _, c := range br.getColumns() {
f := c.getFloatValueAtRow(rowIdx) v := c.getValueAtRow(br, rowIdx)
if f < smp.min || math.IsNaN(smp.min) { smp.updateStateString(v)
smp.min = f
}
} }
} else { } else {
// Find the minimum value across the requested fields for the given row // Find the minimum value across the requested fields for the given row
for _, field := range smp.sm.fields { for _, field := range smp.sm.fields {
c := br.getColumnByName(field) c := br.getColumnByName(field)
f := c.getFloatValueAtRow(rowIdx) v := c.getValueAtRow(br, rowIdx)
if f < smp.min || math.IsNaN(smp.min) { smp.updateStateString(v)
smp.min = f
}
} }
} }
return 0
return minLen - len(smp.min)
} }
func (smp *statsMinProcessor) mergeState(sfp statsProcessor) { func (smp *statsMinProcessor) mergeState(sfp statsProcessor) {
src := sfp.(*statsMinProcessor) src := sfp.(*statsMinProcessor)
if src.min < smp.min { if src.hasMin {
smp.min = src.min smp.updateStateString(src.min)
} }
} }
func (smp *statsMinProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) {
if len(br.timestamps) == 0 {
return
}
if c.isTime {
// Special case for time column
timestamps := br.timestamps
minTimestamp := timestamps[0]
for _, timestamp := range timestamps[1:] {
if timestamp < minTimestamp {
minTimestamp = timestamp
}
}
bb := bbPool.Get()
bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], minTimestamp)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
return
}
if c.isConst {
// Special case for const column
v := c.valuesEncoded[0]
smp.updateStateString(v)
return
}
switch c.valueType {
case valueTypeString:
for _, v := range c.getValuesEncoded(br) {
smp.updateStateString(v)
}
case valueTypeDict:
for _, v := range c.dictValues {
smp.updateStateString(v)
}
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
bb := bbPool.Get()
bb.B = marshalUint64String(bb.B[:0], c.minValue)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeFloat64:
f := math.Float64frombits(c.minValue)
bb := bbPool.Get()
bb.B = marshalFloat64String(bb.B[:0], f)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeIPv4:
bb := bbPool.Get()
bb.B = marshalIPv4String(bb.B[:0], uint32(c.minValue))
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeTimestampISO8601:
bb := bbPool.Get()
bb.B = marshalTimestampISO8601String(bb.B[:0], int64(c.minValue))
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
default:
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
}
}
func (smp *statsMinProcessor) updateStateBytes(b []byte) {
v := bytesutil.ToUnsafeString(b)
smp.updateStateString(v)
}
func (smp *statsMinProcessor) updateStateString(v string) {
if smp.hasMin && !lessString(v, smp.min) {
return
}
smp.min = strings.Clone(v)
smp.hasMin = true
}
func (smp *statsMinProcessor) finalizeStats() string { func (smp *statsMinProcessor) finalizeStats() string {
return strconv.FormatFloat(smp.min, 'f', -1, 64) if !smp.hasMin {
return "NaN"
}
return smp.min
} }
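The min state is now tracked as a string together with a hasMin flag: the first observed value initializes the state, later values replace it only when they compare lower, and the kept value is cloned so it does not alias short-lived block buffers. A minimal, self-contained sketch of that pattern, with strings.Compare standing in for the package's lessString helper (an assumption; the real helper may order values numerically where possible):

package main

import (
	"fmt"
	"strings"
)

// minTracker mirrors the hasMin/min pattern used by statsMinProcessor.
type minTracker struct {
	min    string
	hasMin bool
}

func (mt *minTracker) update(v string) {
	// Keep the current minimum unless v compares lower.
	if mt.hasMin && strings.Compare(v, mt.min) >= 0 {
		return
	}
	mt.min = strings.Clone(v)
	mt.hasMin = true
}

func main() {
	var mt minTracker
	for _, v := range []string{"banana", "apple", "cherry"} {
		mt.update(v)
	}
	fmt.Println(mt.min, mt.hasMin) // apple true
}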
func parseStatsMin(lex *lexer) (*statsMin, error) { func parseStatsMin(lex *lexer) (*statsMin, error) {

View file

@ -8,6 +8,9 @@ import (
"unsafe" "unsafe"
"github.com/valyala/fastrand" "github.com/valyala/fastrand"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
) )
type statsQuantile struct { type statsQuantile struct {
@ -21,8 +24,8 @@ func (sq *statsQuantile) String() string {
return fmt.Sprintf("quantile(%g, %s)", sq.phi, fieldNamesString(sq.fields)) return fmt.Sprintf("quantile(%g, %s)", sq.phi, fieldNamesString(sq.fields))
} }
func (sq *statsQuantile) neededFields() []string { func (sq *statsQuantile) updateNeededFields(neededFields fieldsSet) {
return sq.fields neededFields.addFields(sq.fields)
} }
func (sq *statsQuantile) newStatsProcessor() (statsProcessor, int) { func (sq *statsQuantile) newStatsProcessor() (statsProcessor, int) {
@ -39,27 +42,16 @@ type statsQuantileProcessor struct {
} }
func (sqp *statsQuantileProcessor) updateStatsForAllRows(br *blockResult) int { func (sqp *statsQuantileProcessor) updateStatsForAllRows(br *blockResult) int {
h := &sqp.h
stateSizeIncrease := 0 stateSizeIncrease := 0
if sqp.sq.containsStar { if sqp.sq.containsStar {
for _, c := range br.getColumns() { for _, c := range br.getColumns() {
for _, v := range c.getValues(br) { stateSizeIncrease += sqp.updateStateForColumn(br, c)
f, ok := tryParseFloat64(v)
if ok {
stateSizeIncrease += h.update(f)
}
}
} }
} else { } else {
for _, field := range sqp.sq.fields { for _, field := range sqp.sq.fields {
c := br.getColumnByName(field) c := br.getColumnByName(field)
for _, v := range c.getValues(br) { stateSizeIncrease += sqp.updateStateForColumn(br, c)
f, ok := tryParseFloat64(v)
if ok {
stateSizeIncrease += h.update(f)
}
}
} }
} }
@ -72,16 +64,16 @@ func (sqp *statsQuantileProcessor) updateStatsForRow(br *blockResult, rowIdx int
if sqp.sq.containsStar { if sqp.sq.containsStar {
for _, c := range br.getColumns() { for _, c := range br.getColumns() {
f := c.getFloatValueAtRow(rowIdx) f, ok := c.getFloatValueAtRow(br, rowIdx)
if !math.IsNaN(f) { if ok {
stateSizeIncrease += h.update(f) stateSizeIncrease += h.update(f)
} }
} }
} else { } else {
for _, field := range sqp.sq.fields { for _, field := range sqp.sq.fields {
c := br.getColumnByName(field) c := br.getColumnByName(field)
f := c.getFloatValueAtRow(rowIdx) f, ok := c.getFloatValueAtRow(br, rowIdx)
if !math.IsNaN(f) { if ok {
stateSizeIncrease += h.update(f) stateSizeIncrease += h.update(f)
} }
} }
@ -90,6 +82,85 @@ func (sqp *statsQuantileProcessor) updateStatsForRow(br *blockResult, rowIdx int
return stateSizeIncrease return stateSizeIncrease
} }
func (sqp *statsQuantileProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) int {
h := &sqp.h
stateSizeIncrease := 0
if c.isConst {
f, ok := tryParseFloat64(c.valuesEncoded[0])
if ok {
for range br.timestamps {
stateSizeIncrease += h.update(f)
}
}
return stateSizeIncrease
}
if c.isTime {
return 0
}
switch c.valueType {
case valueTypeString:
for _, v := range c.getValues(br) {
f, ok := tryParseFloat64(v)
if ok {
stateSizeIncrease += h.update(f)
}
}
case valueTypeDict:
dictValues := c.dictValues
a := encoding.GetFloat64s(len(dictValues))
for i, v := range dictValues {
f, ok := tryParseFloat64(v)
if !ok {
f = nan
}
a.A[i] = f
}
for _, v := range c.getValuesEncoded(br) {
idx := v[0]
f := a.A[idx]
if !math.IsNaN(f) {
h.update(f)
}
}
encoding.PutFloat64s(a)
case valueTypeUint8:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint8(v)
h.update(float64(n))
}
case valueTypeUint16:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint16(v)
h.update(float64(n))
}
case valueTypeUint32:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint32(v)
h.update(float64(n))
}
case valueTypeUint64:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint64(v)
h.update(float64(n))
}
case valueTypeFloat64:
for _, v := range c.getValuesEncoded(br) {
f := unmarshalFloat64(v)
if !math.IsNaN(f) {
h.update(f)
}
}
case valueTypeIPv4:
case valueTypeTimestampISO8601:
default:
logger.Panicf("BUG: unexpected valueType=%d", c.valueType)
}
return stateSizeIncrease
}
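The dictionary branch above parses each distinct dict value once into a scratch float64 slice and then resolves every encoded row by index, instead of re-parsing the same strings row by row. A rough, self-contained sketch of that optimization, with the standard strconv.ParseFloat standing in for tryParseFloat64 and a plain slice standing in for the pooled encoding.GetFloat64s buffer (both stand-ins are assumptions):

package main

import (
	"fmt"
	"math"
	"strconv"
)

// sumDictColumn illustrates the per-dictionary parse: dictValues holds the
// distinct strings, encodedRows holds one dictionary index per row.
func sumDictColumn(dictValues []string, encodedRows []byte) float64 {
	parsed := make([]float64, len(dictValues))
	for i, v := range dictValues {
		f, err := strconv.ParseFloat(v, 64)
		if err != nil {
			f = math.NaN()
		}
		parsed[i] = f
	}
	sum := 0.0
	for _, idx := range encodedRows {
		if f := parsed[idx]; !math.IsNaN(f) {
			sum += f
		}
	}
	return sum
}

func main() {
	fmt.Println(sumDictColumn([]string{"1.5", "oops", "2"}, []byte{0, 1, 2, 2})) // 5.5
}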
func (sqp *statsQuantileProcessor) mergeState(sfp statsProcessor) { func (sqp *statsQuantileProcessor) mergeState(sfp statsProcessor) {
src := sfp.(*statsQuantileProcessor) src := sfp.(*statsQuantileProcessor)
sqp.h.mergeState(&src.h) sqp.h.mergeState(&src.h)

View file

@ -16,8 +16,8 @@ func (ss *statsSum) String() string {
return "sum(" + fieldNamesString(ss.fields) + ")" return "sum(" + fieldNamesString(ss.fields) + ")"
} }
func (ss *statsSum) neededFields() []string { func (ss *statsSum) updateNeededFields(neededFields fieldsSet) {
return ss.fields neededFields.addFields(ss.fields)
} }
func (ss *statsSum) newStatsProcessor() (statsProcessor, int) { func (ss *statsSum) newStatsProcessor() (statsProcessor, int) {
@ -38,27 +38,13 @@ func (ssp *statsSumProcessor) updateStatsForAllRows(br *blockResult) int {
if ssp.ss.containsStar { if ssp.ss.containsStar {
// Sum all the columns // Sum all the columns
for _, c := range br.getColumns() { for _, c := range br.getColumns() {
f, count := c.sumValues(br) ssp.updateStateForColumn(br, c)
if count > 0 {
if math.IsNaN(ssp.sum) {
ssp.sum = f
} else {
ssp.sum += f
}
}
} }
} else { } else {
// Sum the requested columns // Sum the requested columns
for _, field := range ssp.ss.fields { for _, field := range ssp.ss.fields {
c := br.getColumnByName(field) c := br.getColumnByName(field)
f, count := c.sumValues(br) ssp.updateStateForColumn(br, c)
if count > 0 {
if math.IsNaN(ssp.sum) {
ssp.sum = f
} else {
ssp.sum += f
}
}
} }
} }
return 0 return 0
@ -68,32 +54,39 @@ func (ssp *statsSumProcessor) updateStatsForRow(br *blockResult, rowIdx int) int
if ssp.ss.containsStar { if ssp.ss.containsStar {
// Sum all the fields for the given row // Sum all the fields for the given row
for _, c := range br.getColumns() { for _, c := range br.getColumns() {
f := c.getFloatValueAtRow(rowIdx) f, ok := c.getFloatValueAtRow(br, rowIdx)
if !math.IsNaN(f) { if ok {
if math.IsNaN(ssp.sum) { ssp.updateState(f)
ssp.sum = f
} else {
ssp.sum += f
}
} }
} }
} else { } else {
// Sum only the given fields for the given row // Sum only the given fields for the given row
for _, field := range ssp.ss.fields { for _, field := range ssp.ss.fields {
c := br.getColumnByName(field) c := br.getColumnByName(field)
f := c.getFloatValueAtRow(rowIdx) f, ok := c.getFloatValueAtRow(br, rowIdx)
if !math.IsNaN(f) { if ok {
if math.IsNaN(ssp.sum) { ssp.updateState(f)
ssp.sum = f
} else {
ssp.sum += f
}
} }
} }
} }
return 0 return 0
} }
func (ssp *statsSumProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) {
f, count := c.sumValues(br)
if count > 0 {
ssp.updateState(f)
}
}
func (ssp *statsSumProcessor) updateState(f float64) {
if math.IsNaN(ssp.sum) {
ssp.sum = f
} else {
ssp.sum += f
}
}
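updateState keeps the sum initialized to NaN until the first value arrives, which presumably lets the final result report NaN when nothing matched instead of a misleading zero. A tiny sketch of that accumulation rule:

package main

import (
	"fmt"
	"math"
)

// nanSum mirrors the updateState logic: NaN means "nothing summed yet",
// the first value replaces it, later values are added.
type nanSum struct{ sum float64 }

func newNanSum() *nanSum { return &nanSum{sum: math.NaN()} }

func (s *nanSum) add(f float64) {
	if math.IsNaN(s.sum) {
		s.sum = f
		return
	}
	s.sum += f
}

func main() {
	s := newNanSum()
	fmt.Println(s.sum) // NaN - no rows matched yet
	s.add(2)
	s.add(3)
	fmt.Println(s.sum) // 5
}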
func (ssp *statsSumProcessor) mergeState(sfp statsProcessor) { func (ssp *statsSumProcessor) mergeState(sfp statsProcessor) {
src := sfp.(*statsSumProcessor) src := sfp.(*statsSumProcessor)
ssp.sum += src.sum ssp.sum += src.sum

View file

@ -15,8 +15,8 @@ func (ss *statsSumLen) String() string {
return "sum_len(" + fieldNamesString(ss.fields) + ")" return "sum_len(" + fieldNamesString(ss.fields) + ")"
} }
func (ss *statsSumLen) neededFields() []string { func (ss *statsSumLen) updateNeededFields(neededFields fieldsSet) {
return ss.fields neededFields.addFields(ss.fields)
} }
func (ss *statsSumLen) newStatsProcessor() (statsProcessor, int) { func (ss *statsSumLen) newStatsProcessor() (statsProcessor, int) {

View file

@ -24,8 +24,8 @@ func (su *statsUniqValues) String() string {
return s return s
} }
func (su *statsUniqValues) neededFields() []string { func (su *statsUniqValues) updateNeededFields(neededFields fieldsSet) {
return su.fields neededFields.addFields(su.fields)
} }
func (su *statsUniqValues) newStatsProcessor() (statsProcessor, int) { func (su *statsUniqValues) newStatsProcessor() (statsProcessor, int) {
@ -68,7 +68,7 @@ func (sup *statsUniqValuesProcessor) updateStatsForAllRowsColumn(c *blockResultC
stateSizeIncrease := 0 stateSizeIncrease := 0
if c.isConst { if c.isConst {
// collect unique const values // collect unique const values
v := c.encodedValues[0] v := c.valuesEncoded[0]
if v == "" { if v == "" {
// skip empty values // skip empty values
return stateSizeIncrease return stateSizeIncrease
@ -141,7 +141,7 @@ func (sup *statsUniqValuesProcessor) updateStatsForRowColumn(c *blockResultColum
stateSizeIncrease := 0 stateSizeIncrease := 0
if c.isConst { if c.isConst {
// collect unique const values // collect unique const values
v := c.encodedValues[0] v := c.valuesEncoded[0]
if v == "" { if v == "" {
// skip empty values // skip empty values
return stateSizeIncrease return stateSizeIncrease
@ -155,7 +155,8 @@ func (sup *statsUniqValuesProcessor) updateStatsForRowColumn(c *blockResultColum
} }
if c.valueType == valueTypeDict { if c.valueType == valueTypeDict {
// collect unique non-zero c.dictValues // collect unique non-zero c.dictValues
dictIdx := c.encodedValues[rowIdx][0] valuesEncoded := c.getValuesEncoded(br)
dictIdx := valuesEncoded[rowIdx][0]
v := c.dictValues[dictIdx] v := c.dictValues[dictIdx]
if v == "" { if v == "" {
// skip empty values // skip empty values

View file

@ -21,8 +21,8 @@ func (sv *statsValues) String() string {
return s return s
} }
func (sv *statsValues) neededFields() []string { func (sv *statsValues) updateNeededFields(neededFields fieldsSet) {
return sv.fields neededFields.addFields(sv.fields)
} }
func (sv *statsValues) newStatsProcessor() (statsProcessor, int) { func (sv *statsValues) newStatsProcessor() (statsProcessor, int) {
@ -61,7 +61,7 @@ func (svp *statsValuesProcessor) updateStatsForAllRows(br *blockResult) int {
func (svp *statsValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColumn, br *blockResult) int { func (svp *statsValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColumn, br *blockResult) int {
stateSizeIncrease := 0 stateSizeIncrease := 0
if c.isConst { if c.isConst {
v := strings.Clone(c.encodedValues[0]) v := strings.Clone(c.valuesEncoded[0])
stateSizeIncrease += len(v) stateSizeIncrease += len(v)
values := svp.values values := svp.values
@ -81,7 +81,7 @@ func (svp *statsValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColum
} }
values := svp.values values := svp.values
for _, encodedValue := range c.encodedValues { for _, encodedValue := range c.getValuesEncoded(br) {
idx := encodedValue[0] idx := encodedValue[0]
values = append(values, dictValues[idx]) values = append(values, dictValues[idx])
} }
@ -128,7 +128,7 @@ func (svp *statsValuesProcessor) updateStatsForRow(br *blockResult, rowIdx int)
func (svp *statsValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, br *blockResult, rowIdx int) int { func (svp *statsValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, br *blockResult, rowIdx int) int {
stateSizeIncrease := 0 stateSizeIncrease := 0
if c.isConst { if c.isConst {
v := strings.Clone(c.encodedValues[0]) v := strings.Clone(c.valuesEncoded[0])
stateSizeIncrease += len(v) stateSizeIncrease += len(v)
svp.values = append(svp.values, v) svp.values = append(svp.values, v)
@ -138,7 +138,8 @@ func (svp *statsValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, b
} }
if c.valueType == valueTypeDict { if c.valueType == valueTypeDict {
// collect unique non-zero c.dictValues // collect unique non-zero c.dictValues
dictIdx := c.encodedValues[rowIdx][0] valuesEncoded := c.getValuesEncoded(br)
dictIdx := valuesEncoded[rowIdx][0]
v := strings.Clone(c.dictValues[dictIdx]) v := strings.Clone(c.dictValues[dictIdx])
stateSizeIncrease += len(v) stateSizeIncrease += len(v)

View file

@ -2,12 +2,15 @@ package logstorage
import ( import (
"context" "context"
"fmt"
"math" "math"
"slices" "slices"
"sort" "sort"
"strings"
"sync" "sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
) )
// genericSearchOptions contain options used for search. // genericSearchOptions contain options used for search.
@ -60,8 +63,44 @@ type searchOptions struct {
needAllColumns bool needAllColumns bool
} }
// WriteBlockFunc must write a block with the given timestamps and columns.
//
// WriteBlockFunc must not hold references to timestamps and columns after returning.
type WriteBlockFunc func(workerID uint, timestamps []int64, columns []BlockColumn)
// RunQuery runs the given q and calls writeBlock for results. // RunQuery runs the given q and calls writeBlock for results.
func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlock func(workerID uint, timestamps []int64, columns []BlockColumn)) error { func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlock WriteBlockFunc) error {
qNew, err := s.initFilterInValues(ctx, tenantIDs, q)
if err != nil {
return err
}
writeBlockResult := func(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
brs := getBlockRows()
csDst := brs.cs
cs := br.getColumns()
for _, c := range cs {
values := c.getValues(br)
csDst = append(csDst, BlockColumn{
Name: c.name,
Values: values,
})
}
writeBlock(workerID, br.timestamps, csDst)
brs.cs = csDst
putBlockRows(brs)
}
return s.runQuery(ctx, tenantIDs, qNew, writeBlockResult)
}
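Since RunQuery is the exported entry point, a short usage sketch may help. It assumes an already opened *logstorage.Storage (opening the storage is outside this diff) and simply counts the matching rows; writeBlock may be called concurrently from multiple workers, hence the atomic counter, and per the WriteBlockFunc contract it must not retain the passed slices. The query string and tenant IDs are illustrative:

package main

import (
	"context"
	"fmt"
	"sync/atomic"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

// countMatchingRows is a hypothetical helper showing the WriteBlockFunc contract.
func countMatchingRows(ctx context.Context, s *logstorage.Storage, qStr string) (uint64, error) {
	q, err := logstorage.ParseQuery(qStr)
	if err != nil {
		return 0, err
	}
	tenantIDs := []logstorage.TenantID{{AccountID: 0, ProjectID: 0}}

	var total atomic.Uint64
	writeBlock := func(_ uint, timestamps []int64, _ []logstorage.BlockColumn) {
		// Only aggregate values are kept; the slices must not outlive this call.
		total.Add(uint64(len(timestamps)))
	}
	if err := s.RunQuery(ctx, tenantIDs, q, writeBlock); err != nil {
		return 0, err
	}
	return total.Load(), nil
}

func main() {
	fmt.Println("see countMatchingRows for the RunQuery usage sketch")
}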
func (s *Storage) runQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlockResultFunc func(workerID uint, br *blockResult)) error {
neededColumnNames, unneededColumnNames := q.getNeededColumns() neededColumnNames, unneededColumnNames := q.getNeededColumns()
so := &genericSearchOptions{ so := &genericSearchOptions{
tenantIDs: tenantIDs, tenantIDs: tenantIDs,
@ -73,24 +112,8 @@ func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
workersCount := cgroup.AvailableCPUs() workersCount := cgroup.AvailableCPUs()
pp := newDefaultPipeProcessor(func(workerID uint, br *blockResult) { ppMain := newDefaultPipeProcessor(writeBlockResultFunc)
brs := getBlockRows() pp := ppMain
csDst := brs.cs
for _, c := range br.getColumns() {
values := c.getValues(br)
csDst = append(csDst, BlockColumn{
Name: c.name,
Values: values,
})
}
writeBlock(workerID, br.timestamps, csDst)
brs.cs = csDst
putBlockRows(brs)
})
ppMain := pp
stopCh := ctx.Done() stopCh := ctx.Done()
cancels := make([]func(), len(q.pipes)) cancels := make([]func(), len(q.pipes))
pps := make([]pipeProcessor, len(q.pipes)) pps := make([]pipeProcessor, len(q.pipes))
@ -121,6 +144,202 @@ func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
return errFlush return errFlush
} }
// GetFieldNames returns field names from q results for the given tenantIDs.
func (s *Storage) GetFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]string, error) {
// add `field_names ...` to the end of q.pipes
pipes := append([]pipe{}, q.pipes...)
pipeStr := "field_names as names"
lex := newLexer(pipeStr)
pf, err := parsePipeFieldNames(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing 'field_names' pipe: %s", err)
}
pf.isFirstPipe = len(pipes) == 0
pipes = append(pipes, pf)
q = &Query{
f: q.f,
pipes: pipes,
}
return s.runSingleColumnQuery(ctx, tenantIDs, q)
}
// GetFieldValues returns unique values for the given fieldName returned by q for the given tenantIDs.
//
// If limit > 0, then up to limit unique values are returned. The values are returned in arbitrary order for performance reasons.
// The caller may sort the returned values if needed.
func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]string, error) {
// add 'uniq fieldName' to the end of q.pipes
if !endsWithPipeUniqSingleField(q.pipes, fieldName) {
pipes := append([]pipe{}, q.pipes...)
pipeStr := fmt.Sprintf("uniq by (%s) limit %d", quoteTokenIfNeeded(fieldName), limit)
lex := newLexer(pipeStr)
pu, err := parsePipeUniq(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing 'uniq' pipe: %s", err)
}
pipes = append(pipes, pu)
q = &Query{
f: q.f,
pipes: pipes,
}
}
return s.runSingleColumnQuery(ctx, tenantIDs, q)
}
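Both GetFieldNames and GetFieldValues funnel through runSingleColumnQuery by appending a pipe to the query, so their usage is symmetrical. A hedged sketch with illustrative names: fetch up to 100 unique values of a hypothetical "level" field for an already parsed query and sort them, since the returned order is arbitrary:

package main

import (
	"context"
	"fmt"
	"sort"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

// listLevels is a hypothetical helper built on GetFieldValues.
func listLevels(ctx context.Context, s *logstorage.Storage, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]string, error) {
	values, err := s.GetFieldValues(ctx, tenantIDs, q, "level", 100)
	if err != nil {
		return nil, err
	}
	sort.Strings(values) // the storage returns values in arbitrary order
	return values, nil
}

func main() { fmt.Println("see listLevels for the GetFieldValues usage sketch") }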
func endsWithPipeUniqSingleField(pipes []pipe, fieldName string) bool {
if len(pipes) == 0 {
return false
}
pu, ok := pipes[len(pipes)-1].(*pipeUniq)
if !ok {
return false
}
return len(pu.byFields) == 1 && pu.byFields[0] == fieldName
}
func (s *Storage) runSingleColumnQuery(ctx context.Context, tenantIDs []TenantID, q *Query) ([]string, error) {
var values []string
var valuesLock sync.Mutex
writeBlockResult := func(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
cs := br.getColumns()
if len(cs) != 1 {
logger.Panicf("BUG: expecting only a single column; got %d columns", len(cs))
}
columnValues := cs[0].getValues(br)
columnValuesCopy := make([]string, len(columnValues))
for i, v := range columnValues {
columnValuesCopy[i] = strings.Clone(v)
}
valuesLock.Lock()
values = append(values, columnValuesCopy...)
valuesLock.Unlock()
}
err := s.runQuery(ctx, tenantIDs, q, writeBlockResult)
if err != nil {
return nil, err
}
return values, nil
}
func (s *Storage) initFilterInValues(ctx context.Context, tenantIDs []TenantID, q *Query) (*Query, error) {
if !hasFilterInWithQueryForFilter(q.f) && !hasFilterInWithQueryForPipes(q.pipes) {
return q, nil
}
getFieldValues := func(q *Query, fieldName string) ([]string, error) {
return s.GetFieldValues(ctx, tenantIDs, q, fieldName, 0)
}
cache := make(map[string][]string)
fNew, err := initFilterInValuesForFilter(cache, q.f, getFieldValues)
if err != nil {
return nil, err
}
pipesNew, err := initFilterInValuesForPipes(cache, q.pipes, getFieldValues)
if err != nil {
return nil, err
}
qNew := &Query{
f: fNew,
pipes: pipesNew,
}
return qNew, nil
}
func hasFilterInWithQueryForFilter(f filter) bool {
visitFunc := func(f filter) bool {
fi, ok := f.(*filterIn)
return ok && fi.needExecuteQuery
}
return visitFilter(f, visitFunc)
}
func hasFilterInWithQueryForPipes(pipes []pipe) bool {
for _, p := range pipes {
ps, ok := p.(*pipeStats)
if !ok {
continue
}
for _, f := range ps.funcs {
if f.iff != nil && hasFilterInWithQueryForFilter(f.iff) {
return true
}
}
}
return false
}
type getFieldValuesFunc func(q *Query, fieldName string) ([]string, error)
func initFilterInValuesForFilter(cache map[string][]string, f filter, getFieldValuesFunc getFieldValuesFunc) (filter, error) {
visitFunc := func(f filter) bool {
fi, ok := f.(*filterIn)
return ok && fi.needExecuteQuery
}
copyFunc := func(f filter) (filter, error) {
fi := f.(*filterIn)
qStr := fi.q.String()
values, ok := cache[qStr]
if !ok {
vs, err := getFieldValuesFunc(fi.q, fi.qFieldName)
if err != nil {
return nil, fmt.Errorf("cannot obtain unique values for %s: %w", fi, err)
}
cache[qStr] = vs
values = vs
}
fiNew := &filterIn{
fieldName: fi.fieldName,
q: fi.q,
values: values,
}
return fiNew, nil
}
return copyFilter(f, visitFunc, copyFunc)
}
func initFilterInValuesForPipes(cache map[string][]string, pipes []pipe, getFieldValuesFunc getFieldValuesFunc) ([]pipe, error) {
pipesNew := make([]pipe, len(pipes))
for i, p := range pipes {
switch t := p.(type) {
case *pipeStats:
funcsNew := make([]pipeStatsFunc, len(t.funcs))
for j, f := range t.funcs {
if f.iff != nil {
fNew, err := initFilterInValuesForFilter(cache, f.iff, getFieldValuesFunc)
if err != nil {
return nil, err
}
f.iff = fNew
}
funcsNew[j] = f
}
pipesNew[i] = &pipeStats{
byFields: t.byFields,
funcs: funcsNew,
}
default:
pipesNew[i] = p
}
}
return pipesNew, nil
}
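initFilterInValues resolves every in(<subquery>) filter before the main search runs: the subquery is executed once via GetFieldValues, its result is cached by the query's string form, and a copy of the filter tree (and of any per-function if-filters in stats pipes) is built with the concrete values plugged in. A rough, self-contained sketch of just the caching step, with hypothetical names:

package main

import "fmt"

// resolveSubquery caches subquery results by their string form so the same
// in(<subquery>) is executed only once per top-level query.
func resolveSubquery(cache map[string][]string, qStr string, run func(string) []string) []string {
	if values, ok := cache[qStr]; ok {
		return values
	}
	values := run(qStr)
	cache[qStr] = values
	return values
}

func main() {
	calls := 0
	run := func(string) []string {
		calls++
		return []string{"alice", "bob"}
	}
	cache := map[string][]string{}
	resolveSubquery(cache, `level:error | uniq by (user)`, run)
	resolveSubquery(cache, `level:error | uniq by (user)`, run)
	fmt.Println(calls) // 1: the second lookup is served from the cache
}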
type blockRows struct { type blockRows struct {
cs []BlockColumn cs []BlockColumn
} }
@ -169,7 +388,7 @@ type searchResultFunc func(workerID uint, br *blockResult)
// search searches for the matching rows according to so. // search searches for the matching rows according to so.
// //
// It calls processBlockResult for each found matching block. // It calls processBlockResult for each matching block.
func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-chan struct{}, processBlockResult searchResultFunc) { func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-chan struct{}, processBlockResult searchResultFunc) {
// Spin up workers // Spin up workers
var wgWorkers sync.WaitGroup var wgWorkers sync.WaitGroup
@ -178,6 +397,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
for i := 0; i < workersCount; i++ { for i := 0; i < workersCount; i++ {
go func(workerID uint) { go func(workerID uint) {
bs := getBlockSearch() bs := getBlockSearch()
bm := getBitmap(0)
for bswb := range workCh { for bswb := range workCh {
bsws := bswb.bsws bsws := bswb.bsws
for i := range bsws { for i := range bsws {
@ -188,7 +408,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
continue continue
} }
bs.search(bsw) bs.search(bsw, bm)
if len(bs.br.timestamps) > 0 { if len(bs.br.timestamps) > 0 {
processBlockResult(workerID, &bs.br) processBlockResult(workerID, &bs.br)
} }
@ -198,22 +418,24 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
putBlockSearchWorkBatch(bswb) putBlockSearchWorkBatch(bswb)
} }
putBlockSearch(bs) putBlockSearch(bs)
putBitmap(bm)
wgWorkers.Done() wgWorkers.Done()
}(uint(i)) }(uint(i))
} }
// Obtain common time filter from so.filter // Obtain time range from so.filter
ft, f := getCommonFilterTime(so.filter) f := so.filter
minTimestamp, maxTimestamp := getFilterTimeRange(f)
// Select partitions according to the selected time range // Select partitions according to the selected time range
s.partitionsLock.Lock() s.partitionsLock.Lock()
ptws := s.partitions ptws := s.partitions
minDay := ft.minTimestamp / nsecPerDay minDay := minTimestamp / nsecPerDay
n := sort.Search(len(ptws), func(i int) bool { n := sort.Search(len(ptws), func(i int) bool {
return ptws[i].day >= minDay return ptws[i].day >= minDay
}) })
ptws = ptws[n:] ptws = ptws[n:]
maxDay := ft.maxTimestamp / nsecPerDay maxDay := maxTimestamp / nsecPerDay
n = sort.Search(len(ptws), func(i int) bool { n = sort.Search(len(ptws), func(i int) bool {
return ptws[i].day > maxDay return ptws[i].day > maxDay
}) })
@ -234,7 +456,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
partitionSearchConcurrencyLimitCh <- struct{}{} partitionSearchConcurrencyLimitCh <- struct{}{}
wgSearchers.Add(1) wgSearchers.Add(1)
go func(idx int, pt *partition) { go func(idx int, pt *partition) {
psfs[idx] = pt.search(ft, sf, f, so, workCh, stopCh) psfs[idx] = pt.search(minTimestamp, maxTimestamp, sf, f, so, workCh, stopCh)
wgSearchers.Done() wgSearchers.Done()
<-partitionSearchConcurrencyLimitCh <-partitionSearchConcurrencyLimitCh
}(i, ptw.pt) }(i, ptw.pt)
@ -263,7 +485,7 @@ var partitionSearchConcurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs
type partitionSearchFinalizer func() type partitionSearchFinalizer func()
func (pt *partition) search(ft *filterTime, sf *StreamFilter, f filter, so *genericSearchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer { func (pt *partition) search(minTimestamp, maxTimestamp int64, sf *StreamFilter, f filter, so *genericSearchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
if needStop(stopCh) { if needStop(stopCh) {
// Do not spend CPU time on search, since it is already stopped. // Do not spend CPU time on search, since it is already stopped.
return func() {} return func() {}
@ -281,8 +503,8 @@ func (pt *partition) search(ft *filterTime, sf *StreamFilter, f filter, so *gene
soInternal := &searchOptions{ soInternal := &searchOptions{
tenantIDs: tenantIDs, tenantIDs: tenantIDs,
streamIDs: streamIDs, streamIDs: streamIDs,
minTimestamp: ft.minTimestamp, minTimestamp: minTimestamp,
maxTimestamp: ft.maxTimestamp, maxTimestamp: maxTimestamp,
filter: f, filter: f,
neededColumnNames: so.neededColumnNames, neededColumnNames: so.neededColumnNames,
unneededColumnNames: so.unneededColumnNames, unneededColumnNames: so.unneededColumnNames,
@ -292,60 +514,32 @@ func (pt *partition) search(ft *filterTime, sf *StreamFilter, f filter, so *gene
} }
func hasStreamFilters(f filter) bool { func hasStreamFilters(f filter) bool {
switch t := f.(type) { visitFunc := func(f filter) bool {
case *filterAnd: _, ok := f.(*filterStream)
return hasStreamFiltersInList(t.filters) return ok
case *filterOr:
return hasStreamFiltersInList(t.filters)
case *filterNot:
return hasStreamFilters(t.f)
case *filterStream:
return true
default:
return false
} }
} return visitFilter(f, visitFunc)
func hasStreamFiltersInList(filters []filter) bool {
for _, f := range filters {
if hasStreamFilters(f) {
return true
}
}
return false
} }
func initStreamFilters(tenantIDs []TenantID, idb *indexdb, f filter) filter { func initStreamFilters(tenantIDs []TenantID, idb *indexdb, f filter) filter {
switch t := f.(type) { visitFunc := func(f filter) bool {
case *filterAnd: _, ok := f.(*filterStream)
return &filterAnd{ return ok
filters: initStreamFiltersList(tenantIDs, idb, t.filters), }
} copyFunc := func(f filter) (filter, error) {
case *filterOr: fs := f.(*filterStream)
return &filterOr{ fsNew := &filterStream{
filters: initStreamFiltersList(tenantIDs, idb, t.filters), f: fs.f,
}
case *filterNot:
return &filterNot{
f: initStreamFilters(tenantIDs, idb, t.f),
}
case *filterStream:
return &filterStream{
f: t.f,
tenantIDs: tenantIDs, tenantIDs: tenantIDs,
idb: idb, idb: idb,
} }
default: return fsNew, nil
return t
} }
} f, err := copyFilter(f, visitFunc, copyFunc)
if err != nil {
func initStreamFiltersList(tenantIDs []TenantID, idb *indexdb, filters []filter) []filter { logger.Panicf("BUG: unexpected error: %s", err)
result := make([]filter, len(filters))
for i, f := range filters {
result[i] = initStreamFilters(tenantIDs, idb, f)
} }
return result return f
} }
func (ddb *datadb) search(so *searchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer { func (ddb *datadb) search(so *searchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
@ -646,23 +840,25 @@ func getCommonStreamFilter(f filter) (*StreamFilter, filter) {
return nil, f return nil, f
} }
func getCommonFilterTime(f filter) (*filterTime, filter) { func getFilterTimeRange(f filter) (int64, int64) {
switch t := f.(type) { switch t := f.(type) {
case *filterAnd: case *filterAnd:
minTimestamp := int64(math.MinInt64)
maxTimestamp := int64(math.MaxInt64)
for _, filter := range t.filters { for _, filter := range t.filters {
ft, ok := filter.(*filterTime) ft, ok := filter.(*filterTime)
if ok { if ok {
// The ft must remain in t.filters order to properly filter out rows outside the selected time range if ft.minTimestamp > minTimestamp {
return ft, f minTimestamp = ft.minTimestamp
}
if ft.maxTimestamp < maxTimestamp {
maxTimestamp = ft.maxTimestamp
}
} }
} }
return minTimestamp, maxTimestamp
case *filterTime: case *filterTime:
return t, f return t.minTimestamp, t.maxTimestamp
} }
return allFilterTime, f return math.MinInt64, math.MaxInt64
}
var allFilterTime = &filterTime{
minTimestamp: math.MinInt64,
maxTimestamp: math.MaxInt64,
} }
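getFilterTimeRange now intersects all _time filters found in a top-level AND filter instead of returning the first one; each filter can only narrow the [minTimestamp, maxTimestamp] range, which starts out unbounded. A small sketch of that intersection:

package main

import (
	"fmt"
	"math"
)

// intersectTimeRanges narrows an initially unbounded range with every
// [min, max] pair, mirroring the filterAnd branch of getFilterTimeRange.
func intersectTimeRanges(ranges [][2]int64) (int64, int64) {
	minTs, maxTs := int64(math.MinInt64), int64(math.MaxInt64)
	for _, r := range ranges {
		if r[0] > minTs {
			minTs = r[0]
		}
		if r[1] < maxTs {
			maxTs = r[1]
		}
	}
	return minTs, maxTs
}

func main() {
	minTs, maxTs := intersectTimeRanges([][2]int64{{10, 100}, {20, 200}})
	fmt.Println(minTs, maxTs) // 20 100
}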

View file

@ -78,6 +78,14 @@ func TestStorageRunQuery(t *testing.T) {
} }
s.debugFlush() s.debugFlush()
mustRunQuery := func(tenantIDs []TenantID, q *Query, writeBlock WriteBlockFunc) {
t.Helper()
err := s.RunQuery(context.Background(), tenantIDs, q, writeBlock)
if err != nil {
t.Fatalf("unexpected error returned from the query %s: %s", q, err)
}
}
// run tests on the storage data // run tests on the storage data
t.Run("missing-tenant", func(_ *testing.T) { t.Run("missing-tenant", func(_ *testing.T) {
q := mustParseQuery(`"log message"`) q := mustParseQuery(`"log message"`)
@ -89,7 +97,7 @@ func TestStorageRunQuery(t *testing.T) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps))) panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
} }
tenantIDs := []TenantID{tenantID} tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock)) mustRunQuery(tenantIDs, q, writeBlock)
}) })
t.Run("missing-message-text", func(_ *testing.T) { t.Run("missing-message-text", func(_ *testing.T) {
q := mustParseQuery(`foobar`) q := mustParseQuery(`foobar`)
@ -101,7 +109,7 @@ func TestStorageRunQuery(t *testing.T) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps))) panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
} }
tenantIDs := []TenantID{tenantID} tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock)) mustRunQuery(tenantIDs, q, writeBlock)
}) })
t.Run("matching-tenant-id", func(t *testing.T) { t.Run("matching-tenant-id", func(t *testing.T) {
q := mustParseQuery(`tenant.id:*`) q := mustParseQuery(`tenant.id:*`)
@ -135,7 +143,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps))) rowsCountTotal.Add(uint32(len(timestamps)))
} }
tenantIDs := []TenantID{tenantID} tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock)) mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := streamsPerTenant * blocksPerStream * rowsPerBlock expectedRowsCount := streamsPerTenant * blocksPerStream * rowsPerBlock
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) { if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@ -149,7 +157,7 @@ func TestStorageRunQuery(t *testing.T) {
writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) { writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) {
rowsCountTotal.Add(uint32(len(timestamps))) rowsCountTotal.Add(uint32(len(timestamps)))
} }
checkErr(t, s.RunQuery(context.Background(), allTenantIDs, q, writeBlock)) mustRunQuery(allTenantIDs, q, writeBlock)
expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) { if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@ -162,7 +170,7 @@ func TestStorageRunQuery(t *testing.T) {
writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) { writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) {
rowsCountTotal.Add(uint32(len(timestamps))) rowsCountTotal.Add(uint32(len(timestamps)))
} }
checkErr(t, s.RunQuery(context.Background(), allTenantIDs, q, writeBlock)) mustRunQuery(allTenantIDs, q, writeBlock)
expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) { if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@ -174,7 +182,7 @@ func TestStorageRunQuery(t *testing.T) {
writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) { writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps))) panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
} }
checkErr(t, s.RunQuery(context.Background(), allTenantIDs, q, writeBlock)) mustRunQuery(allTenantIDs, q, writeBlock)
}) })
t.Run("matching-stream-id", func(t *testing.T) { t.Run("matching-stream-id", func(t *testing.T) {
for i := 0; i < streamsPerTenant; i++ { for i := 0; i < streamsPerTenant; i++ {
@ -208,7 +216,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps))) rowsCountTotal.Add(uint32(len(timestamps)))
} }
tenantIDs := []TenantID{tenantID} tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock)) mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := blocksPerStream * rowsPerBlock expectedRowsCount := blocksPerStream * rowsPerBlock
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) { if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@ -227,7 +235,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps))) rowsCountTotal.Add(uint32(len(timestamps)))
} }
tenantIDs := []TenantID{tenantID} tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock)) mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := streamsPerTenant * blocksPerStream * 2 expectedRowsCount := streamsPerTenant * blocksPerStream * 2
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) { if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@ -247,7 +255,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps))) rowsCountTotal.Add(uint32(len(timestamps)))
} }
tenantIDs := []TenantID{tenantID} tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock)) mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := streamsPerTenant * blocksPerStream expectedRowsCount := streamsPerTenant * blocksPerStream
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) { if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@ -267,7 +275,7 @@ func TestStorageRunQuery(t *testing.T) {
rowsCountTotal.Add(uint32(len(timestamps))) rowsCountTotal.Add(uint32(len(timestamps)))
} }
tenantIDs := []TenantID{tenantID} tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock)) mustRunQuery(tenantIDs, q, writeBlock)
expectedRowsCount := blocksPerStream expectedRowsCount := blocksPerStream
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) { if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
@ -286,7 +294,7 @@ func TestStorageRunQuery(t *testing.T) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps))) panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
} }
tenantIDs := []TenantID{tenantID} tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock)) mustRunQuery(tenantIDs, q, writeBlock)
}) })
t.Run("missing-time-range", func(_ *testing.T) { t.Run("missing-time-range", func(_ *testing.T) {
minTimestamp := baseTimestamp + (rowsPerBlock+1)*1e9 minTimestamp := baseTimestamp + (rowsPerBlock+1)*1e9
@ -300,7 +308,7 @@ func TestStorageRunQuery(t *testing.T) {
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps))) panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
} }
tenantIDs := []TenantID{tenantID} tenantIDs := []TenantID{tenantID}
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock)) mustRunQuery(tenantIDs, q, writeBlock)
}) })
// Close the storage and delete its data // Close the storage and delete its data
@ -308,13 +316,6 @@ func TestStorageRunQuery(t *testing.T) {
fs.MustRemoveAll(path) fs.MustRemoveAll(path)
} }
func checkErr(t *testing.T, err error) {
t.Helper()
if err != nil {
t.Fatalf("unexpected err: %s", err)
}
}
func mustParseQuery(query string) *Query { func mustParseQuery(query string) *Query {
q, err := ParseQuery(query) q, err := ParseQuery(query)
if err != nil { if err != nil {
@ -470,9 +471,6 @@ func TestStorageSearch(t *testing.T) {
} }
var rowsCountTotal atomic.Uint32 var rowsCountTotal atomic.Uint32
processBlock := func(_ uint, br *blockResult) { processBlock := func(_ uint, br *blockResult) {
if !br.streamID.tenantID.equal(&tenantID) {
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
}
rowsCountTotal.Add(uint32(len(br.timestamps))) rowsCountTotal.Add(uint32(len(br.timestamps)))
} }
s.search(workersCount, so, nil, processBlock) s.search(workersCount, so, nil, processBlock)
@ -504,7 +502,7 @@ func TestStorageSearch(t *testing.T) {
} }
}) })
t.Run("stream-filter-mismatch", func(_ *testing.T) { t.Run("stream-filter-mismatch", func(_ *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-.+:2345"}`) sf := mustNewTestStreamFilter(`{job="foobar",instance=~"host-.+:2345"}`)
minTimestamp := baseTimestamp minTimestamp := baseTimestamp
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
f := getBaseFilter(minTimestamp, maxTimestamp, sf) f := getBaseFilter(minTimestamp, maxTimestamp, sf)
@ -520,7 +518,7 @@ func TestStorageSearch(t *testing.T) {
}) })
t.Run("matching-stream-id", func(t *testing.T) { t.Run("matching-stream-id", func(t *testing.T) {
for i := 0; i < streamsPerTenant; i++ { for i := 0; i < streamsPerTenant; i++ {
sf := mustNewStreamFilter(fmt.Sprintf(`{job="foobar",instance="host-%d:234"}`, i)) sf := mustNewTestStreamFilter(fmt.Sprintf(`{job="foobar",instance="host-%d:234"}`, i))
tenantID := TenantID{ tenantID := TenantID{
AccountID: 1, AccountID: 1,
ProjectID: 11, ProjectID: 11,
@ -535,9 +533,6 @@ func TestStorageSearch(t *testing.T) {
} }
var rowsCountTotal atomic.Uint32 var rowsCountTotal atomic.Uint32
processBlock := func(_ uint, br *blockResult) { processBlock := func(_ uint, br *blockResult) {
if !br.streamID.tenantID.equal(&tenantID) {
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
}
rowsCountTotal.Add(uint32(len(br.timestamps))) rowsCountTotal.Add(uint32(len(br.timestamps)))
} }
s.search(workersCount, so, nil, processBlock) s.search(workersCount, so, nil, processBlock)
@ -549,7 +544,7 @@ func TestStorageSearch(t *testing.T) {
} }
}) })
t.Run("matching-multiple-stream-ids", func(t *testing.T) { t.Run("matching-multiple-stream-ids", func(t *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`) sf := mustNewTestStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
tenantID := TenantID{ tenantID := TenantID{
AccountID: 1, AccountID: 1,
ProjectID: 11, ProjectID: 11,
@ -564,9 +559,6 @@ func TestStorageSearch(t *testing.T) {
} }
var rowsCountTotal atomic.Uint32 var rowsCountTotal atomic.Uint32
processBlock := func(_ uint, br *blockResult) { processBlock := func(_ uint, br *blockResult) {
if !br.streamID.tenantID.equal(&tenantID) {
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
}
rowsCountTotal.Add(uint32(len(br.timestamps))) rowsCountTotal.Add(uint32(len(br.timestamps)))
} }
s.search(workersCount, so, nil, processBlock) s.search(workersCount, so, nil, processBlock)
@ -577,7 +569,7 @@ func TestStorageSearch(t *testing.T) {
} }
}) })
t.Run("matching-multiple-stream-ids-with-re-filter", func(t *testing.T) { t.Run("matching-multiple-stream-ids-with-re-filter", func(t *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`) sf := mustNewTestStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
tenantID := TenantID{ tenantID := TenantID{
AccountID: 1, AccountID: 1,
ProjectID: 11, ProjectID: 11,
@ -601,9 +593,6 @@ func TestStorageSearch(t *testing.T) {
} }
var rowsCountTotal atomic.Uint32 var rowsCountTotal atomic.Uint32
processBlock := func(_ uint, br *blockResult) { processBlock := func(_ uint, br *blockResult) {
if !br.streamID.tenantID.equal(&tenantID) {
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
}
rowsCountTotal.Add(uint32(len(br.timestamps))) rowsCountTotal.Add(uint32(len(br.timestamps)))
} }
s.search(workersCount, so, nil, processBlock) s.search(workersCount, so, nil, processBlock)
@ -614,7 +603,7 @@ func TestStorageSearch(t *testing.T) {
} }
}) })
t.Run("matching-stream-id-smaller-time-range", func(t *testing.T) { t.Run("matching-stream-id-smaller-time-range", func(t *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance="host-1:234"}`) sf := mustNewTestStreamFilter(`{job="foobar",instance="host-1:234"}`)
tenantID := TenantID{ tenantID := TenantID{
AccountID: 1, AccountID: 1,
ProjectID: 11, ProjectID: 11,
@ -639,7 +628,7 @@ func TestStorageSearch(t *testing.T) {
} }
}) })
t.Run("matching-stream-id-missing-time-range", func(_ *testing.T) { t.Run("matching-stream-id-missing-time-range", func(_ *testing.T) {
sf := mustNewStreamFilter(`{job="foobar",instance="host-1:234"}`) sf := mustNewTestStreamFilter(`{job="foobar",instance="host-1:234"}`)
tenantID := TenantID{ tenantID := TenantID{
AccountID: 1, AccountID: 1,
ProjectID: 11, ProjectID: 11,
@ -661,11 +650,3 @@ func TestStorageSearch(t *testing.T) {
s.MustClose() s.MustClose()
fs.MustRemoveAll(path) fs.MustRemoveAll(path)
} }
func mustNewStreamFilter(s string) *StreamFilter {
sf, err := newStreamFilter(s)
if err != nil {
panic(fmt.Errorf("unexpected error in newStreamFilter(%q): %w", s, err))
}
return sf
}

View file

@ -1,11 +1,14 @@
package logstorage package logstorage
import ( import (
"fmt"
"strconv" "strconv"
"strings" "strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
) )
@ -14,6 +17,29 @@ type StreamFilter struct {
orFilters []*andStreamFilter orFilters []*andStreamFilter
} }
func (sf *StreamFilter) matchStreamName(s string) bool {
sn := getStreamName()
defer putStreamName(sn)
if !sn.parse(s) {
return false
}
for _, of := range sf.orFilters {
matchAndFilters := true
for _, tf := range of.tagFilters {
if !sn.match(tf) {
matchAndFilters = false
break
}
}
if matchAndFilters {
return true
}
}
return false
}
func (sf *StreamFilter) isEmpty() bool { func (sf *StreamFilter) isEmpty() bool {
for _, af := range sf.orFilters { for _, af := range sf.orFilters {
if len(af.tagFilters) > 0 { if len(af.tagFilters) > 0 {
@ -69,10 +95,199 @@ type streamTagFilter struct {
regexp *regexutil.PromRegex regexp *regexutil.PromRegex
} }
func (tf *streamTagFilter) getRegexp() *regexutil.PromRegex {
return tf.regexp
}
func (tf *streamTagFilter) String() string { func (tf *streamTagFilter) String() string {
return quoteTokenIfNeeded(tf.tagName) + tf.op + strconv.Quote(tf.value) return quoteTokenIfNeeded(tf.tagName) + tf.op + strconv.Quote(tf.value)
} }
func parseStreamFilter(lex *lexer) (*StreamFilter, error) {
if !lex.isKeyword("{") {
return nil, fmt.Errorf("unexpected token %q instead of '{' in _stream filter", lex.token)
}
if !lex.mustNextToken() {
return nil, fmt.Errorf("incomplete _stream filter after '{'")
}
var filters []*andStreamFilter
for {
f, err := parseAndStreamFilter(lex)
if err != nil {
return nil, err
}
filters = append(filters, f)
switch {
case lex.isKeyword("}"):
lex.nextToken()
sf := &StreamFilter{
orFilters: filters,
}
return sf, nil
case lex.isKeyword("or"):
if !lex.mustNextToken() {
return nil, fmt.Errorf("incomplete _stream filter after 'or'")
}
if lex.isKeyword("}") {
return nil, fmt.Errorf("unexpected '}' after 'or' in _stream filter")
}
default:
return nil, fmt.Errorf("unexpected token in _stream filter: %q; want '}' or 'or'", lex.token)
}
}
}
func parseAndStreamFilter(lex *lexer) (*andStreamFilter, error) {
var filters []*streamTagFilter
for {
if lex.isKeyword("}") {
asf := &andStreamFilter{
tagFilters: filters,
}
return asf, nil
}
f, err := parseStreamTagFilter(lex)
if err != nil {
return nil, err
}
filters = append(filters, f)
switch {
case lex.isKeyword("or", "}"):
asf := &andStreamFilter{
tagFilters: filters,
}
return asf, nil
case lex.isKeyword(","):
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing stream filter after ','")
}
default:
return nil, fmt.Errorf("unexpected token %q in _stream filter; want 'or', 'and', '}' or ','", lex.token)
}
}
}
func parseStreamTagFilter(lex *lexer) (*streamTagFilter, error) {
tagName := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing operation in _stream filter for %q field", tagName)
}
if !lex.isKeyword("=", "!=", "=~", "!~") {
return nil, fmt.Errorf("unsupported operation %q in _steam filter for %q field; supported operations: =, !=, =~, !~", lex.token, tagName)
}
op := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing _stream filter value for %q field", tagName)
}
value := lex.token
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing token after %q%s%q filter", tagName, op, value)
}
stf := &streamTagFilter{
tagName: tagName,
op: op,
value: value,
}
if op == "=~" || op == "!~" {
re, err := regexutil.NewPromRegex(value)
if err != nil {
return nil, fmt.Errorf("invalid regexp %q for stream filter: %w", value, err)
}
stf.regexp = re
}
return stf, nil
}
func getStreamName() *streamName {
v := streamNamePool.Get()
if v == nil {
return &streamName{}
}
return v.(*streamName)
}
func putStreamName(sn *streamName) {
sn.reset()
streamNamePool.Put(sn)
}
var streamNamePool sync.Pool
type streamName struct {
tags []Field
}
func (sn *streamName) reset() {
clear(sn.tags)
sn.tags = sn.tags[:0]
}
func (sn *streamName) parse(s string) bool {
if len(s) < 2 || s[0] != '{' || s[len(s)-1] != '}' {
return false
}
s = s[1 : len(s)-1]
if len(s) == 0 {
return true
}
for {
// Parse tag name
n := strings.IndexByte(s, '=')
if n < 0 {
// cannot find tag name
return false
}
name := s[:n]
s = s[n+1:]
// Parse tag value
if len(s) == 0 || s[0] != '"' {
return false
}
qPrefix, err := strconv.QuotedPrefix(s)
if err != nil {
return false
}
s = s[len(qPrefix):]
value, err := strconv.Unquote(qPrefix)
if err != nil {
return false
}
sn.tags = append(sn.tags, Field{
Name: name,
Value: value,
})
if len(s) == 0 {
return true
}
if s[0] != ',' {
return false
}
s = s[1:]
}
}
func (sn *streamName) match(tf *streamTagFilter) bool {
v := sn.getTagValueByTagName(tf.tagName)
switch tf.op {
case "=":
return v == tf.value
case "!=":
return v != tf.value
case "=~":
return tf.regexp.MatchString(v)
case "!~":
return !tf.regexp.MatchString(v)
default:
logger.Panicf("BUG: unexpected tagFilter operation: %q", tf.op)
return false
}
}
func (sn *streamName) getTagValueByTagName(name string) string {
for _, t := range sn.tags {
if t.Name == name {
return t.Value
}
}
return ""
}
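matchStreamName parses a canonical stream name such as {instance="host-1:234",job="foobar"} into tags and then requires every tag filter of at least one OR group to match; a tag missing from the stream name behaves like an empty value. A simplified, self-contained sketch of the per-tag-filter semantics, with the standard regexp package standing in for regexutil.PromRegex and a map standing in for the pooled streamName (both stand-ins are assumptions):

package main

import (
	"fmt"
	"regexp"
)

// matchTagFilter mirrors streamName.match for a single tag filter.
func matchTagFilter(tags map[string]string, name, op, value string, re *regexp.Regexp) bool {
	v := tags[name] // a missing tag matches like an empty value
	switch op {
	case "=":
		return v == value
	case "!=":
		return v != value
	case "=~":
		return re.MatchString(v)
	case "!~":
		return !re.MatchString(v)
	}
	return false
}

func main() {
	tags := map[string]string{"instance": "host-1:234", "job": "foobar"}
	re := regexp.MustCompile(`^host-.+:234$`)
	fmt.Println(matchTagFilter(tags, "job", "=", "foobar", nil))          // true
	fmt.Println(matchTagFilter(tags, "instance", "=~", "", re))          // true
	fmt.Println(matchTagFilter(tags, "instance", "=", "host-2:234", nil)) // false
}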
