lib/logstorage: work-in-progress

This commit is contained in:
Aliaksandr Valialkin 2024-05-24 03:06:55 +02:00
parent c96a98731a
commit 4b458370c1
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
45 changed files with 1972 additions and 871 deletions

View file

@ -1,17 +0,0 @@
{% stripspace %}
// FieldNamesResponse formats /select/logsql/field_names response
{% func FieldNamesResponse(names []string) %}
{
"names":[
{% if len(names) > 0 %}
{%q= names[0] %}
{% for _, v := range names[1:] %}
,{%q= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@ -1,69 +0,0 @@
// Code generated by qtc from "field_names_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
// FieldNamesResponse formats /select/logsql/field_names response
//line app/vlselect/logsql/field_names_response.qtpl:4
package logsql
//line app/vlselect/logsql/field_names_response.qtpl:4
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/field_names_response.qtpl:4
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/field_names_response.qtpl:4
func StreamFieldNamesResponse(qw422016 *qt422016.Writer, names []string) {
//line app/vlselect/logsql/field_names_response.qtpl:4
qw422016.N().S(`{"names":[`)
//line app/vlselect/logsql/field_names_response.qtpl:7
if len(names) > 0 {
//line app/vlselect/logsql/field_names_response.qtpl:8
qw422016.N().Q(names[0])
//line app/vlselect/logsql/field_names_response.qtpl:9
for _, v := range names[1:] {
//line app/vlselect/logsql/field_names_response.qtpl:9
qw422016.N().S(`,`)
//line app/vlselect/logsql/field_names_response.qtpl:10
qw422016.N().Q(v)
//line app/vlselect/logsql/field_names_response.qtpl:11
}
//line app/vlselect/logsql/field_names_response.qtpl:12
}
//line app/vlselect/logsql/field_names_response.qtpl:12
qw422016.N().S(`]}`)
//line app/vlselect/logsql/field_names_response.qtpl:15
}
//line app/vlselect/logsql/field_names_response.qtpl:15
func WriteFieldNamesResponse(qq422016 qtio422016.Writer, names []string) {
//line app/vlselect/logsql/field_names_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/field_names_response.qtpl:15
StreamFieldNamesResponse(qw422016, names)
//line app/vlselect/logsql/field_names_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/field_names_response.qtpl:15
}
//line app/vlselect/logsql/field_names_response.qtpl:15
func FieldNamesResponse(names []string) string {
//line app/vlselect/logsql/field_names_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/field_names_response.qtpl:15
WriteFieldNamesResponse(qb422016, names)
//line app/vlselect/logsql/field_names_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/field_names_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/field_names_response.qtpl:15
return qs422016
//line app/vlselect/logsql/field_names_response.qtpl:15
}

View file

@ -1,17 +0,0 @@
{% stripspace %}
// FieldValuesResponse formats /select/logsql/field_values response
{% func FieldValuesResponse(values []string) %}
{
"values":[
{% if len(values) > 0 %}
{%q= values[0] %}
{% for _, v := range values[1:] %}
,{%q= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@ -1,69 +0,0 @@
// Code generated by qtc from "field_values_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
// FieldValuesResponse formats /select/logsql/field_values response
//line app/vlselect/logsql/field_values_response.qtpl:4
package logsql
//line app/vlselect/logsql/field_values_response.qtpl:4
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/field_values_response.qtpl:4
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/field_values_response.qtpl:4
func StreamFieldValuesResponse(qw422016 *qt422016.Writer, values []string) {
//line app/vlselect/logsql/field_values_response.qtpl:4
qw422016.N().S(`{"values":[`)
//line app/vlselect/logsql/field_values_response.qtpl:7
if len(values) > 0 {
//line app/vlselect/logsql/field_values_response.qtpl:8
qw422016.N().Q(values[0])
//line app/vlselect/logsql/field_values_response.qtpl:9
for _, v := range values[1:] {
//line app/vlselect/logsql/field_values_response.qtpl:9
qw422016.N().S(`,`)
//line app/vlselect/logsql/field_values_response.qtpl:10
qw422016.N().Q(v)
//line app/vlselect/logsql/field_values_response.qtpl:11
}
//line app/vlselect/logsql/field_values_response.qtpl:12
}
//line app/vlselect/logsql/field_values_response.qtpl:12
qw422016.N().S(`]}`)
//line app/vlselect/logsql/field_values_response.qtpl:15
}
//line app/vlselect/logsql/field_values_response.qtpl:15
func WriteFieldValuesResponse(qq422016 qtio422016.Writer, values []string) {
//line app/vlselect/logsql/field_values_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/field_values_response.qtpl:15
StreamFieldValuesResponse(qw422016, values)
//line app/vlselect/logsql/field_values_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/field_values_response.qtpl:15
}
//line app/vlselect/logsql/field_values_response.qtpl:15
func FieldValuesResponse(values []string) string {
//line app/vlselect/logsql/field_values_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/field_values_response.qtpl:15
WriteFieldValuesResponse(qb422016, values)
//line app/vlselect/logsql/field_values_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/field_values_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/field_values_response.qtpl:15
return qs422016
//line app/vlselect/logsql/field_values_response.qtpl:15
}

View file

@ -146,7 +146,7 @@ func ProcessFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *htt
// Write results
w.Header().Set("Content-Type", "application/json")
WriteFieldNamesResponse(w, fieldNames)
WriteValuesWithHitsJSON(w, fieldNames)
}
// ProcessFieldValuesRequest handles /select/logsql/field_values request.
@ -186,7 +186,7 @@ func ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *ht
// Write results
w.Header().Set("Content-Type", "application/json")
WriteFieldValuesResponse(w, values)
WriteValuesWithHitsJSON(w, values)
}
// ProcessStreamLabelNamesRequest processes /select/logsql/stream_label_names request.
@ -208,7 +208,7 @@ func ProcessStreamLabelNamesRequest(ctx context.Context, w http.ResponseWriter,
// Write results
w.Header().Set("Content-Type", "application/json")
WriteStreamLabelNamesResponse(w, names)
WriteValuesWithHitsJSON(w, names)
}
// ProcessStreamLabelValuesRequest processes /select/logsql/stream_label_values request.
@ -247,7 +247,7 @@ func ProcessStreamLabelValuesRequest(ctx context.Context, w http.ResponseWriter,
// Write results
w.Header().Set("Content-Type", "application/json")
WriteStreamLabelValuesResponse(w, values)
WriteValuesWithHitsJSON(w, values)
}
// ProcessStreamsRequest processes /select/logsql/streams request.
@ -279,7 +279,7 @@ func ProcessStreamsRequest(ctx context.Context, w http.ResponseWriter, r *http.R
// Write results
w.Header().Set("Content-Type", "application/json")
WriteStreamsResponse(w, streams)
WriteValuesWithHitsJSON(w, streams)
}
// ProcessQueryRequest handles /select/logsql/query request.

View file

@ -0,0 +1,32 @@
{% import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
) %}
{% stripspace %}
// ValuesWithHitsJSON generates JSON from the given values.
{% func ValuesWithHitsJSON(values []logstorage.ValueWithHits) %}
{
"values":{%= valuesWithHitsJSONArray(values) %}
}
{% endfunc %}
{% func valuesWithHitsJSONArray(values []logstorage.ValueWithHits) %}
[
{% if len(values) > 0 %}
{%= valueWithHitsJSON(values[0]) %}
{% for _, v := range values[1:] %}
,{%= valueWithHitsJSON(v) %}
{% endfor %}
{% endif %}
]
{% endfunc %}
{% func valueWithHitsJSON(v logstorage.ValueWithHits) %}
{
"value":{%q= v.Value %},
"hits":{%dul= v.Hits %}
}
{% endfunc %}
{% endstripspace %}
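For reference, a minimal Go sketch (editorial, not part of this commit) of the JSON shape the `ValuesWithHitsJSON` template above produces, assuming `logstorage.ValueWithHits` carries a `Value string` and a `Hits uint64` field as used by the template:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// valueWithHits mirrors the two fields of logstorage.ValueWithHits used by the template above.
type valueWithHits struct {
	Value string `json:"value"`
	Hits  uint64 `json:"hits"`
}

func main() {
	// The template emits {"values":[{"value":...,"hits":...},...]}.
	resp := map[string][]valueWithHits{
		"values": {
			{Value: "_msg", Hits: 1033300623},
			{Value: "host", Hits: 69426656},
		},
	}
	data, err := json.Marshal(resp)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data))
}
```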

View file

@ -0,0 +1,152 @@
// Code generated by qtc from "logsql.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vlselect/logsql/logsql.qtpl:1
package logsql
//line app/vlselect/logsql/logsql.qtpl:1
import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
// ValuesWithHitsJSON generates JSON from the given values.
//line app/vlselect/logsql/logsql.qtpl:8
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/logsql.qtpl:8
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/logsql.qtpl:8
func StreamValuesWithHitsJSON(qw422016 *qt422016.Writer, values []logstorage.ValueWithHits) {
//line app/vlselect/logsql/logsql.qtpl:8
qw422016.N().S(`{"values":`)
//line app/vlselect/logsql/logsql.qtpl:10
streamvaluesWithHitsJSONArray(qw422016, values)
//line app/vlselect/logsql/logsql.qtpl:10
qw422016.N().S(`}`)
//line app/vlselect/logsql/logsql.qtpl:12
}
//line app/vlselect/logsql/logsql.qtpl:12
func WriteValuesWithHitsJSON(qq422016 qtio422016.Writer, values []logstorage.ValueWithHits) {
//line app/vlselect/logsql/logsql.qtpl:12
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/logsql.qtpl:12
StreamValuesWithHitsJSON(qw422016, values)
//line app/vlselect/logsql/logsql.qtpl:12
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/logsql.qtpl:12
}
//line app/vlselect/logsql/logsql.qtpl:12
func ValuesWithHitsJSON(values []logstorage.ValueWithHits) string {
//line app/vlselect/logsql/logsql.qtpl:12
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/logsql.qtpl:12
WriteValuesWithHitsJSON(qb422016, values)
//line app/vlselect/logsql/logsql.qtpl:12
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/logsql.qtpl:12
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/logsql.qtpl:12
return qs422016
//line app/vlselect/logsql/logsql.qtpl:12
}
//line app/vlselect/logsql/logsql.qtpl:14
func streamvaluesWithHitsJSONArray(qw422016 *qt422016.Writer, values []logstorage.ValueWithHits) {
//line app/vlselect/logsql/logsql.qtpl:14
qw422016.N().S(`[`)
//line app/vlselect/logsql/logsql.qtpl:16
if len(values) > 0 {
//line app/vlselect/logsql/logsql.qtpl:17
streamvalueWithHitsJSON(qw422016, values[0])
//line app/vlselect/logsql/logsql.qtpl:18
for _, v := range values[1:] {
//line app/vlselect/logsql/logsql.qtpl:18
qw422016.N().S(`,`)
//line app/vlselect/logsql/logsql.qtpl:19
streamvalueWithHitsJSON(qw422016, v)
//line app/vlselect/logsql/logsql.qtpl:20
}
//line app/vlselect/logsql/logsql.qtpl:21
}
//line app/vlselect/logsql/logsql.qtpl:21
qw422016.N().S(`]`)
//line app/vlselect/logsql/logsql.qtpl:23
}
//line app/vlselect/logsql/logsql.qtpl:23
func writevaluesWithHitsJSONArray(qq422016 qtio422016.Writer, values []logstorage.ValueWithHits) {
//line app/vlselect/logsql/logsql.qtpl:23
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/logsql.qtpl:23
streamvaluesWithHitsJSONArray(qw422016, values)
//line app/vlselect/logsql/logsql.qtpl:23
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/logsql.qtpl:23
}
//line app/vlselect/logsql/logsql.qtpl:23
func valuesWithHitsJSONArray(values []logstorage.ValueWithHits) string {
//line app/vlselect/logsql/logsql.qtpl:23
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/logsql.qtpl:23
writevaluesWithHitsJSONArray(qb422016, values)
//line app/vlselect/logsql/logsql.qtpl:23
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/logsql.qtpl:23
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/logsql.qtpl:23
return qs422016
//line app/vlselect/logsql/logsql.qtpl:23
}
//line app/vlselect/logsql/logsql.qtpl:25
func streamvalueWithHitsJSON(qw422016 *qt422016.Writer, v logstorage.ValueWithHits) {
//line app/vlselect/logsql/logsql.qtpl:25
qw422016.N().S(`{"value":`)
//line app/vlselect/logsql/logsql.qtpl:27
qw422016.N().Q(v.Value)
//line app/vlselect/logsql/logsql.qtpl:27
qw422016.N().S(`,"hits":`)
//line app/vlselect/logsql/logsql.qtpl:28
qw422016.N().DUL(v.Hits)
//line app/vlselect/logsql/logsql.qtpl:28
qw422016.N().S(`}`)
//line app/vlselect/logsql/logsql.qtpl:30
}
//line app/vlselect/logsql/logsql.qtpl:30
func writevalueWithHitsJSON(qq422016 qtio422016.Writer, v logstorage.ValueWithHits) {
//line app/vlselect/logsql/logsql.qtpl:30
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/logsql.qtpl:30
streamvalueWithHitsJSON(qw422016, v)
//line app/vlselect/logsql/logsql.qtpl:30
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/logsql.qtpl:30
}
//line app/vlselect/logsql/logsql.qtpl:30
func valueWithHitsJSON(v logstorage.ValueWithHits) string {
//line app/vlselect/logsql/logsql.qtpl:30
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/logsql.qtpl:30
writevalueWithHitsJSON(qb422016, v)
//line app/vlselect/logsql/logsql.qtpl:30
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/logsql.qtpl:30
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/logsql.qtpl:30
return qs422016
//line app/vlselect/logsql/logsql.qtpl:30
}

View file

@ -1,17 +0,0 @@
{% stripspace %}
// StreamLabelNamesResponse formats /select/logsql/stream_label_names response
{% func StreamLabelNamesResponse(names []string) %}
{
"names":[
{% if len(names) > 0 %}
{%q= names[0] %}
{% for _, v := range names[1:] %}
,{%q= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@ -1,69 +0,0 @@
// Code generated by qtc from "stream_label_names_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
// StreamLabelNamesResponse formats /select/logsql/stream_label_names response
//line app/vlselect/logsql/stream_label_names_response.qtpl:4
package logsql
//line app/vlselect/logsql/stream_label_names_response.qtpl:4
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/stream_label_names_response.qtpl:4
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/stream_label_names_response.qtpl:4
func StreamStreamLabelNamesResponse(qw422016 *qt422016.Writer, names []string) {
//line app/vlselect/logsql/stream_label_names_response.qtpl:4
qw422016.N().S(`{"names":[`)
//line app/vlselect/logsql/stream_label_names_response.qtpl:7
if len(names) > 0 {
//line app/vlselect/logsql/stream_label_names_response.qtpl:8
qw422016.N().Q(names[0])
//line app/vlselect/logsql/stream_label_names_response.qtpl:9
for _, v := range names[1:] {
//line app/vlselect/logsql/stream_label_names_response.qtpl:9
qw422016.N().S(`,`)
//line app/vlselect/logsql/stream_label_names_response.qtpl:10
qw422016.N().Q(v)
//line app/vlselect/logsql/stream_label_names_response.qtpl:11
}
//line app/vlselect/logsql/stream_label_names_response.qtpl:12
}
//line app/vlselect/logsql/stream_label_names_response.qtpl:12
qw422016.N().S(`]}`)
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
}
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
func WriteStreamLabelNamesResponse(qq422016 qtio422016.Writer, names []string) {
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
StreamStreamLabelNamesResponse(qw422016, names)
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
}
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
func StreamLabelNamesResponse(names []string) string {
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
WriteStreamLabelNamesResponse(qb422016, names)
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
return qs422016
//line app/vlselect/logsql/stream_label_names_response.qtpl:15
}

View file

@ -1,17 +0,0 @@
{% stripspace %}
// StreamLabelValuesResponse formats /select/logsql/stream_label_values response
{% func StreamLabelValuesResponse(values []string) %}
{
"values":[
{% if len(values) > 0 %}
{%q= values[0] %}
{% for _, v := range values[1:] %}
,{%q= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@ -1,69 +0,0 @@
// Code generated by qtc from "stream_label_values_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
// StreamLabelValuesResponse formats /select/logsql/stream_label_values response
//line app/vlselect/logsql/stream_label_values_response.qtpl:4
package logsql
//line app/vlselect/logsql/stream_label_values_response.qtpl:4
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/stream_label_values_response.qtpl:4
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/stream_label_values_response.qtpl:4
func StreamStreamLabelValuesResponse(qw422016 *qt422016.Writer, values []string) {
//line app/vlselect/logsql/stream_label_values_response.qtpl:4
qw422016.N().S(`{"values":[`)
//line app/vlselect/logsql/stream_label_values_response.qtpl:7
if len(values) > 0 {
//line app/vlselect/logsql/stream_label_values_response.qtpl:8
qw422016.N().Q(values[0])
//line app/vlselect/logsql/stream_label_values_response.qtpl:9
for _, v := range values[1:] {
//line app/vlselect/logsql/stream_label_values_response.qtpl:9
qw422016.N().S(`,`)
//line app/vlselect/logsql/stream_label_values_response.qtpl:10
qw422016.N().Q(v)
//line app/vlselect/logsql/stream_label_values_response.qtpl:11
}
//line app/vlselect/logsql/stream_label_values_response.qtpl:12
}
//line app/vlselect/logsql/stream_label_values_response.qtpl:12
qw422016.N().S(`]}`)
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
}
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
func WriteStreamLabelValuesResponse(qq422016 qtio422016.Writer, values []string) {
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
StreamStreamLabelValuesResponse(qw422016, values)
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
}
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
func StreamLabelValuesResponse(values []string) string {
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
WriteStreamLabelValuesResponse(qb422016, values)
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
return qs422016
//line app/vlselect/logsql/stream_label_values_response.qtpl:15
}

View file

@ -1,17 +0,0 @@
{% stripspace %}
// StreamsResponse formats /select/logsql/streams response
{% func StreamsResponse(streams []string) %}
{
"streams":[
{% if len(streams) > 0 %}
{%q= streams[0] %}
{% for _, v := range streams[1:] %}
,{%q= v %}
{% endfor %}
{% endif %}
]
}
{% endfunc %}
{% endstripspace %}

View file

@ -1,69 +0,0 @@
// Code generated by qtc from "streams_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
// StreamsResponse formats /select/logsql/streams response
//line app/vlselect/logsql/streams_response.qtpl:4
package logsql
//line app/vlselect/logsql/streams_response.qtpl:4
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vlselect/logsql/streams_response.qtpl:4
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vlselect/logsql/streams_response.qtpl:4
func StreamStreamsResponse(qw422016 *qt422016.Writer, streams []string) {
//line app/vlselect/logsql/streams_response.qtpl:4
qw422016.N().S(`{"streams":[`)
//line app/vlselect/logsql/streams_response.qtpl:7
if len(streams) > 0 {
//line app/vlselect/logsql/streams_response.qtpl:8
qw422016.N().Q(streams[0])
//line app/vlselect/logsql/streams_response.qtpl:9
for _, v := range streams[1:] {
//line app/vlselect/logsql/streams_response.qtpl:9
qw422016.N().S(`,`)
//line app/vlselect/logsql/streams_response.qtpl:10
qw422016.N().Q(v)
//line app/vlselect/logsql/streams_response.qtpl:11
}
//line app/vlselect/logsql/streams_response.qtpl:12
}
//line app/vlselect/logsql/streams_response.qtpl:12
qw422016.N().S(`]}`)
//line app/vlselect/logsql/streams_response.qtpl:15
}
//line app/vlselect/logsql/streams_response.qtpl:15
func WriteStreamsResponse(qq422016 qtio422016.Writer, streams []string) {
//line app/vlselect/logsql/streams_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/streams_response.qtpl:15
StreamStreamsResponse(qw422016, streams)
//line app/vlselect/logsql/streams_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/streams_response.qtpl:15
}
//line app/vlselect/logsql/streams_response.qtpl:15
func StreamsResponse(streams []string) string {
//line app/vlselect/logsql/streams_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/streams_response.qtpl:15
WriteStreamsResponse(qb422016, streams)
//line app/vlselect/logsql/streams_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/streams_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/streams_response.qtpl:15
return qs422016
//line app/vlselect/logsql/streams_response.qtpl:15
}

View file

@ -112,33 +112,33 @@ func RunQuery(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorag
}
// GetFieldNames executes q and returns field names seen in results.
func GetFieldNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]string, error) {
func GetFieldNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) {
return strg.GetFieldNames(ctx, tenantIDs, q)
}
// GetFieldValues executes q and returns unique values for the fieldName seen in results.
//
// If limit > 0, then up to limit unique values are returned.
func GetFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string, limit uint64) ([]string, error) {
func GetFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string, limit uint64) ([]logstorage.ValueWithHits, error) {
return strg.GetFieldValues(ctx, tenantIDs, q, fieldName, limit)
}
// GetStreamLabelNames executes q and returns stream label names seen in results.
func GetStreamLabelNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]string, error) {
func GetStreamLabelNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) {
return strg.GetStreamLabelNames(ctx, tenantIDs, q)
}
// GetStreamLabelValues executes q and returns stream label values for the given labelName seen in results.
//
// If limit > 0, then up to limit unique stream label values are returned.
func GetStreamLabelValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, labelName string, limit uint64) ([]string, error) {
func GetStreamLabelValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, labelName string, limit uint64) ([]logstorage.ValueWithHits, error) {
return strg.GetStreamLabelValues(ctx, tenantIDs, q, labelName, limit)
}
// GetStreams executes q and returns streams seen in query results.
//
// If limit > 0, then up to limit unique streams are returned.
func GetStreams(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, limit uint64) ([]string, error) {
func GetStreams(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, limit uint64) ([]logstorage.ValueWithHits, error) {
return strg.GetStreams(ctx, tenantIDs, q, limit)
}
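A hedged usage sketch (editorial, not part of this commit; it assumes it lives in the same package that defines `GetStreams` above, and that `logstorage.ValueWithHits` exposes the `Value` and `Hits` fields used elsewhere in this commit) showing how callers consume the new per-value hit counts:

```go
package vlstorage // assumption: the same package that defines GetStreams above

import (
	"context"
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

// printStreamsWithHits is an illustrative caller: every returned entry now
// carries the stream value together with the number of matching log entries.
func printStreamsWithHits(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) error {
	values, err := GetStreams(ctx, tenantIDs, q, 100)
	if err != nil {
		return err
	}
	for _, v := range values {
		fmt.Printf("%s => %d hits\n", v.Value, v.Hits)
	}
	return nil
}
```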

View file

@ -19,6 +19,17 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
## tip
* FEATURE: return the number of matching log entries per returned value in [HTTP API](https://docs.victoriametrics.com/victorialogs/querying/#http-api) results. This simplifies detecting [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) / [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) values with the biggest number of logs for the given [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/).
* FEATURE: improve performance for [regexp filter](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter) in the following cases:
- If the regexp contains just a phrase without special regular expression chars. For example, `~"foo"`.
- If the regexp starts with `.*` or ends with `.*`. For example, `~".*foo.*"`.
- If the regexp contains multiple strings delimited by `|`. For example, `~"foo|bar|baz"`.
- If the regexp contains multiple [words](https://docs.victoriametrics.com/victorialogs/logsql/#word). For example, `~"foo bar baz"`.
* FEATURE: allow disabling automatic unquoting of the matched placeholders in [`extract` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#format-for-extract-pipe-pattern).
* BUGFIX: properly parse `!` in front of [exact filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter), [exact-prefix filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-prefix-filter) and [regexp filter](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter). For example, `!~"some regexp"` is properly parsed as `not ~"some regexp"`. Previously it was incorrectly parsed as `'!~"some regexp"'` [phrase filter](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter).
* BUGFIX: properly sort results by [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) when [`limit` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe) is applied. For example, `_time:5m | sort by (_time) desc | limit 10` properly works now.
## [v0.9.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.9.1-victorialogs)
Released at 2024-05-22

View file

@ -403,6 +403,13 @@ This query doesn't match the following log messages:
- `SSH: login fail`, since the `SSH` word is in capital letters. Use `i("ssh: login fail")` for case-insensitive search.
See [these docs](#case-insensitive-filter) for details.
If the phrase contains double quotes, then either put `\` in front of double quotes or put the phrase inside single quotes. For example, the following filter searches
logs with `"foo":"bar"` phrase:
```logsql
'"foo":"bar"'
```
By default the given phrase is searched in the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the phrase and put a colon after it
if it must be searched in the given field. For example, the following query returns log entries containing the `cannot open file` phrase in the `event.original` field:
@ -470,6 +477,13 @@ This query doesn't match the following log messages:
- `failed to open file: unexpected EOF`, since `failed` [word](#word) occurs before the `unexpected` word. Use `unexpected AND fail*` for this case.
See [these docs](#logical-filter) for details.
If the prefix contains double quotes, then either put `\` in front of double quotes or put the prefix inside single quotes. For example, the following filter searches
logs with `"foo":"bar` prefix:
```logsql
'"foo":"bar'*
```
By default the prefix filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the prefix filter
in order to apply it to the given field. For example, the following query matches `log.level` field containing any word with the `err` prefix:
@ -783,6 +797,13 @@ The query doesn't match the following log messages:
See [these docs](https://github.com/google/re2/wiki/Syntax) for details. See also [case-insensitive filter docs](#case-insensitive-filter).
- `it is warmer than usual`, since it contains neither `err` nor `warn` substrings.
If the regexp contains double quotes, then either put `\` in front of double quotes or put the regexp inside single quotes. For example, the following regexp searches
logs matching `"foo":"(bar|baz)"` regexp:
```logsql
'"foo":"(bar|baz)"'
```
By default the regexp filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter
in order to apply it to the given field. For example, the following query matches `event.original` field containing either `err` or `warn` substrings:
@ -1134,7 +1155,8 @@ For example, the following query is equivalent to the previous one:
_time:1d error | extract "ip=<ip> " | stats by (ip) count() logs | sort by (logs) desc limit 10
```
If the `pattern` contains double quotes, then it can be quoted into single quotes. For example, the following query extracts `ip` from the corresponding JSON field:
If the `pattern` contains double quotes, then either put `\` in front of double quotes or put the `pattern` inside single quotes.
For example, the following query extracts `ip` from the corresponding JSON field:
```logsql
_time:5m | extract '"ip":"<ip>"'
@ -1162,7 +1184,7 @@ Placeholders can be anonymous and named. Anonymous placeholders are written as `
must be skipped until the next `textX`. Named placeholders are written as `<some_name>`, where `some_name` is the name of the log field to store
the corresponding matching substring to.
The matching starts from the first occurence of the `text1` in the input text. If the `pattern` starts with `<field1>` and doesn't contain `text1`,
Matching starts from the first occurrence of `text1` in the input text. If the `pattern` starts with `<field1>` and doesn't contain `text1`,
then the matching starts from the beginning of the input text. Matching is performed sequentially according to the `pattern`. If some `textX` isn't found
in the remaining input text, then the remaining named placeholders receive empty string values and the matching finishes prematurely.
@ -1197,6 +1219,13 @@ This is useful for extracting JSON strings. For example, the following `pattern`
"message":<msg>
```
The automatic string unquoting can be disabled if needed by adding `plain:` prefix in front of the field name. For example, if some JSON array of string values must be captured
into `json_array` field, then the following `pattern` can be used:
```
some json string array: [<plain:json_array>]
```
If some special chars such as `<` must be matched by the `pattern`, then they can be [html-escaped](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references).
For example, the following `pattern` properly matches `a < b` text by extracting `a` into `left` field and `b` into `right` field:
@ -1217,12 +1246,12 @@ _time:5m | extract if (ip:"") "ip=<ip> "
### field_names pipe
Sometimes it may be needed to get all the field names for the selected results. This may be done with `| field_names ...` [pipe](#pipes).
For example, the following query returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
from the logs over the last 5 minutes:
`| field_names` [pipe](#pipes) returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
with an estimated number of logs for each field name.
For example, the following query returns all the field names with the number of matching logs over the last 5 minutes:
```logsql
_time:5m | field_names as names
_time:5m | field_names
```
Field names are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
@ -1593,7 +1622,7 @@ _time:5m | stats
### uniq pipe
`| uniq ...` pipe allows returning only unique results over the selected logs. For example, the following LogsQL query
`| uniq ...` pipe returns unique results over the selected logs. For example, the following LogsQL query
returns unique values for `ip` [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes:
@ -1610,6 +1639,12 @@ _time:5m | uniq by (host, path)
The unique entries are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
Add `hits` after `uniq by (...)` in order to return the number of matching logs for each field value:
```logsql
_time:5m | uniq by (host) hits
```
Unique entries are stored in memory during query execution. A big number of selected unique entries may require a lot of memory.
Sometimes it is enough to return up to `N` unique entries. This can be done by adding `limit N` after `by (...)` clause.
This allows limiting memory usage. For example, the following query returns up to 100 unique `(host, path)` pairs for the logs over the last 5 minutes:
@ -1618,6 +1653,8 @@ This allows limiting memory usage. For example, the following query returns up t
_time:5m | uniq by (host, path) limit 100
```
If the `limit` is reached, then an arbitrary subset of unique values can be returned. The `hits` calculation doesn't work when the `limit` is reached.
The `by` keyword can be skipped in `uniq ...` pipe. For example, the following query is equivalent to the previous one:
```logsql
@ -1887,7 +1924,7 @@ across logs for the last 5 minutes:
_time:5m | stats fields_max(duration) as log_with_max_duration
```
Fields from the returned values can be decoded with [`unpack_json`](#unpack_json-pipe) or [`extract`](#extract) pipes.
Fields from the returned values can be decoded with [`unpack_json`](#unpack_json-pipe) or [`extract`](#extract-pipe) pipes.
If only the specific fields are needed from the returned log entry, then they can be enumerated inside `fields_max(...)`.
For example, the following query returns only `_time`, `path` and `duration` fields from the log entry with the maximum `duration` over the last 5 minutes:
@ -1914,7 +1951,7 @@ across logs for the last 5 minutes:
_time:5m | stats fields_min(duration) as log_with_min_duration
```
Fields from the returned values can be decoded with [`unpack_json`](#unpack_json-pipe) or [`extract`](#extract) pipes.
Fields from the returned values can be decoded with [`unpack_json`](#unpack_json-pipe) or [`extract`](#extract-pipe) pipes.
If only the specific fields are needed from the returned log entry, then they can be enumerated inside `fields_min(...)`.
For example, the following query returns only `_time`, `path` and `duration` fields from the log entry with the minimum `duration` over the last 5 minutes:

View file

@ -211,6 +211,7 @@ See also:
VictoriaLogs provides `/select/logsql/streams?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results for every `stream`.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
@ -227,11 +228,19 @@ Below is an example JSON output returned from this endpoint:
```json
{
"streams": [
"{host=\"1.2.3.4\",app=\"foo\"}",
"{host=\"1.2.3.4\",app=\"bar\"}",
"{host=\"10.2.3.4\",app=\"foo\"}",
"{host=\"10.2.3.5\",app=\"baz\"}"
"values": [
{
"value": "{host=\"host-123\",app=\"foo\"}",
"hits": 34980
},
{
"value": "{host=\"host-124\",app=\"bar\"}",
"hits": 32892
},
{
"value": "{host=\"host-125\",app=\"baz\"}",
"hits": 32877
}
]
}
```
@ -250,6 +259,7 @@ See also:
VictoriaLogs provides `/select/logsql/stream_label_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns
[log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results
of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results for every label name.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
@ -266,12 +276,19 @@ Below is an example JSON output returned from this endpoint:
```json
{
"names": [
"app",
"container",
"datacenter",
"host",
"namespace"
"values": [
{
"value": "app",
"hits": 1033300623
},
{
"value": "container",
"hits": 1033300623
},
{
"value": "datacenter",
"hits": 1033300623
}
]
}
```
@ -288,6 +305,7 @@ See also:
VictoriaLogs provides `/select/logsql/stream_label_values?query=<query>&start=<start>&<end>&label=<labelName>` HTTP endpoint,
which returns [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the label with the given `<labelName>` name
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results for every label value.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
@ -305,10 +323,14 @@ Below is an example JSON output returned from this endpoint:
```json
{
"values": [
"host-0",
"host-1",
"host-2",
"host-3"
{
"value": "host-1",
"hits": 69426656
},
{
"value": "host-2",
"hits": 66507749
}
]
}
```
@ -327,6 +349,7 @@ See also:
VictoriaLogs provides `/select/logsql/field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns field names
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results for every field name.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
@ -343,13 +366,19 @@ Below is an example JSON output returned from this endpoint:
```json
{
"names": [
"_msg",
"_stream",
"_time",
"host",
"level",
"location"
"values": [
{
"value": "_msg",
"hits": 1033300623
},
{
"value": "_stream",
"hits": 1033300623
},
{
"value": "_time",
"hits": 1033300623
}
]
}
```
@ -366,6 +395,7 @@ See also:
VictoriaLogs provides `/select/logsql/field_values?query=<query>&field=<fieldName>&start=<start>&end=<end>` HTTP endpoint, which returns
unique values for the given `<fieldName>` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results for every field value.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
@ -383,17 +413,25 @@ Below is an example JSON output returned from this endpoint:
```json
{
"values": [
"host_0",
"host_1",
"host_10",
"host_100",
"host_1000"
{
"value": "host-1",
"hits": 69426656
},
{
"value": "host-2",
"hits": 66507749
},
{
"value": "host-3",
"hits": 65454351
}
]
}
```
The `/select/logsql/field_values` endpoint supports optional `limit=N` query arg, which allows limiting the number of returned values to `N`.
The endpoint returns an arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for pagination over a big number of field values.
When the `limit` is reached, `hits` are zeroed, since they cannot be calculated reliably.
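A minimal Go sketch (editorial; the address, query and field values below are assumptions, not part of the docs) for calling this endpoint and decoding the response shape shown above:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

type valueWithHits struct {
	Value string `json:"value"`
	Hits  uint64 `json:"hits"`
}

func main() {
	// Assumed VictoriaLogs address and query; adjust to your deployment.
	params := url.Values{}
	params.Set("query", "error")
	params.Set("field", "host")
	params.Set("limit", "10")
	resp, err := http.Get("http://localhost:9428/select/logsql/field_values?" + params.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var result struct {
		Values []valueWithHits `json:"values"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		panic(err)
	}
	for _, v := range result.Values {
		fmt.Printf("%s: %d hits\n", v.Value, v.Hits)
	}
}
```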
See also:

View file

@ -1804,6 +1804,8 @@ func appendResultColumnWithName(dst []resultColumn, name string) []resultColumn
}
// addValue adds the given value v to rc.
//
// rc is valid until v is modified.
func (rc *resultColumn) addValue(v string) {
rc.values = append(rc.values, v)
}

View file

@ -2,9 +2,11 @@ package logstorage
import (
"fmt"
"regexp"
"sync"
"unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)
// filterRegexp matches the given regexp
@ -12,17 +14,51 @@ import (
// Example LogsQL: `fieldName:re("regexp")`
type filterRegexp struct {
fieldName string
re *regexp.Regexp
re *regexutil.Regex
tokens []string
tokensOnce sync.Once
}
func (fr *filterRegexp) String() string {
return fmt.Sprintf("%s~%q", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String())
return fmt.Sprintf("%s~%s", quoteFieldNameIfNeeded(fr.fieldName), quoteTokenIfNeeded(fr.re.String()))
}
func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fr.fieldName)
}
func (fr *filterRegexp) getTokens() []string {
fr.tokensOnce.Do(fr.initTokens)
return fr.tokens
}
func (fr *filterRegexp) initTokens() {
literals := fr.re.GetLiterals()
for i, literal := range literals {
literals[i] = skipFirstLastToken(literal)
}
fr.tokens = tokenizeStrings(nil, literals)
}
// skipFirstLastToken drops the first and the last token from s.
//
// The first and the last tokens extracted from a regexp literal may be incomplete,
// since the literal can match in the middle of a longer token, so they must not
// be used for bloom filter matching.
func skipFirstLastToken(s string) string {
for {
r, runeSize := utf8.DecodeRuneInString(s)
if !isTokenRune(r) {
break
}
s = s[runeSize:]
}
for {
r, runeSize := utf8.DecodeLastRuneInString(s)
if !isTokenRune(r) {
break
}
s = s[:len(s)-runeSize]
}
return s
}
func (fr *filterRegexp) applyToBlockResult(br *blockResult, bm *bitmap) {
re := fr.re
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
@ -53,31 +89,37 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
tokens := fr.getTokens()
switch ch.valueType {
case valueTypeString:
matchStringByRegexp(bs, ch, bm, re)
matchStringByRegexp(bs, ch, bm, re, tokens)
case valueTypeDict:
matchValuesDictByRegexp(bs, ch, bm, re)
case valueTypeUint8:
matchUint8ByRegexp(bs, ch, bm, re)
matchUint8ByRegexp(bs, ch, bm, re, tokens)
case valueTypeUint16:
matchUint16ByRegexp(bs, ch, bm, re)
matchUint16ByRegexp(bs, ch, bm, re, tokens)
case valueTypeUint32:
matchUint32ByRegexp(bs, ch, bm, re)
matchUint32ByRegexp(bs, ch, bm, re, tokens)
case valueTypeUint64:
matchUint64ByRegexp(bs, ch, bm, re)
matchUint64ByRegexp(bs, ch, bm, re, tokens)
case valueTypeFloat64:
matchFloat64ByRegexp(bs, ch, bm, re)
matchFloat64ByRegexp(bs, ch, bm, re, tokens)
case valueTypeIPv4:
matchIPv4ByRegexp(bs, ch, bm, re)
matchIPv4ByRegexp(bs, ch, bm, re, tokens)
case valueTypeTimestampISO8601:
matchTimestampISO8601ByRegexp(bs, ch, bm, re)
matchTimestampISO8601ByRegexp(bs, ch, bm, re, tokens)
default:
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
}
}
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601String(bs, bb, v)
@ -86,7 +128,11 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
bbPool.Put(bb)
}
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4String(bs, bb, v)
@ -95,7 +141,11 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp
bbPool.Put(bb)
}
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64String(bs, bb, v)
@ -104,7 +154,7 @@ func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *reg
bbPool.Put(bb)
}
func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get()
for _, v := range ch.valuesDict.values {
c := byte(0)
@ -117,13 +167,21 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *
bbPool.Put(bb)
}
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
visitValues(bs, ch, bm, func(v string) bool {
return re.MatchString(v)
})
}
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toUint8String(bs, bb, v)
@ -132,7 +190,11 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex
bbPool.Put(bb)
}
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toUint16String(bs, bb, v)
@ -141,7 +203,11 @@ func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
bbPool.Put(bb)
}
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toUint32String(bs, bb, v)
@ -150,7 +216,11 @@ func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
bbPool.Put(bb)
}
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toUint64String(bs, bb, v)

View file

@ -1,8 +1,10 @@
package logstorage
import (
"regexp"
"fmt"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)
func TestFilterRegexp(t *testing.T) {
@ -21,32 +23,32 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("0.0"),
re: mustCompileRegex("0.0"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2})
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile(`^127\.0\.0\.1$`),
re: mustCompileRegex(`^127\.0\.0\.1$`),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2})
fr = &filterRegexp{
fieldName: "non-existing-column",
re: regexp.MustCompile("foo.+bar|"),
re: mustCompileRegex("foo.+bar|"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo.+bar"),
re: mustCompileRegex("foo.+bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
fr = &filterRegexp{
fieldName: "non-existing-column",
re: regexp.MustCompile("foo.+bar"),
re: mustCompileRegex("foo.+bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -71,20 +73,20 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar|^$"),
re: mustCompileRegex("foo|bar|^$"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5, 6})
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("27.0"),
re: mustCompileRegex("27.0"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 5, 6, 7})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("bar.+foo"),
re: mustCompileRegex("bar.+foo"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -111,14 +113,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("(?i)foo|йцу"),
re: mustCompileRegex("(?i)foo|йцу"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 6, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("qwe.+rty|^$"),
re: mustCompileRegex("qwe.+rty|^$"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -146,14 +148,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -181,14 +183,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -216,14 +218,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -251,14 +253,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -286,14 +288,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 6, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -322,14 +324,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("127.0.[40].(1|2)"),
re: mustCompileRegex("127.0.[40].(1|2)"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 4, 5, 6, 7})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar|834"),
re: mustCompileRegex("foo|bar|834"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -355,15 +357,42 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "_msg",
re: regexp.MustCompile("2006-[0-9]{2}-.+?(2|5)Z"),
re: mustCompileRegex("2006-[0-9]{2}-.+?(2|5)Z"),
}
testFilterMatchForColumns(t, columns, fr, "_msg", []int{1, 4})
// mismatch
fr = &filterRegexp{
fieldName: "_msg",
re: regexp.MustCompile("^01|04$"),
re: mustCompileRegex("^01|04$"),
}
testFilterMatchForColumns(t, columns, fr, "_msg", nil)
})
}
func TestSkipFirstLastToken(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
result := skipFirstLastToken(s)
if result != resultExpected {
t.Fatalf("unexpected result in skipFirstLastToken(%q); got %q; want %q", s, result, resultExpected)
}
}
f("", "")
f("foobar", "")
f("foo bar", " ")
f("foo bar baz", " bar ")
f(" foo bar baz", " foo bar ")
f(",foo bar baz!", ",foo bar baz!")
f("фыад длоа д!", " длоа д!")
}
func mustCompileRegex(expr string) *regexutil.Regex {
re, err := regexutil.NewRegex(expr)
if err != nil {
panic(fmt.Errorf("BUG: cannot compile %q: %w", expr, err))
}
return re
}

View file

@ -22,11 +22,7 @@ type filterStream struct {
}
func (fs *filterStream) String() string {
s := fs.f.String()
if s == "{}" {
return ""
}
return "_stream:" + s
return "_stream:" + fs.f.String()
}
func (fs *filterStream) updateNeededFields(neededFields fieldsSet) {

View file

@ -38,7 +38,7 @@ func (p *logfmtParser) parse(s string) {
}
// Search for field value
value, nOffset := tryUnquoteString(s)
value, nOffset := tryUnquoteString(s, "")
if nOffset >= 0 {
p.addField(name, value)
s = s[nOffset:]

View file

@ -3,7 +3,6 @@ package logstorage
import (
"fmt"
"math"
"regexp"
"strconv"
"strings"
"time"
@ -12,6 +11,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)
type lexer struct {
@ -597,8 +597,12 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
return parseFilterLT(lex, fieldName)
case lex.isKeyword("="):
return parseFilterEQ(lex, fieldName)
case lex.isKeyword("!="):
return parseFilterNEQ(lex, fieldName)
case lex.isKeyword("~"):
return parseFilterTilda(lex, fieldName)
case lex.isKeyword("!~"):
return parseFilterNotTilda(lex, fieldName)
case lex.isKeyword("not", "!"):
return parseFilterNot(lex, fieldName)
case lex.isKeyword("exact"):
@ -1007,7 +1011,7 @@ func parseFilterExact(lex *lexer, fieldName string) (filter, error) {
func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
funcName := lex.token
return parseFuncArg(lex, fieldName, func(arg string) (filter, error) {
re, err := regexp.Compile(arg)
re, err := regexutil.NewRegex(arg)
if err != nil {
return nil, fmt.Errorf("invalid regexp %q for %s(): %w", arg, funcName, err)
}
@ -1022,7 +1026,7 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
func parseFilterTilda(lex *lexer, fieldName string) (filter, error) {
lex.nextToken()
arg := getCompoundFuncArg(lex)
re, err := regexp.Compile(arg)
re, err := regexutil.NewRegex(arg)
if err != nil {
return nil, fmt.Errorf("invalid regexp %q: %w", arg, err)
}
@ -1033,6 +1037,17 @@ func parseFilterTilda(lex *lexer, fieldName string) (filter, error) {
return fr, nil
}
func parseFilterNotTilda(lex *lexer, fieldName string) (filter, error) {
f, err := parseFilterTilda(lex, fieldName)
if err != nil {
return nil, err
}
fn := &filterNot{
f: f,
}
return fn, nil
}
func parseFilterEQ(lex *lexer, fieldName string) (filter, error) {
lex.nextToken()
phrase := getCompoundFuncArg(lex)
@ -1051,6 +1066,17 @@ func parseFilterEQ(lex *lexer, fieldName string) (filter, error) {
return f, nil
}
func parseFilterNEQ(lex *lexer, fieldName string) (filter, error) {
f, err := parseFilterEQ(lex, fieldName)
if err != nil {
return nil, err
}
fn := &filterNot{
f: f,
}
return fn, nil
}
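Illustrative note (not part of the diff): the new `!=` and `!~` shorthands above are parsed into a filterNot wrapping the corresponding positive filter. A minimal usage sketch, assuming the exported ParseQuery API of this package; the printed form is indicative only:

package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

func main() {
	// "!=" and "!~" negate the "=" and "~" filters respectively.
	q, err := logstorage.ParseQuery(`foo:!=bar level:!~"warn|error"`)
	if err != nil {
		panic(err)
	}
	// Marshals back as negated filters, e.g. `!foo:=bar !level:~"warn|error"`.
	fmt.Println(q.String())
}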
func parseFilterGT(lex *lexer, fieldName string) (filter, error) {
lex.nextToken()

View file

@ -544,6 +544,16 @@ func TestParseQuerySuccess(t *testing.T) {
if result != resultExpected {
t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected)
}
// verify that the marshaled query is parsed to the same query
qParsed, err := ParseQuery(result)
if err != nil {
t.Fatalf("cannot parse marshaled query: %s", err)
}
qStr := qParsed.String()
if qStr != result {
t.Fatalf("unexpected marshaled query\ngot\n%s\nwant\n%s", qStr, result)
}
}
f("foo", "foo")
@ -586,7 +596,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`foo:(bar baz or not :xxx)`, `foo:bar foo:baz or !foo:xxx`)
f(`(foo:bar and (foo:baz or aa:bb) and xx) and y`, `foo:bar (foo:baz or aa:bb) xx y`)
f("level:error and _msg:(a or b)", "level:error (a or b)")
f("level: ( ((error or warn*) and re(foo))) (not (bar))", `(level:error or level:warn*) level:~"foo" !bar`)
f("level: ( ((error or warn*) and re(foo))) (not (bar))", `(level:error or level:warn*) level:~foo !bar`)
f("!(foo bar or baz and not aa*)", `!(foo bar or baz !aa*)`)
// prefix search
@ -600,7 +610,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`"" or foo:"" and not bar:""`, `"" or foo:"" !bar:""`)
// _stream filters
f(`_stream:{}`, ``)
f(`_stream:{}`, `_stream:{}`)
f(`_stream:{foo="bar", baz=~"x" OR or!="b", "x=},"="d}{"}`, `_stream:{foo="bar",baz=~"x" or "or"!="b","x=},"="d}{"}`)
f(`_stream:{or=a or ","="b"}`, `_stream:{"or"="a" or ","="b"}`)
f("_stream : { foo = bar , } ", `_stream:{foo="bar"}`)
@ -713,7 +723,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`exact("foo/bar")`, `="foo/bar"`)
f(`exact('foo/bar')`, `="foo/bar"`)
f(`="foo/bar"`, `="foo/bar"`)
f("=foo=bar =b<=a>z ='abc'*", `="foo=bar" ="b<=a>z" =abc*`)
f("=foo=bar !=b<=a>z foo:!='abc'*", `="foo=bar" !="b<=a>z" !foo:=abc*`)
f("==foo =>=bar x : ( = =a<b*='c*' >=20)", `="=foo" =">=bar" x:="=a<b"* x:="c*" x:>=20`)
// i filter
@ -772,14 +782,14 @@ func TestParseQuerySuccess(t *testing.T) {
f(`foo: >= 10.5M`, `foo:>=10.5M`)
f(`foo: < 10.5M`, `foo:<10.5M`)
f(`foo: <= 10.5M`, `foo:<=10.5M`)
f(`foo:(>10 <=20)`, `foo:>10 foo:<=20`)
f(`>=10 <20`, `>=10 <20`)
f(`foo:(>10 !<=20)`, `foo:>10 !foo:<=20`)
f(`>=10 !<20`, `>=10 !<20`)
// re filter
f("re('foo|ba(r.+)')", `~"foo|ba(r.+)"`)
f("re(foo)", `~"foo"`)
f("re(foo)", `~foo`)
f(`foo:re(foo-bar/baz.)`, `foo:~"foo-bar/baz."`)
f(`~foo.bar.baz`, `~"foo.bar.baz"`)
f(`~foo.bar.baz !~bar`, `~foo.bar.baz !~bar`)
f(`foo:~~foo~ba/ba>z`, `foo:~"~foo~ba/ba>z"`)
f(`foo:~'.*'`, `foo:~".*"`)
@ -1266,7 +1276,6 @@ func TestParseQueryFailure(t *testing.T) {
f(`foo | fields bar,,`)
// invalid field_names
f(`foo | field_names`)
f(`foo | field_names |`)
f(`foo | field_names (`)
f(`foo | field_names )`)

View file

@ -5,8 +5,6 @@ import (
"html"
"strconv"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// pattern represents text pattern in the form 'some_text<some_field>other_text...'
@ -28,18 +26,25 @@ type patternField struct {
type patternStep struct {
prefix string
field string
opt string
field string
fieldOpt string
}
func (ptn *pattern) clone() *pattern {
steps := ptn.steps
fields, matches := newFieldsAndMatchesFromPatternSteps(steps)
if len(fields) == 0 {
logger.Panicf("BUG: fields cannot be empty for steps=%v", steps)
matches := make([]string, len(ptn.steps))
var fields []patternField
for i, step := range ptn.steps {
if step.field != "" {
fields = append(fields, patternField{
name: step.field,
value: &matches[i],
})
}
}
return &pattern{
steps: steps,
steps: ptn.steps,
matches: matches,
fields: fields,
}
@ -59,7 +64,18 @@ func parsePattern(s string) (*pattern, error) {
}
// Build pattern struct
fields, matches := newFieldsAndMatchesFromPatternSteps(steps)
matches := make([]string, len(steps))
var fields []patternField
for i, step := range steps {
if step.field != "" {
fields = append(fields, patternField{
name: step.field,
value: &matches[i],
})
}
}
if len(fields) == 0 {
return nil, fmt.Errorf("pattern %q must contain at least a single named field in the form <field_name>", s)
}
@ -72,35 +88,17 @@ func parsePattern(s string) (*pattern, error) {
return ptn, nil
}
func newFieldsAndMatchesFromPatternSteps(steps []patternStep) ([]patternField, []string) {
matches := make([]string, len(steps))
var fields []patternField
for i, step := range steps {
if step.field != "" {
fields = append(fields, patternField{
name: step.field,
value: &matches[i],
})
}
}
return fields, matches
}
func (ptn *pattern) apply(s string) {
clear(ptn.matches)
steps := ptn.steps
if prefix := steps[0].prefix; prefix != "" {
n := strings.Index(s, prefix)
if n < 0 {
// Mismatch
return
}
s = s[n+len(prefix):]
n, prefixLen := prefixIndex(s, steps[0].prefix)
if n < 0 {
// Mismatch
return
}
s = s[n+prefixLen:]
matches := ptn.matches
for i := range steps {
@ -109,7 +107,7 @@ func (ptn *pattern) apply(s string) {
nextPrefix = steps[i+1].prefix
}
us, nOffset := tryUnquoteString(s)
us, nOffset := tryUnquoteString(s, steps[i].fieldOpt)
if nOffset >= 0 {
// Matched quoted string
matches[i] = us
@ -125,31 +123,45 @@ func (ptn *pattern) apply(s string) {
matches[i] = s
return
}
n := strings.Index(s, nextPrefix)
n, prefixLen := prefixIndex(s, nextPrefix)
if n < 0 {
// Mismatch
return
}
matches[i] = s[:n]
s = s[n+len(nextPrefix):]
s = s[n+prefixLen:]
}
}
}
func tryUnquoteString(s string) (string, int) {
func prefixIndex(s, prefix string) (int, int) {
if len(prefix) == 0 {
return 0, 0
}
n := strings.Index(s, prefix)
if n < 0 {
return -1, 0
}
return n, len(prefix)
}
func tryUnquoteString(s, opt string) (string, int) {
if opt == "plain" {
return "", -1
}
if len(s) == 0 {
return s, -1
return "", -1
}
if s[0] != '"' && s[0] != '`' {
return s, -1
return "", -1
}
qp, err := strconv.QuotedPrefix(s)
if err != nil {
return s, -1
return "", -1
}
us, err := strconv.Unquote(qp)
if err != nil {
return s, -1
return "", -1
}
return us, len(qp)
}
@ -160,7 +172,7 @@ func parsePatternSteps(s string) ([]patternStep, error) {
return nil, err
}
// Unescape prefixes
// unescape prefixes
for i := range steps {
step := &steps[i]
step.prefix = html.UnescapeString(step.prefix)
@ -171,9 +183,10 @@ func parsePatternSteps(s string) ([]patternStep, error) {
step := &steps[i]
field := step.field
if n := strings.IndexByte(field, ':'); n >= 0 {
step.opt = field[:n]
step.field = field[n+1:]
step.fieldOpt = strings.TrimSpace(field[:n])
field = field[n+1:]
}
step.field = strings.TrimSpace(field)
}
return steps, nil
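Illustrative sketch (not part of the diff) of the two helpers above; the test name is hypothetical and it assumes placement in the logstorage package next to pattern_test.go:

package logstorage

import "testing"

func TestPatternHelpersSketch(t *testing.T) {
	// tryUnquoteString unquotes a leading Go-quoted string unless the "plain" option is set.
	if v, n := tryUnquoteString(`"foo" bar`, ""); v != "foo" || n != 5 {
		t.Fatalf("unexpected result: %q, %d", v, n)
	}
	if v, n := tryUnquoteString(`"foo" bar`, "plain"); v != "" || n != -1 {
		t.Fatalf("unexpected result: %q, %d", v, n)
	}
	// prefixIndex returns the position and length of the next literal prefix;
	// an empty prefix matches at offset 0 with length 0.
	if n, l := prefixIndex("abc=def", "="); n != 3 || l != 1 {
		t.Fatalf("unexpected result: %d, %d", n, l)
	}
}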

View file

@ -63,6 +63,9 @@ func TestPatternApply(t *testing.T) {
f(`foo=<bar> `, "foo=`bar baz,abc` def", []string{"bar baz,abc"})
f(`<foo>`, `"foo,\"bar"`, []string{`foo,"bar`})
f(`<foo>,"bar`, `"foo,\"bar"`, []string{`foo,"bar`})
// disable automatic unquoting of quoted field
f(`[<plain:foo>]`, `["foo","bar"]`, []string{`"foo","bar"`})
}
func TestParsePatternFailure(t *testing.T) {
@ -196,7 +199,7 @@ func TestParsePatternStepsSuccess(t *testing.T) {
prefix: "<&>",
},
})
f("&lt;<foo>&amp;gt;", []patternStep{
f("&lt;< foo >&amp;gt;", []patternStep{
{
prefix: "<",
field: "foo",
@ -205,15 +208,15 @@ func TestParsePatternStepsSuccess(t *testing.T) {
prefix: "&gt;",
},
})
f("<q:foo>bar<abc:baz:c:y>f<:foo:bar:baz>", []patternStep{
f("< q : foo >bar<plain : baz:c:y>f<:foo:bar:baz>", []patternStep{
{
field: "foo",
opt: "q",
field: "foo",
fieldOpt: "q",
},
{
prefix: "bar",
field: "baz:c:y",
opt: "abc",
prefix: "bar",
field: "baz:c:y",
fieldOpt: "plain",
},
{
prefix: "f",

View file

@ -99,6 +99,30 @@ func TestPipeExtract(t *testing.T) {
},
})
// single row, disable unquoting
f(`extract 'foo=[< plain : bar >]' from x`, [][]Field{
{
{"x", `a foo=["bc","de"]`},
},
}, [][]Field{
{
{"x", `a foo=["bc","de"]`},
{"bar", `"bc","de"`},
},
})
// single row, default unquoting
f(`extract 'foo=[< bar >]' from x`, [][]Field{
{
{"x", `a foo=["bc","de"]`},
},
}, [][]Field{
{
{"x", `a foo=["bc","de"]`},
{"bar", `bc`},
},
})
// single row, overwrite existing column
f(`extract "foo=<bar> baz=<xx>" from x`, [][]Field{
{

View file

@ -10,7 +10,8 @@ import (
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#field-names-pipe
type pipeFieldNames struct {
// resultName is the name of the column to write results to.
// resultName is an optional name of the column to write results to.
// By default results are written into the 'name' column.
resultName string
// isFirstPipe is set to true if '| field_names' pipe is the first in the query.
@ -20,7 +21,11 @@ type pipeFieldNames struct {
}
func (pf *pipeFieldNames) String() string {
return "field_names as " + quoteTokenIfNeeded(pf.resultName)
s := "field_names"
if pf.resultName != "name" {
s += " as " + quoteTokenIfNeeded(pf.resultName)
}
return s
}
func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fieldsSet) {
@ -34,13 +39,6 @@ func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fields
func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeFieldNamesProcessorShard, workersCount)
for i := range shards {
shards[i] = pipeFieldNamesProcessorShard{
pipeFieldNamesProcessorShardNopad: pipeFieldNamesProcessorShardNopad{
m: make(map[string]struct{}),
},
}
}
pfp := &pipeFieldNamesProcessor{
pf: pf,
@ -68,8 +66,15 @@ type pipeFieldNamesProcessorShard struct {
}
type pipeFieldNamesProcessorShardNopad struct {
// m holds unique field names.
m map[string]struct{}
// m holds hits per each field name
m map[string]*uint64
}
func (shard *pipeFieldNamesProcessorShard) getM() map[string]*uint64 {
if shard.m == nil {
shard.m = make(map[string]*uint64)
}
return shard.m
}
func (pfp *pipeFieldNamesProcessor) writeBlock(workerID uint, br *blockResult) {
@ -78,12 +83,21 @@ func (pfp *pipeFieldNamesProcessor) writeBlock(workerID uint, br *blockResult) {
}
shard := &pfp.shards[workerID]
m := shard.getM()
cs := br.getColumns()
for _, c := range cs {
if _, ok := shard.m[c.name]; !ok {
pHits, ok := m[c.name]
if !ok {
nameCopy := strings.Clone(c.name)
shard.m[nameCopy] = struct{}{}
hits := uint64(0)
pHits = &hits
m[nameCopy] = pHits
}
// Assume that the column is set for all the rows in the block.
// This is much faster than reading all the column values and counting non-empty rows.
*pHits += uint64(len(br.timestamps))
}
}
@ -94,15 +108,25 @@ func (pfp *pipeFieldNamesProcessor) flush() error {
// merge state across shards
shards := pfp.shards
m := shards[0].m
m := shards[0].getM()
shards = shards[1:]
for i := range shards {
for k := range shards[i].m {
m[k] = struct{}{}
for name, pHitsSrc := range shards[i].getM() {
pHits, ok := m[name]
if !ok {
m[name] = pHitsSrc
} else {
*pHits += *pHitsSrc
}
}
}
if pfp.pf.isFirstPipe {
m["_time"] = struct{}{}
pHits := m["_stream"]
if pHits == nil {
hits := uint64(0)
pHits = &hits
}
m["_time"] = pHits
}
// write result
@ -110,8 +134,11 @@ func (pfp *pipeFieldNamesProcessor) flush() error {
pfp: pfp,
}
wctx.rcs[0].name = pfp.pf.resultName
for k := range m {
wctx.writeRow(k)
wctx.rcs[1].name = "hits"
for name, pHits := range m {
hits := string(marshalUint64String(nil, *pHits))
wctx.writeRow(name, hits)
}
wctx.flush()
@ -120,7 +147,7 @@ func (pfp *pipeFieldNamesProcessor) flush() error {
type pipeFieldNamesWriteContext struct {
pfp *pipeFieldNamesProcessor
rcs [1]resultColumn
rcs [2]resultColumn
br blockResult
// rowsCount is the number of rows in the current block
@ -130,9 +157,10 @@ type pipeFieldNamesWriteContext struct {
valuesLen int
}
func (wctx *pipeFieldNamesWriteContext) writeRow(v string) {
wctx.rcs[0].addValue(v)
wctx.valuesLen += len(v)
func (wctx *pipeFieldNamesWriteContext) writeRow(name, hits string) {
wctx.rcs[0].addValue(name)
wctx.rcs[1].addValue(hits)
wctx.valuesLen += len(name) + len(hits)
wctx.rowsCount++
if wctx.valuesLen >= 1_000_000 {
wctx.flush()
@ -145,11 +173,12 @@ func (wctx *pipeFieldNamesWriteContext) flush() {
wctx.valuesLen = 0
// Flush rcs to ppBase
br.setResultColumns(wctx.rcs[:1], wctx.rowsCount)
br.setResultColumns(wctx.rcs[:], wctx.rowsCount)
wctx.rowsCount = 0
wctx.pfp.ppBase.writeBlock(0, br)
br.reset()
wctx.rcs[0].resetValues()
wctx.rcs[1].resetValues()
}
func parsePipeFieldNames(lex *lexer) (*pipeFieldNames, error) {
@ -158,12 +187,20 @@ func parsePipeFieldNames(lex *lexer) (*pipeFieldNames, error) {
}
lex.nextToken()
resultName := "name"
if lex.isKeyword("as") {
lex.nextToken()
}
resultName, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result name for 'field_names': %w", err)
name, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result name for 'field_names': %w", err)
}
resultName = name
} else if !lex.isKeyword("", "|") {
name, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result name for 'field_names': %w", err)
}
resultName = name
}
pf := &pipeFieldNames{

View file

@ -10,6 +10,7 @@ func TestParsePipeFieldNamesSuccess(t *testing.T) {
expectParsePipeSuccess(t, pipeStr)
}
f(`field_names`)
f(`field_names as x`)
}
@ -19,7 +20,6 @@ func TestParsePipeFieldNamesFailure(t *testing.T) {
expectParsePipeFailure(t, pipeStr)
}
f(`field_names`)
f(`field_names(foo)`)
f(`field_names a b`)
f(`field_names as`)
@ -32,32 +32,47 @@ func TestPipeFieldNames(t *testing.T) {
}
// single row, result column doesn't clash with original columns
f("field_names as x", [][]Field{
f("field_names", [][]Field{
{
{"_msg", `{"foo":"bar"}`},
{"a", `test`},
},
}, [][]Field{
{
{"x", "_msg"},
{"name", "_msg"},
{"hits", "1"},
},
{
{"x", "a"},
{"name", "a"},
{"hits", "1"},
},
})
// single row, result column do clashes with original columns
f("field_names as _msg", [][]Field{
f("field_names as x", [][]Field{
{
{"_msg", `{"foo":"bar"}`},
{"a", `test`},
{"b", "aaa"},
},
{
{"a", `bar`},
},
{
{"a", `bar`},
{"c", `bar`},
},
}, [][]Field{
{
{"_msg", "_msg"},
{"x", "a"},
{"hits", "3"},
},
{
{"_msg", "a"},
{"x", "b"},
{"hits", "1"},
},
{
{"x", "c"},
{"hits", "1"},
},
})
}

View file

@ -137,7 +137,7 @@ func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult
if step.field != "" {
c := br.getColumnByName(step.field)
v := c.getValueAtRow(br, rowIdx)
if step.opt == "q" {
if step.fieldOpt == "q" {
b = strconv.AppendQuote(b, v)
} else {
b = append(b, v...)

View file

@ -477,14 +477,12 @@ func (wctx *pipeTopkWriteContext) writeNextRow(shard *pipeTopkProcessorShard) bo
wctx.rcs = rcs
}
var tmpBuf []byte
byColumns := r.byColumns
byColumnsIsTime := r.byColumnsIsTime
for i := range byFields {
v := byColumns[i]
if byColumnsIsTime[i] {
tmpBuf = marshalTimestampRFC3339NanoString(tmpBuf[:0], r.timestamp)
v = bytesutil.ToUnsafeString(tmpBuf)
v = string(marshalTimestampRFC3339NanoString(nil, r.timestamp))
}
rcs[i].addValue(v)
wctx.valuesLen += len(v)

View file

@ -20,6 +20,9 @@ type pipeUniq struct {
// fields contains field names for returning unique values
byFields []string
// if hitsFieldName isn't empty, then the number of hits per each unique value is stored in this field.
hitsFieldName string
limit uint64
}
@ -28,6 +31,9 @@ func (pu *pipeUniq) String() string {
if len(pu.byFields) > 0 {
s += " by (" + fieldNamesString(pu.byFields) + ")"
}
if pu.hitsFieldName != "" {
s += " hits"
}
if pu.limit > 0 {
s += fmt.Sprintf(" limit %d", pu.limit)
}
@ -53,7 +59,6 @@ func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, c
shards[i] = pipeUniqProcessorShard{
pipeUniqProcessorShardNopad: pipeUniqProcessorShardNopad{
pu: pu,
m: make(map[string]struct{}),
stateSizeBudget: stateSizeBudgetChunk,
},
}
@ -98,8 +103,8 @@ type pipeUniqProcessorShardNopad struct {
// pu points to the parent pipeUniq.
pu *pipeUniq
// m holds unique rows.
m map[string]struct{}
// m holds per-row hits.
m map[string]*uint64
// keyBuf is a temporary buffer for building keys for m.
keyBuf []byte
@ -120,6 +125,7 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
return false
}
needHits := shard.pu.hitsFieldName != ""
byFields := shard.pu.byFields
if len(byFields) == 0 {
// Take into account all the columns in br.
@ -132,7 +138,7 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.name))
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
}
shard.updateState(bytesutil.ToUnsafeString(keyBuf))
shard.updateState(bytesutil.ToUnsafeString(keyBuf), 1)
}
shard.keyBuf = keyBuf
return true
@ -142,20 +148,34 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
c := br.getColumnByName(byFields[0])
if c.isConst {
v := c.valuesEncoded[0]
shard.updateState(v)
shard.updateState(v, uint64(len(br.timestamps)))
return true
}
if c.valueType == valueTypeDict {
for _, v := range c.dictValues {
shard.updateState(v)
if needHits {
a := encoding.GetUint64s(len(c.dictValues))
hits := a.A
valuesEncoded := c.getValuesEncoded(br)
for _, v := range valuesEncoded {
idx := unmarshalUint8(v)
hits[idx]++
}
for i, v := range c.dictValues {
shard.updateState(v, hits[i])
}
encoding.PutUint64s(a)
} else {
for _, v := range c.dictValues {
shard.updateState(v, 0)
}
}
return true
}
values := c.getValues(br)
for i, v := range values {
if i == 0 || values[i-1] != values[i] {
shard.updateState(v)
if needHits || i == 0 || values[i-1] != values[i] {
shard.updateState(v, 1)
}
}
return true
@ -174,7 +194,7 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
for i := range br.timestamps {
seenValue := true
for _, values := range columnValues {
if i == 0 || values[i-1] != values[i] {
if needHits || i == 0 || values[i-1] != values[i] {
seenValue = false
break
}
@ -187,19 +207,31 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
for _, values := range columnValues {
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(values[i]))
}
shard.updateState(bytesutil.ToUnsafeString(keyBuf))
shard.updateState(bytesutil.ToUnsafeString(keyBuf), 1)
}
shard.keyBuf = keyBuf
return true
}
func (shard *pipeUniqProcessorShard) updateState(v string) {
if _, ok := shard.m[v]; !ok {
func (shard *pipeUniqProcessorShard) updateState(v string, hits uint64) {
m := shard.getM()
pHits, ok := m[v]
if !ok {
vCopy := strings.Clone(v)
shard.m[vCopy] = struct{}{}
shard.stateSizeBudget -= len(vCopy) + int(unsafe.Sizeof(vCopy))
hits := uint64(0)
pHits = &hits
m[vCopy] = pHits
shard.stateSizeBudget -= len(vCopy) + int(unsafe.Sizeof(vCopy)+unsafe.Sizeof(hits)+unsafe.Sizeof(pHits))
}
*pHits += hits
}
func (shard *pipeUniqProcessorShard) getM() map[string]*uint64 {
if shard.m == nil {
shard.m = make(map[string]*uint64)
}
return shard.m
}
func (pup *pipeUniqProcessor) writeBlock(workerID uint, br *blockResult) {
@ -235,18 +267,27 @@ func (pup *pipeUniqProcessor) flush() error {
// merge state across shards
shards := pup.shards
m := shards[0].m
m := shards[0].getM()
shards = shards[1:]
for i := range shards {
if needStop(pup.stopCh) {
return nil
}
for k := range shards[i].m {
m[k] = struct{}{}
for k, pHitsSrc := range shards[i].getM() {
pHits, ok := m[k]
if !ok {
m[k] = pHitsSrc
} else {
*pHits += *pHitsSrc
}
}
}
// There is little sense in returning partial hits when the limit on the number of unique entries is reached.
// It is better from a UX perspective to return zero hits instead.
resetHits := pup.pu.limit > 0 && uint64(len(m)) >= pup.pu.limit
// write result
wctx := &pipeUniqWriteContext{
pup: pup,
@ -254,8 +295,23 @@ func (pup *pipeUniqProcessor) flush() error {
byFields := pup.pu.byFields
var rowFields []Field
addHitsFieldIfNeeded := func(dst []Field, hits uint64) []Field {
if pup.pu.hitsFieldName == "" {
return dst
}
if resetHits {
hits = 0
}
hitsStr := string(marshalUint64String(nil, hits))
dst = append(dst, Field{
Name: pup.pu.hitsFieldName,
Value: hitsStr,
})
return dst
}
if len(byFields) == 0 {
for k := range m {
for k, pHits := range m {
if needStop(pup.stopCh) {
return nil
}
@ -280,11 +336,12 @@ func (pup *pipeUniqProcessor) flush() error {
Value: bytesutil.ToUnsafeString(value),
})
}
rowFields = addHitsFieldIfNeeded(rowFields, *pHits)
wctx.writeRow(rowFields)
}
} else if len(byFields) == 1 {
fieldName := byFields[0]
for k := range m {
for k, pHits := range m {
if needStop(pup.stopCh) {
return nil
}
@ -293,10 +350,11 @@ func (pup *pipeUniqProcessor) flush() error {
Name: fieldName,
Value: k,
})
rowFields = addHitsFieldIfNeeded(rowFields, *pHits)
wctx.writeRow(rowFields)
}
} else {
for k := range m {
for k, pHits := range m {
if needStop(pup.stopCh) {
return nil
}
@ -317,6 +375,7 @@ func (pup *pipeUniqProcessor) flush() error {
})
fieldIdx++
}
rowFields = addHitsFieldIfNeeded(rowFields, *pHits)
wctx.writeRow(rowFields)
}
}
@ -418,6 +477,16 @@ func parsePipeUniq(lex *lexer) (*pipeUniq, error) {
pu.byFields = bfs
}
if lex.isKeyword("hits") {
lex.nextToken()
hitsFieldName := "hits"
for slices.Contains(pu.byFields, hitsFieldName) {
hitsFieldName += "s"
}
pu.hitsFieldName = hitsFieldName
}
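Illustration (a sketch, not from the diff): when "hits" already appears among the by(...) fields, the loop above keeps appending "s" until the column name is unique. The helper name below is hypothetical:

package main

import (
	"fmt"
	"slices"
)

// pickHitsFieldName mirrors the collision-resolution loop above.
func pickHitsFieldName(byFields []string) string {
	name := "hits"
	for slices.Contains(byFields, name) {
		name += "s"
	}
	return name
}

func main() {
	fmt.Println(pickHitsFieldName([]string{"a", "b"})) // hits
	fmt.Println(pickHitsFieldName([]string{"hits"}))   // hitss
}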
if lex.isKeyword("limit") {
lex.nextToken()
n, ok := tryParseUint64(lex.token)

View file

@ -11,11 +11,15 @@ func TestParsePipeUniqSuccess(t *testing.T) {
}
f(`uniq`)
f(`uniq hits`)
f(`uniq limit 10`)
f(`uniq hits limit 10`)
f(`uniq by (x)`)
f(`uniq by (x) limit 10`)
f(`uniq by (x, y)`)
f(`uniq by (x, y) hits`)
f(`uniq by (x, y) limit 10`)
f(`uniq by (x, y) hits limit 10`)
}
func TestParsePipeUniqFailure(t *testing.T) {
@ -26,6 +30,7 @@ func TestParsePipeUniqFailure(t *testing.T) {
f(`uniq foo`)
f(`uniq by`)
f(`uniq by hits`)
f(`uniq by(x) limit`)
f(`uniq by(x) limit foo`)
}
@ -62,6 +67,62 @@ func TestPipeUniq(t *testing.T) {
},
})
f("uniq hits", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"a", "2"},
{"b", "3"},
{"hits", "2"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
{"hits", "1"},
},
})
f("uniq hits limit 2", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"a", "2"},
{"b", "3"},
{"hits", "0"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
{"hits", "0"},
},
})
f("uniq by (a)", [][]Field{
{
{"a", `2`},
@ -82,6 +143,27 @@ func TestPipeUniq(t *testing.T) {
},
})
f("uniq by (a) hits", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"a", "2"},
{"hits", "3"},
},
})
f("uniq by (b)", [][]Field{
{
{"a", `2`},
@ -105,6 +187,31 @@ func TestPipeUniq(t *testing.T) {
},
})
f("uniq by (b) hits", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"b", "3"},
{"hits", "2"},
},
{
{"b", "54"},
{"hits", "1"},
},
})
f("uniq by (c)", [][]Field{
{
{"a", `2`},
@ -128,6 +235,31 @@ func TestPipeUniq(t *testing.T) {
},
})
f("uniq by (c) hits", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"c", ""},
{"hits", "2"},
},
{
{"c", "d"},
{"hits", "1"},
},
})
f("uniq by (d)", [][]Field{
{
{"a", `2`},
@ -148,6 +280,27 @@ func TestPipeUniq(t *testing.T) {
},
})
f("uniq by (d) hits", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"d", ""},
{"hits", "3"},
},
})
f("uniq by (a, b)", [][]Field{
{
{"a", `2`},
@ -172,6 +325,33 @@ func TestPipeUniq(t *testing.T) {
{"b", "54"},
},
})
f("uniq by (a, b) hits", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"a", "2"},
{"b", "3"},
{"hits", "2"},
},
{
{"a", "2"},
{"b", "54"},
{"hits", "1"},
},
})
}
func TestPipeUniqUpdateNeededFields(t *testing.T) {

View file

@ -145,9 +145,9 @@ func (s *Storage) runQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
}
// GetFieldNames returns field names from q results for the given tenantIDs.
func (s *Storage) GetFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]string, error) {
func (s *Storage) GetFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
pipes := append([]pipe{}, q.pipes...)
pipeStr := "field_names as names | sort by (names)"
pipeStr := "field_names"
lex := newLexer(pipeStr)
pf, err := parsePipeFieldNames(lex)
@ -156,36 +156,24 @@ func (s *Storage) GetFieldNames(ctx context.Context, tenantIDs []TenantID, q *Qu
}
pf.isFirstPipe = len(pipes) == 0
if !lex.isKeyword("|") {
logger.Panicf("BUG: unexpected token after 'field_names' pipe at [%s]: %q", pipeStr, lex.token)
}
lex.nextToken()
ps, err := parsePipeSort(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing 'sort' pipe at [%s]: %s", pipeStr, err)
}
if !lex.isEnd() {
logger.Panicf("BUG: unexpected tail left after parsing pipes [%s]: %q", pipeStr, lex.s)
}
pipes = append(pipes, pf, ps)
pipes = append(pipes, pf)
q = &Query{
f: q.f,
pipes: pipes,
}
return s.runSingleColumnQuery(ctx, tenantIDs, q)
return s.runValuesWithHitsQuery(ctx, tenantIDs, q)
}
// GetFieldValues returns unique values for the given fieldName returned by q for the given tenantIDs.
//
// If limit > 0, then up to limit unique values are returned.
func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]string, error) {
func (s *Storage) getFieldValuesNoHits(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string) ([]string, error) {
pipes := append([]pipe{}, q.pipes...)
quotedFieldName := quoteTokenIfNeeded(fieldName)
pipeStr := fmt.Sprintf("uniq by (%s) limit %d | sort by (%s)", quotedFieldName, limit, quotedFieldName)
pipeStr := fmt.Sprintf("uniq by (%s)", quotedFieldName)
lex := newLexer(pipeStr)
pu, err := parsePipeUniq(lex)
@ -193,87 +181,17 @@ func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Q
logger.Panicf("BUG: unexpected error when parsing 'uniq' pipe at [%s]: %s", pipeStr, err)
}
if !lex.isKeyword("|") {
logger.Panicf("BUG: unexpected token after 'uniq' pipe at [%s]: %q", pipeStr, lex.token)
}
lex.nextToken()
ps, err := parsePipeSort(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing 'sort' pipe at [%s]: %s", pipeStr, err)
}
if !lex.isEnd() {
logger.Panicf("BUG: unexpected tail left after parsing pipes [%s]: %q", pipeStr, lex.s)
}
pipes = append(pipes, pu, ps)
pipes = append(pipes, pu)
q = &Query{
f: q.f,
pipes: pipes,
}
return s.runSingleColumnQuery(ctx, tenantIDs, q)
}
// GetStreamLabelNames returns stream label names from q results for the given tenantIDs.
func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]string, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil {
return nil, err
}
var names []string
m := make(map[string]struct{})
forEachStreamLabel(streams, func(label Field) {
if _, ok := m[label.Name]; !ok {
nameCopy := strings.Clone(label.Name)
names = append(names, nameCopy)
m[nameCopy] = struct{}{}
}
})
sortStrings(names)
return names, nil
}
// GetStreamLabelValues returns stream label values for the given labelName from q results for the given tenantIDs.
//
// If limit > 9, then up to limit unique label values are returned.
func (s *Storage) GetStreamLabelValues(ctx context.Context, tenantIDs []TenantID, q *Query, labelName string, limit uint64) ([]string, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil {
return nil, err
}
var values []string
m := make(map[string]struct{})
forEachStreamLabel(streams, func(label Field) {
if label.Name != labelName {
return
}
if _, ok := m[label.Value]; !ok {
valueCopy := strings.Clone(label.Value)
values = append(values, valueCopy)
m[valueCopy] = struct{}{}
}
})
if uint64(len(values)) > limit {
values = values[:limit]
}
sortStrings(values)
return values, nil
}
// GetStreams returns streams from q results for the given tenantIDs.
//
// If limit > 0, then up to limit unique streams are returned.
func (s *Storage) GetStreams(ctx context.Context, tenantIDs []TenantID, q *Query, limit uint64) ([]string, error) {
return s.GetFieldValues(ctx, tenantIDs, q, "_stream", limit)
}
func (s *Storage) runSingleColumnQuery(ctx context.Context, tenantIDs []TenantID, q *Query) ([]string, error) {
var values []string
var valuesLock sync.Mutex
writeBlockResult := func(_ uint, br *blockResult) {
@ -283,13 +201,14 @@ func (s *Storage) runSingleColumnQuery(ctx context.Context, tenantIDs []TenantID
cs := br.getColumns()
if len(cs) != 1 {
logger.Panicf("BUG: expecting only a single column; got %d columns", len(cs))
logger.Panicf("BUG: expecting one column; got %d columns", len(cs))
}
columnValues := cs[0].getValues(br)
columnValuesCopy := make([]string, len(columnValues))
for i, v := range columnValues {
columnValuesCopy[i] = strings.Clone(v)
for i := range columnValues {
columnValuesCopy[i] = strings.Clone(columnValues[i])
}
valuesLock.Lock()
@ -297,21 +216,182 @@ func (s *Storage) runSingleColumnQuery(ctx context.Context, tenantIDs []TenantID
valuesLock.Unlock()
}
err := s.runQuery(ctx, tenantIDs, q, writeBlockResult)
if err != nil {
if err := s.runQuery(ctx, tenantIDs, q, writeBlockResult); err != nil {
return nil, err
}
return values, nil
}
// GetFieldValues returns unique values with the number of hits for the given fieldName returned by q for the given tenantIDs.
//
// If limit > 0, then up to limit unique values are returned.
func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) {
pipes := append([]pipe{}, q.pipes...)
quotedFieldName := quoteTokenIfNeeded(fieldName)
pipeStr := fmt.Sprintf("uniq by (%s) hits limit %d", quotedFieldName, limit)
lex := newLexer(pipeStr)
pu, err := parsePipeUniq(lex)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing 'uniq' pipe at [%s]: %s", pipeStr, err)
}
if !lex.isEnd() {
logger.Panicf("BUG: unexpected tail left after parsing pipes [%s]: %q", pipeStr, lex.s)
}
pipes = append(pipes, pu)
q = &Query{
f: q.f,
pipes: pipes,
}
return s.runValuesWithHitsQuery(ctx, tenantIDs, q)
}
// ValueWithHits contains value and hits.
type ValueWithHits struct {
Value string
Hits uint64
}
func toValuesWithHits(m map[string]*uint64) []ValueWithHits {
results := make([]ValueWithHits, 0, len(m))
for k, pHits := range m {
results = append(results, ValueWithHits{
Value: k,
Hits: *pHits,
})
}
sortValuesWithHits(results)
return results
}
func sortValuesWithHits(results []ValueWithHits) {
slices.SortFunc(results, func(a, b ValueWithHits) int {
if a.Hits == b.Hits {
if a.Value == b.Value {
return 0
}
if lessString(a.Value, b.Value) {
return -1
}
return 1
}
// Sort in descending order of hits
if a.Hits < b.Hits {
return 1
}
return -1
})
}
// GetStreamLabelNames returns stream label names from q results for the given tenantIDs.
func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil {
return nil, err
}
m := make(map[string]*uint64)
forEachStreamLabel(streams, func(label Field, hits uint64) {
pHits, ok := m[label.Name]
if !ok {
nameCopy := strings.Clone(label.Name)
hitsLocal := uint64(0)
pHits = &hitsLocal
m[nameCopy] = pHits
}
*pHits += hits
})
names := toValuesWithHits(m)
return names, nil
}
// GetStreamLabelValues returns stream label values for the given labelName from q results for the given tenantIDs.
//
// If limit > 0, then up to limit unique label values are returned.
func (s *Storage) GetStreamLabelValues(ctx context.Context, tenantIDs []TenantID, q *Query, labelName string, limit uint64) ([]ValueWithHits, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil {
return nil, err
}
m := make(map[string]*uint64)
forEachStreamLabel(streams, func(label Field, hits uint64) {
if label.Name != labelName {
return
}
pHits, ok := m[label.Value]
if !ok {
valueCopy := strings.Clone(label.Value)
hitsLocal := uint64(0)
pHits = &hitsLocal
m[valueCopy] = pHits
}
*pHits += hits
})
values := toValuesWithHits(m)
if limit > 0 && uint64(len(values)) > limit {
values = values[:limit]
}
return values, nil
}
// GetStreams returns streams from q results for the given tenantIDs.
//
// If limit > 0, then up to limit unique streams are returned.
func (s *Storage) GetStreams(ctx context.Context, tenantIDs []TenantID, q *Query, limit uint64) ([]ValueWithHits, error) {
return s.GetFieldValues(ctx, tenantIDs, q, "_stream", limit)
}
func (s *Storage) runValuesWithHitsQuery(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
var results []ValueWithHits
var resultsLock sync.Mutex
writeBlockResult := func(_ uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
cs := br.getColumns()
if len(cs) != 2 {
logger.Panicf("BUG: expecting two columns; got %d columns", len(cs))
}
columnValues := cs[0].getValues(br)
columnHits := cs[1].getValues(br)
valuesWithHits := make([]ValueWithHits, len(columnValues))
for i := range columnValues {
x := &valuesWithHits[i]
hits, _ := tryParseUint64(columnHits[i])
x.Value = strings.Clone(columnValues[i])
x.Hits = hits
}
resultsLock.Lock()
results = append(results, valuesWithHits...)
resultsLock.Unlock()
}
err := s.runQuery(ctx, tenantIDs, q, writeBlockResult)
if err != nil {
return nil, err
}
sortValuesWithHits(results)
return results, nil
}
func (s *Storage) initFilterInValues(ctx context.Context, tenantIDs []TenantID, q *Query) (*Query, error) {
if !hasFilterInWithQueryForFilter(q.f) && !hasFilterInWithQueryForPipes(q.pipes) {
return q, nil
}
getFieldValues := func(q *Query, fieldName string) ([]string, error) {
return s.GetFieldValues(ctx, tenantIDs, q, fieldName, 0)
return s.getFieldValuesNoHits(ctx, tenantIDs, q, fieldName)
}
cache := make(map[string][]string)
fNew, err := initFilterInValuesForFilter(cache, q.f, getFieldValues)
@ -1007,16 +1087,17 @@ func getFilterTimeRange(f filter) (int64, int64) {
return math.MinInt64, math.MaxInt64
}
func forEachStreamLabel(streams []string, f func(label Field)) {
func forEachStreamLabel(streams []ValueWithHits, f func(label Field, hits uint64)) {
var labels []Field
for _, stream := range streams {
for i := range streams {
var err error
labels, err = parseStreamLabels(labels[:0], stream)
labels, err = parseStreamLabels(labels[:0], streams[i].Value)
if err != nil {
continue
}
for i := range labels {
f(labels[i])
hits := streams[i].Hits
for j := range labels {
f(labels[j], hits)
}
}
}
@ -1042,7 +1123,7 @@ func parseStreamLabels(dst []Field, s string) ([]Field, error) {
name := s[:n]
s = s[n+1:]
value, nOffset := tryUnquoteString(s)
value, nOffset := tryUnquoteString(s, "")
if nOffset < 0 {
return dst, fmt.Errorf("cannot find parse label value in double quotes at [%s]", s)
}
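Usage sketch (not part of the diff): GetFieldValues now returns per-value hit counts, sorted by hits in descending order. The package name, query string and helper name below are illustrative only:

package example

import (
	"context"
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

// printTopLevels prints up to 10 most frequent values of the "level" field.
func printTopLevels(ctx context.Context, s *logstorage.Storage, tenantIDs []logstorage.TenantID) error {
	q, err := logstorage.ParseQuery(`_time:1h error`)
	if err != nil {
		return err
	}
	values, err := s.GetFieldValues(ctx, tenantIDs, q, "level", 10)
	if err != nil {
		return err
	}
	for _, v := range values {
		fmt.Printf("%s: %d hits\n", v.Value, v.Hits)
	}
	return nil
}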

View file

@ -3,7 +3,6 @@ package logstorage
import (
"context"
"fmt"
"regexp"
"sync/atomic"
"testing"
"time"
@ -582,7 +581,7 @@ func TestStorageSearch(t *testing.T) {
f,
&filterRegexp{
fieldName: "_msg",
re: regexp.MustCompile("message [02] at "),
re: mustCompileRegex("message [02] at "),
},
},
}

View file

@ -435,9 +435,9 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
}
func isDefaultRegex(expr string) bool {
prefix, suffix := regexutil.Simplify(expr)
prefix, suffix := regexutil.SimplifyPromRegex(expr)
if prefix != "" {
return false
}
return suffix == "(?-s:.*)"
return suffix == "(?s:.*)"
}

View file

@ -19,13 +19,21 @@ import (
//
// The rest of the regexps are also optimized by returning cached match results for the same input strings.
type PromRegex struct {
// exprStr is the original expression.
exprStr string
// prefix contains literal prefix for regex.
// For example, prefix="foo" for regex="foo(a|b)"
prefix string
// Suffix contains regex suffix left after removing the prefix.
// For example, suffix="a|b" for regex="foo(a|b)"
suffix string
// isOnlyPrefix is set to true if the regex contains only the prefix.
isOnlyPrefix bool
// isSuffixDotStar is set to true if suffix is ".*"
isSuffixDotStar bool
// isSuffixDotPlus is set to true if suffix is ".+"
isSuffixDotPlus bool
// substrDotStar contains literal string for regex suffix=".*string.*"
substrDotStar string
@ -46,18 +54,25 @@ func NewPromRegex(expr string) (*PromRegex, error) {
if _, err := regexp.Compile(expr); err != nil {
return nil, err
}
prefix, suffix := Simplify(expr)
orValues := GetOrValues(suffix)
substrDotStar := getSubstringLiteral(suffix, ".*")
substrDotPlus := getSubstringLiteral(suffix, ".+")
prefix, suffix := SimplifyPromRegex(expr)
sre := mustParseRegexp(suffix)
orValues := getOrValues(sre)
isOnlyPrefix := len(orValues) == 1 && orValues[0] == ""
isSuffixDotStar := isDotOp(sre, syntax.OpStar)
isSuffixDotPlus := isDotOp(sre, syntax.OpPlus)
substrDotStar := getSubstringLiteral(sre, syntax.OpStar)
substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus)
// It is expected that SimplifyPromRegex returns a valid regexp in suffix, so use MustCompile here.
// Anchor suffix to the beginning and the end of the matching string.
suffixExpr := "^(?:" + suffix + ")$"
reSuffix := regexp.MustCompile(suffixExpr)
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
pr := &PromRegex{
exprStr: expr,
prefix: prefix,
suffix: suffix,
isOnlyPrefix: isOnlyPrefix,
isSuffixDotStar: isSuffixDotStar,
isSuffixDotPlus: isSuffixDotPlus,
substrDotStar: substrDotStar,
substrDotPlus: substrDotPlus,
orValues: orValues,
@ -71,19 +86,25 @@ func NewPromRegex(expr string) (*PromRegex, error) {
// The pr is automatically anchored to the beginning and to the end
// of the matching string with '^' and '$'.
func (pr *PromRegex) MatchString(s string) bool {
if !strings.HasPrefix(s, pr.prefix) {
// Fast path - s has another prefix than pr.
return false
if pr.isOnlyPrefix {
return s == pr.prefix
}
s = s[len(pr.prefix):]
if len(pr.orValues) > 0 {
// Fast path - pr contains only alternate strings such as 'foo|bar|baz'
for _, v := range pr.orValues {
if s == v {
return true
}
if len(pr.prefix) > 0 {
if !strings.HasPrefix(s, pr.prefix) {
// Fast path - s has another prefix than pr.
return false
}
return false
s = s[len(pr.prefix):]
}
if pr.isSuffixDotStar {
// Fast path - the pr contains "prefix.*"
return true
}
if pr.isSuffixDotPlus {
// Fast path - the pr contains "prefix.+"
return len(s) > 0
}
if pr.substrDotStar != "" {
// Fast path - pr contains ".*someText.*"
@ -94,45 +115,22 @@ func (pr *PromRegex) MatchString(s string) bool {
n := strings.Index(s, pr.substrDotPlus)
return n > 0 && n+len(pr.substrDotPlus) < len(s)
}
switch pr.suffix {
case ".*":
// Fast path - the pr contains "prefix.*"
return true
case ".+":
// Fast path - the pr contains "prefix.+"
return len(s) > 0
if len(pr.orValues) > 0 {
// Fast path - pr contains only alternate strings such as 'foo|bar|baz'
for _, v := range pr.orValues {
if s == v {
return true
}
}
return false
}
// Fall back to slow path by matching the original regexp.
return pr.reSuffixMatcher.Match(s)
}
// getSubstringLiteral returns regex part from expr surrounded by prefixSuffix.
//
// For example, if expr=".+foo.+" and prefixSuffix=".+", then the function returns "foo".
//
// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix
// or if the regex part surrounded by prefixSuffix contains alternate regexps.
func getSubstringLiteral(expr, prefixSuffix string) string {
// Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix.
sre, err := syntax.Parse(expr, syntax.Perl)
if err != nil {
return ""
}
if sre.Op == syntax.OpAlternate {
return ""
}
if !strings.HasPrefix(expr, prefixSuffix) {
return ""
}
expr = expr[len(prefixSuffix):]
if !strings.HasSuffix(expr, prefixSuffix) {
return ""
}
expr = expr[:len(expr)-len(prefixSuffix)]
prefix, suffix := Simplify(expr)
if suffix != "" {
return ""
}
return prefix
// String returns string representation of pr.
func (pr *PromRegex) String() string {
return pr.exprStr
}
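Usage sketch (not from the diff) for the PromRegex fast paths above; note that PromRegex matches are anchored at both ends:

package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)

func main() {
	pr, err := regexutil.NewPromRegex("foo(bar|baz)")
	if err != nil {
		panic(err)
	}
	fmt.Println(pr.MatchString("foobaz"))   // true: prefix "foo" plus or-value "baz"
	fmt.Println(pr.MatchString("a foobaz")) // false: the match is anchored
	fmt.Println(pr.String())                // foo(bar|baz)
}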

View file

@ -8,6 +8,7 @@ import (
func TestPromRegexParseFailure(t *testing.T) {
f := func(expr string) {
t.Helper()
pr, err := NewPromRegex(expr)
if err == nil {
t.Fatalf("expecting non-nil error for expr=%s", expr)
@ -23,10 +24,15 @@ func TestPromRegexParseFailure(t *testing.T) {
func TestPromRegex(t *testing.T) {
f := func(expr, s string, resultExpected bool) {
t.Helper()
pr, err := NewPromRegex(expr)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
exprResult := pr.String()
if exprResult != expr {
t.Fatalf("unexpected string representation for %q: %q", expr, exprResult)
}
result := pr.MatchString(s)
if result != resultExpected {
t.Fatalf("unexpected result when matching %q against %q; got %v; want %v", expr, s, result, resultExpected)
@ -40,6 +46,7 @@ func TestPromRegex(t *testing.T) {
t.Fatalf("unexpected result when matching %q against %q during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected)
}
}
f("", "", true)
f("", "foo", false)
f("foo", "", false)
@ -118,4 +125,8 @@ func TestPromRegex(t *testing.T) {
f(".*;|;.*", "foo;bar", false)
f(".*;|;.*", "foo;", true)
f(".*;|;.*", ";foo", true)
f(".*foo(bar|baz)", "fooxfoobaz", true)
f(".*foo(bar|baz)", "fooxfooban", false)
f(".*foo(bar|baz)", "fooxfooban foobar", true)
}

209
lib/regexutil/regex.go Normal file
View file

@ -0,0 +1,209 @@
package regexutil
import (
"regexp"
"regexp/syntax"
"strings"
)
// Regex implements an optimized string matching for Go regex.
//
// The following regexps are optimized:
//
// - plain string such as "foobar"
// - alternate strings such as "foo|bar|baz"
// - prefix match such as "foo.*" or "foo.+"
// - substring match such as ".*foo.*" or ".+bar.+"
type Regex struct {
// exprStr is the original expression.
exprStr string
// prefix contains literal prefix for regex.
// For example, prefix="foo" for regex="foo(a|b)"
prefix string
// isOnlyPrefix is set to true if the regex contains only the prefix.
isOnlyPrefix bool
// isSuffixDotStar is set to true if suffix is ".*"
isSuffixDotStar bool
// isSuffixDotPlus is set to true if suffix is ".+"
isSuffixDotPlus bool
// substrDotStar contains literal string for regex suffix=".*string.*"
substrDotStar string
// substrDotPlus contains literal string for regex suffix=".+string.+"
substrDotPlus string
// orValues contains or values for the suffix regex.
// For example, orValues contains ["foo","bar","baz"] for regex suffix="foo|bar|baz"
orValues []string
// suffixRe is the regexp for suffix
suffixRe *regexp.Regexp
}
// NewRegex returns Regex for the given expr.
func NewRegex(expr string) (*Regex, error) {
if _, err := regexp.Compile(expr); err != nil {
return nil, err
}
prefix, suffix := SimplifyRegex(expr)
sre := mustParseRegexp(suffix)
orValues := getOrValues(sre)
isOnlyPrefix := len(orValues) == 1 && orValues[0] == ""
isSuffixDotStar := isDotOp(sre, syntax.OpStar)
isSuffixDotPlus := isDotOp(sre, syntax.OpPlus)
substrDotStar := getSubstringLiteral(sre, syntax.OpStar)
substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus)
suffixAnchored := suffix
if len(prefix) > 0 {
suffixAnchored = "^(?:" + suffix + ")"
}
// The suffixAnchored must be properly compiled, since it has already been checked above.
// Otherwise it is a bug, which must be fixed.
suffixRe := regexp.MustCompile(suffixAnchored)
r := &Regex{
exprStr: expr,
prefix: prefix,
isOnlyPrefix: isOnlyPrefix,
isSuffixDotStar: isSuffixDotStar,
isSuffixDotPlus: isSuffixDotPlus,
substrDotStar: substrDotStar,
substrDotPlus: substrDotPlus,
orValues: orValues,
suffixRe: suffixRe,
}
return r, nil
}
// MatchString returns true if s matches r.
func (r *Regex) MatchString(s string) bool {
if r.isOnlyPrefix {
return strings.Contains(s, r.prefix)
}
if len(r.prefix) == 0 {
return r.matchStringNoPrefix(s)
}
return r.matchStringWithPrefix(s)
}
// GetLiterals returns literals for r.
func (r *Regex) GetLiterals() []string {
sre := mustParseRegexp(r.exprStr)
for sre.Op == syntax.OpCapture {
sre = sre.Sub[0]
}
v, ok := getLiteral(sre)
if ok {
return []string{v}
}
if sre.Op != syntax.OpConcat {
return nil
}
var a []string
for _, sub := range sre.Sub {
v, ok := getLiteral(sub)
if ok {
a = append(a, v)
}
}
return a
}
// String returns string representation of r
func (r *Regex) String() string {
return r.exprStr
}
func (r *Regex) matchStringNoPrefix(s string) bool {
if r.isSuffixDotStar {
return true
}
if r.isSuffixDotPlus {
return len(s) > 0
}
if r.substrDotStar != "" {
// Fast path - r contains ".*someText.*"
return strings.Contains(s, r.substrDotStar)
}
if r.substrDotPlus != "" {
// Fast path - r contains ".+someText.+"
n := strings.Index(s, r.substrDotPlus)
return n > 0 && n+len(r.substrDotPlus) < len(s)
}
if len(r.orValues) == 0 {
// Fall back to slow path by matching the suffix regexp.
return r.suffixRe.MatchString(s)
}
// Fast path - compare s to r.orValues
for _, v := range r.orValues {
if strings.Contains(s, v) {
return true
}
}
return false
}
func (r *Regex) matchStringWithPrefix(s string) bool {
n := strings.Index(s, r.prefix)
if n < 0 {
// Fast path - s doesn't contain the needed prefix
return false
}
sNext := s[n+1:]
s = s[n+len(r.prefix):]
if r.isSuffixDotStar {
return true
}
if r.isSuffixDotPlus {
return len(s) > 0
}
if r.substrDotStar != "" {
// Fast path - r contains ".*someText.*"
return strings.Contains(s, r.substrDotStar)
}
if r.substrDotPlus != "" {
// Fast path - r contains ".+someText.+"
n := strings.Index(s, r.substrDotPlus)
return n > 0 && n+len(r.substrDotPlus) < len(s)
}
for {
if len(r.orValues) == 0 {
// Fall back to slow path by matching the suffix regexp.
if r.suffixRe.MatchString(s) {
return true
}
} else {
// Fast path - compare s to r.orValues
for _, v := range r.orValues {
if strings.HasPrefix(s, v) {
return true
}
}
}
// Mismatch. Try again starting from the next char.
s = sNext
n := strings.Index(s, r.prefix)
if n < 0 {
// Fast path - s doesn't contain the needed prefix
return false
}
sNext = s[n+1:]
s = s[n+len(r.prefix):]
}
}
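Usage sketch (not from the diff) for the new Regex type; unlike PromRegex it performs unanchored, substring-style matching:

package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)

func main() {
	r, err := regexutil.NewRegex("foo(bar|baz)")
	if err != nil {
		panic(err)
	}
	fmt.Println(r.MatchString("a foobaz a")) // true: matches a substring
	fmt.Println(r.MatchString("foobal"))     // false
	fmt.Println(r.GetLiterals())             // [foo]: literal parts of the expression
}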

171
lib/regexutil/regex_test.go Normal file
View file

@ -0,0 +1,171 @@
package regexutil
import (
"reflect"
"testing"
)
func TestNewRegexFailure(t *testing.T) {
f := func(expr string) {
t.Helper()
r, err := NewRegex(expr)
if err == nil {
t.Fatalf("expecting non-nil error when parsing %q; got %q", expr, r)
}
}
f("[foo")
f("(foo")
}
func TestRegexMatchString(t *testing.T) {
f := func(expr, s string, resultExpected bool) {
t.Helper()
r, err := NewRegex(expr)
if err != nil {
t.Fatalf("cannot parse %q: %s", expr, err)
}
exprResult := r.String()
if exprResult != expr {
t.Fatalf("unexpected string representation for %q: %q", expr, exprResult)
}
result := r.MatchString(s)
if result != resultExpected {
t.Fatalf("unexpected result when matching %q against regex=%q; got %v; want %v", s, expr, result, resultExpected)
}
}
f("", "", true)
f("", "foo", true)
f("foo", "", false)
f(".*", "", true)
f(".*", "foo", true)
f(".+", "", false)
f(".+", "foo", true)
f("foo.*", "bar", false)
f("foo.*", "foo", true)
f("foo.*", "a foo", true)
f("foo.*", "a foo a", true)
f("foo.*", "foobar", true)
f("foo.*", "a foobar", true)
f("foo.+", "bar", false)
f("foo.+", "foo", false)
f("foo.+", "a foo", false)
f("foo.+", "foobar", true)
f("foo.+", "a foobar", true)
f("foo|bar", "", false)
f("foo|bar", "a", false)
f("foo|bar", "foo", true)
f("foo|bar", "a foo", true)
f("foo|bar", "foo a", true)
f("foo|bar", "a foo a", true)
f("foo|bar", "bar", true)
f("foo|bar", "foobar", true)
f("foo(bar|baz)", "a", false)
f("foo(bar|baz)", "foobar", true)
f("foo(bar|baz)", "foobaz", true)
f("foo(bar|baz)", "foobaza", true)
f("foo(bar|baz)", "a foobaz a", true)
f("foo(bar|baz)", "foobal", false)
f("^foo|b(ar)$", "foo", true)
f("^foo|b(ar)$", "foo a", true)
f("^foo|b(ar)$", "a foo", false)
f("^foo|b(ar)$", "bar", true)
f("^foo|b(ar)$", "a bar", true)
f("^foo|b(ar)$", "barz", false)
f("^foo|b(ar)$", "ar", false)
f(".*foo.*", "foo", true)
f(".*foo.*", "afoobar", true)
f(".*foo.*", "abc", false)
f("foo.*bar.*", "foobar", true)
f("foo.*bar.*", "foo_bar_", true)
f("foo.*bar.*", "a foo bar baz", true)
f("foo.*bar.*", "foobaz", false)
f("foo.*bar.*", "baz foo", false)
f(".+foo.+", "foo", false)
f(".+foo.+", "afoobar", true)
f(".+foo.+", "afoo", false)
f(".+foo.+", "abc", false)
f("foo.+bar.+", "foobar", false)
f("foo.+bar.+", "foo_bar_", true)
f("foo.+bar.+", "a foo_bar_", true)
f("foo.+bar.+", "foobaz", false)
f("foo.+bar.+", "abc", false)
f(".+foo.*", "foo", false)
f(".+foo.*", "afoo", true)
f(".+foo.*", "afoobar", true)
f(".*(a|b).*", "a", true)
f(".*(a|b).*", "ax", true)
f(".*(a|b).*", "xa", true)
f(".*(a|b).*", "xay", true)
f(".*(a|b).*", "xzy", false)
f("^(?:true)$", "true", true)
f("^(?:true)$", "false", false)
f(".+;|;.+", ";", false)
f(".+;|;.+", "foo", false)
f(".+;|;.+", "foo;bar", true)
f(".+;|;.+", "foo;", true)
f(".+;|;.+", ";foo", true)
f(".+foo|bar|baz.+", "foo", false)
f(".+foo|bar|baz.+", "afoo", true)
f(".+foo|bar|baz.+", "fooa", false)
f(".+foo|bar|baz.+", "afooa", true)
f(".+foo|bar|baz.+", "bar", true)
f(".+foo|bar|baz.+", "abar", true)
f(".+foo|bar|baz.+", "abara", true)
f(".+foo|bar|baz.+", "bara", true)
f(".+foo|bar|baz.+", "baz", false)
f(".+foo|bar|baz.+", "baza", true)
f(".+foo|bar|baz.+", "abaz", false)
f(".+foo|bar|baz.+", "abaza", true)
f(".+foo|bar|baz.+", "afoo|bar|baza", true)
f(".+(foo|bar|baz).+", "bar", false)
f(".+(foo|bar|baz).+", "bara", false)
f(".+(foo|bar|baz).+", "abar", false)
f(".+(foo|bar|baz).+", "abara", true)
f(".+(foo|bar|baz).+", "afooa", true)
f(".+(foo|bar|baz).+", "abaza", true)
f(".*;|;.*", ";", true)
f(".*;|;.*", "foo", false)
f(".*;|;.*", "foo;bar", true)
f(".*;|;.*", "foo;", true)
f(".*;|;.*", ";foo", true)
f("^bar", "foobarbaz", false)
f("^foo", "foobarbaz", true)
f("bar$", "foobarbaz", false)
f("baz$", "foobarbaz", true)
f("(bar$|^foo)", "foobarbaz", true)
f("(bar$^boo)", "foobarbaz", false)
f("foo(bar|baz)", "a fooxfoobaz a", true)
f("foo(bar|baz)", "a fooxfooban a", false)
f("foo(bar|baz)", "a fooxfooban foobar a", true)
}
func TestGetLiterals(t *testing.T) {
f := func(expr string, literalsExpected []string) {
t.Helper()
r, err := NewRegex(expr)
if err != nil {
t.Fatalf("cannot parse %q: %s", expr, err)
}
literals := r.GetLiterals()
if !reflect.DeepEqual(literals, literalsExpected) {
t.Fatalf("unexpected literals; got %q; want %q", literals, literalsExpected)
}
}
f("", nil)
f("foo bar baz", []string{"foo bar baz"})
f("foo.*bar(a|b)baz.+", []string{"foo", "bar", "baz"})
f("(foo[ab](?:bar))", []string{"foo", "bar"})
f("foo|bar", nil)
f("((foo|bar)baz xxx(?:yzabc))", []string{"baz xxxyzabc"})
f("((foo|bar)baz xxx(?:yzabc)*)", []string{"baz xxx"})
f("((foo|bar)baz? xxx(?:yzabc)*)", []string{"ba", " xxx"})
}

View file

@ -0,0 +1,111 @@
package regexutil
import (
"fmt"
"regexp"
"testing"
)
func BenchmarkRegexMatchString(b *testing.B) {
b.Run("unpotimized-noprefix-match", func(b *testing.B) {
benchmarkRegexMatchString(b, "xbar.*|baz", "axbarz", true)
})
b.Run("unpotimized-noprefix-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, "xbar.*|baz", "zfoobaxz", false)
})
b.Run("unpotimized-prefix-match", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo(bar.*|baz)", "afoobarz", true)
})
b.Run("unpotimized-prefix-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo(bar.*|baz)", "zfoobaxz", false)
})
b.Run("dot-star-match", func(b *testing.B) {
benchmarkRegexMatchString(b, ".*", "foo", true)
})
b.Run("dot-plus-match", func(b *testing.B) {
benchmarkRegexMatchString(b, ".+", "foo", true)
})
b.Run("dot-plus-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, ".+", "", false)
})
b.Run("literal-match", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo", "afoobar", true)
})
b.Run("literal-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo", "abaraa", false)
})
b.Run("prefix-dot-star-match", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo.*", "afoobar", true)
})
b.Run("prefix-dot-star-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo.*", "axoobar", false)
})
b.Run("prefix-dot-plus-match", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo.+", "afoobar", true)
})
b.Run("prefix-dot-plus-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo.+", "axoobar", false)
})
b.Run("or-values-match", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo|bar|baz", "abaz", true)
})
b.Run("or-values-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, "foo|bar|baz", "axaz", false)
})
b.Run("prefix-or-values-match", func(b *testing.B) {
benchmarkRegexMatchString(b, "x(foo|bar|baz)", "axbaz", true)
})
b.Run("prefix-or-values-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, "x(foo|bar|baz)", "aabaz", false)
})
b.Run("substring-dot-star-match", func(b *testing.B) {
benchmarkRegexMatchString(b, ".*foo.*", "afoobar", true)
})
b.Run("substring-dot-star-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, ".*foo.*", "abarbaz", false)
})
b.Run("substring-dot-plus-match", func(b *testing.B) {
benchmarkRegexMatchString(b, ".+foo.+", "afoobar", true)
})
b.Run("substring-dot-plus-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, ".+foo.+", "abarbaz", false)
})
b.Run("prefix-substring-dot-star-match", func(b *testing.B) {
benchmarkRegexMatchString(b, "a.*foo.*", "bafoobar", true)
})
b.Run("prefix-substring-dot-star-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, "a.*foo.*", "babarbaz", false)
})
b.Run("prefix-substring-dot-plus-match", func(b *testing.B) {
benchmarkRegexMatchString(b, "a.+foo.+", "babfoobar", true)
})
b.Run("prefix-substring-dot-plus-mismatch", func(b *testing.B) {
benchmarkRegexMatchString(b, "a.+foo.+", "babarbaz", false)
})
}
func benchmarkRegexMatchString(b *testing.B, expr, s string, resultExpected bool) {
r, err := NewRegex(expr)
if err != nil {
panic(fmt.Errorf("unexpected error: %w", err))
}
re := regexp.MustCompile(expr)
f := func(b *testing.B, matchString func(s string) bool) {
b.SetBytes(1)
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
result := matchString(s)
if result != resultExpected {
panic(fmt.Errorf("unexpected result when matching %s against %s; got %v; want %v", s, expr, result, resultExpected))
}
}
})
}
b.Run("Regex", func(b *testing.B) {
f(b, r.MatchString)
})
b.Run("StandardRegex", func(b *testing.B) {
f(b, re.MatchString)
})
}

View file

@ -18,24 +18,38 @@ func RemoveStartEndAnchors(expr string) string {
return expr
}
// GetOrValues returns "or" values from the given regexp expr.
// GetOrValuesRegex returns "or" values from the given regexp expr.
//
// It returns ["foo", "bar"] for "foo|bar" regexp.
// It returns ["foo"] for "foo" regexp.
// It returns [""] for "" regexp.
// It returns an empty list if it is impossible to extract "or" values from the regexp.
func GetOrValuesRegex(expr string) []string {
return getOrValuesRegex(expr, true)
}
// GetOrValuesPromRegex returns "or" values from the given Prometheus-like regexp expr.
//
// It ignores start and end anchors ('^') and ('$') at the start and the end of expr.
// It returns ["foo", "bar"] for "foo|bar" regexp.
// It returns ["foo"] for "foo" regexp.
// It returns [""] for "" regexp.
// It returns an empty list if it is impossible to extract "or" values from the regexp.
func GetOrValues(expr string) []string {
func GetOrValuesPromRegex(expr string) []string {
expr = RemoveStartEndAnchors(expr)
prefix, tailExpr := Simplify(expr)
return getOrValuesRegex(expr, false)
}
func getOrValuesRegex(expr string, keepAnchors bool) []string {
prefix, tailExpr := simplifyRegex(expr, keepAnchors)
if tailExpr == "" {
return []string{prefix}
}
sre, err := syntax.Parse(tailExpr, syntax.Perl)
sre, err := parseRegexp(tailExpr)
if err != nil {
panic(fmt.Errorf("BUG: unexpected error when parsing verified tailExpr=%q: %w", tailExpr, err))
return nil
}
orValues := getOrValuesExt(sre)
orValues := getOrValues(sre)
// Sort orValues for faster index seek later
sort.Strings(orValues)
@ -50,21 +64,22 @@ func GetOrValues(expr string) []string {
return orValues
}
func getOrValuesExt(sre *syntax.Regexp) []string {
func getOrValues(sre *syntax.Regexp) []string {
switch sre.Op {
case syntax.OpCapture:
return getOrValuesExt(sre.Sub[0])
return getOrValues(sre.Sub[0])
case syntax.OpLiteral:
if !isLiteral(sre) {
v, ok := getLiteral(sre)
if !ok {
return nil
}
return []string{string(sre.Rune)}
return []string{v}
case syntax.OpEmptyMatch:
return []string{""}
case syntax.OpAlternate:
a := make([]string, 0, len(sre.Sub))
for _, reSub := range sre.Sub {
ca := getOrValuesExt(reSub)
ca := getOrValues(reSub)
if len(ca) == 0 {
return nil
}
@ -94,7 +109,7 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
if len(sre.Sub) < 1 {
return []string{""}
}
prefixes := getOrValuesExt(sre.Sub[0])
prefixes := getOrValues(sre.Sub[0])
if len(prefixes) == 0 {
return nil
}
@ -102,7 +117,7 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
return prefixes
}
sre.Sub = sre.Sub[1:]
suffixes := getOrValuesExt(sre)
suffixes := getOrValues(sre)
if len(suffixes) == 0 {
return nil
}
@ -123,46 +138,87 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
}
}
func isLiteral(sre *syntax.Regexp) bool {
func getLiteral(sre *syntax.Regexp) (string, bool) {
if sre.Op == syntax.OpCapture {
return isLiteral(sre.Sub[0])
return getLiteral(sre.Sub[0])
}
return sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0
if sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0 {
return string(sre.Rune), true
}
return "", false
}
const maxOrValues = 100
// Simplify simplifies the given expr.
// SimplifyRegex simplifies the given regexp expr.
//
// It returns plaintext prefix and the remaining regular expression
// without capturing parens.
func SimplifyRegex(expr string) (string, string) {
prefix, suffix := simplifyRegex(expr, true)
sre := mustParseRegexp(suffix)
if isDotOp(sre, syntax.OpStar) {
return prefix, ""
}
if sre.Op == syntax.OpConcat {
subs := sre.Sub
if prefix == "" {
// Drop .* at the start
for len(subs) > 0 && isDotOp(subs[0], syntax.OpStar) {
subs = subs[1:]
}
}
// Drop .* at the end.
for len(subs) > 0 && isDotOp(subs[len(subs)-1], syntax.OpStar) {
subs = subs[:len(subs)-1]
}
sre.Sub = subs
if len(subs) == 0 {
return prefix, ""
}
suffix = sre.String()
}
return prefix, suffix
}
// SimplifyPromRegex simplifies the given Prometheus-like expr.
//
// It returns plaintext prefix and the remaining regular expression
// with dropped '^' and '$' anchors at the beginning and the end
// with dropped '^' and '$' anchors at the beginning and at the end
// of the regular expression.
//
// The function removes capturing parens from the expr,
// so it cannot be used when capturing parens are necessary.
func Simplify(expr string) (string, string) {
sre, err := syntax.Parse(expr, syntax.Perl)
func SimplifyPromRegex(expr string) (string, string) {
return simplifyRegex(expr, false)
}
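A similar illustrative sketch (not part of the diff) for the two Simplify* variants; the inputs and expected prefix/suffix pairs are taken from the tests further below, and the import path is assumed:

package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)

func main() {
	// Both variants peel off the longest plaintext prefix.
	p, s := regexutil.SimplifyRegex("foobar|foobaz")
	fmt.Printf("%q %q\n", p, s) // "fooba" "[rz]"

	// They diverge on anchored exprs: the plain variant keeps the '^' anchor as \A ...
	p, s = regexutil.SimplifyRegex("^foobar|foobaz")
	fmt.Printf("%q %q\n", p, s) // "" "\\Afoobar|foobaz"

	// ... while the Prometheus-like variant drops the edge anchors before simplifying.
	p, s = regexutil.SimplifyPromRegex("^foobar|foobaz")
	fmt.Printf("%q %q\n", p, s) // "fooba" "[rz]"
}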
func simplifyRegex(expr string, keepAnchors bool) (string, string) {
sre, err := parseRegexp(expr)
if err != nil {
// Cannot parse the regexp. Return it all as prefix.
return expr, ""
}
sre = simplifyRegexp(sre, false)
sre = simplifyRegexp(sre, keepAnchors, keepAnchors)
if sre == emptyRegexp {
return "", ""
}
if isLiteral(sre) {
return string(sre.Rune), ""
v, ok := getLiteral(sre)
if ok {
return v, ""
}
var prefix string
if sre.Op == syntax.OpConcat {
sub0 := sre.Sub[0]
if isLiteral(sub0) {
prefix = string(sub0.Rune)
prefix, ok = getLiteral(sre.Sub[0])
if ok {
sre.Sub = sre.Sub[1:]
if len(sre.Sub) == 0 {
return prefix, ""
}
sre = simplifyRegexp(sre, true)
sre = simplifyRegexp(sre, true, keepAnchors)
}
}
if _, err := syntax.Compile(sre); err != nil {
@ -171,44 +227,42 @@ func Simplify(expr string) (string, string) {
}
s := sre.String()
s = strings.ReplaceAll(s, "(?:)", "")
s = strings.ReplaceAll(s, "(?-s:.)", ".")
s = strings.ReplaceAll(s, "(?-m:$)", "$")
s = strings.ReplaceAll(s, "(?s:.)", ".")
s = strings.ReplaceAll(s, "(?m:$)", "$")
return prefix, s
}
func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp {
func simplifyRegexp(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.Regexp {
s := sre.String()
for {
sre = simplifyRegexpExt(sre, hasPrefix, false)
sre = simplifyRegexpExt(sre, keepBeginOp, keepEndOp)
sre = sre.Simplify()
if sre.Op == syntax.OpBeginText || sre.Op == syntax.OpEndText {
if !keepBeginOp && sre.Op == syntax.OpBeginText {
sre = emptyRegexp
} else if !keepEndOp && sre.Op == syntax.OpEndText {
sre = emptyRegexp
}
sNew := sre.String()
if sNew == s {
return sre
}
var err error
sre, err = syntax.Parse(sNew, syntax.Perl)
if err != nil {
panic(fmt.Errorf("BUG: cannot parse simplified regexp %q: %w", sNew, err))
}
sre = mustParseRegexp(sNew)
s = sNew
}
}
func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Regexp {
func simplifyRegexpExt(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.Regexp {
switch sre.Op {
case syntax.OpCapture:
// Substitute all the capture regexps with non-capture regexps.
sre.Op = syntax.OpAlternate
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], keepBeginOp, keepEndOp)
if sre.Sub[0] == emptyRegexp {
return emptyRegexp
}
return sre
case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat:
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], keepBeginOp, keepEndOp)
if sre.Sub[0] == emptyRegexp {
return emptyRegexp
}
@ -216,13 +270,13 @@ func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Re
case syntax.OpAlternate:
// Do not remove empty captures from OpAlternate, since this may break regexp.
for i, sub := range sre.Sub {
sre.Sub[i] = simplifyRegexpExt(sub, hasPrefix, hasSuffix)
sre.Sub[i] = simplifyRegexpExt(sub, keepBeginOp, keepEndOp)
}
return sre
case syntax.OpConcat:
subs := sre.Sub[:0]
for i, sub := range sre.Sub {
sub = simplifyRegexpExt(sub, hasPrefix || len(subs) > 0, hasSuffix || i+1 < len(sre.Sub))
sub = simplifyRegexpExt(sub, keepBeginOp || len(subs) > 0, keepEndOp || i+1 < len(sre.Sub))
if sub != emptyRegexp {
subs = append(subs, sub)
}
@ -230,12 +284,12 @@ func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Re
sre.Sub = subs
// Remove anchors from the beginning and the end of regexp, since they
// will be added later.
if !hasPrefix {
if !keepBeginOp {
for len(sre.Sub) > 0 && sre.Sub[0].Op == syntax.OpBeginText {
sre.Sub = sre.Sub[1:]
}
}
if !hasSuffix {
if !keepEndOp {
for len(sre.Sub) > 0 && sre.Sub[len(sre.Sub)-1].Op == syntax.OpEndText {
sre.Sub = sre.Sub[:len(sre.Sub)-1]
}
@ -254,6 +308,44 @@ func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Re
}
}
// getSubstringLiteral returns the literal part of sre that is surrounded by '.+' or '.*' on both sides, as selected by prefixSuffixOp.
//
// For example, if sre=".+foo.+" and prefixSuffixOp=syntax.OpPlus, then the function returns "foo".
//
// An empty string is returned if sre isn't wrapped in the given prefixSuffixOp prefix and suffix.
func getSubstringLiteral(sre *syntax.Regexp, prefixSuffixOp syntax.Op) string {
if sre.Op != syntax.OpConcat || len(sre.Sub) != 3 {
return ""
}
if !isDotOp(sre.Sub[0], prefixSuffixOp) || !isDotOp(sre.Sub[2], prefixSuffixOp) {
return ""
}
v, ok := getLiteral(sre.Sub[1])
if !ok {
return ""
}
return v
}
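Because getSubstringLiteral is unexported, a quick check has to live in the same package; a hedged sketch mirroring the doc comment above (relying on the package's existing fmt and regexp/syntax imports and on mustParseRegexp defined further below):

// illustrative only, not part of the diff
func exampleGetSubstringLiteral() {
	sre := mustParseRegexp(".+foo.+")
	fmt.Println(getSubstringLiteral(sre, syntax.OpPlus)) // "foo" - wrapped by '.+' on both sides
	fmt.Println(getSubstringLiteral(sre, syntax.OpStar)) // ""    - the wrapping op is '+', not '*'

	sre = mustParseRegexp(".*bar.*")
	fmt.Println(getSubstringLiteral(sre, syntax.OpStar)) // "bar"
}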
func isDotOp(sre *syntax.Regexp, op syntax.Op) bool {
if sre.Op != op {
return false
}
return sre.Sub[0].Op == syntax.OpAnyChar
}
var emptyRegexp = &syntax.Regexp{
Op: syntax.OpEmptyMatch,
}
func parseRegexp(expr string) (*syntax.Regexp, error) {
return syntax.Parse(expr, syntax.Perl|syntax.DotNL)
}
func mustParseRegexp(expr string) *syntax.Regexp {
sre, err := parseRegexp(expr)
if err != nil {
panic(fmt.Errorf("BUG: cannot parse already verified regexp %q: %w", expr, err))
}
return sre
}

View file

@ -5,10 +5,51 @@ import (
"testing"
)
func TestGetOrValues(t *testing.T) {
func TestGetOrValuesRegex(t *testing.T) {
f := func(s string, valuesExpected []string) {
t.Helper()
values := GetOrValues(s)
values := GetOrValuesRegex(s)
if !reflect.DeepEqual(values, valuesExpected) {
t.Fatalf("unexpected values for s=%q; got %q; want %q", s, values, valuesExpected)
}
}
f("", []string{""})
f("foo", []string{"foo"})
f("^foo$", nil)
f("|foo", []string{"", "foo"})
f("|foo|", []string{"", "", "foo"})
f("foo.+", nil)
f("foo.*", nil)
f(".*", nil)
f("foo|.*", nil)
f("(fo((o)))|(bar)", []string{"bar", "foo"})
f("foobar", []string{"foobar"})
f("z|x|c", []string{"c", "x", "z"})
f("foo|bar", []string{"bar", "foo"})
f("(foo|bar)", []string{"bar", "foo"})
f("(foo|bar)baz", []string{"barbaz", "foobaz"})
f("[a-z][a-z]", nil)
f("[a-d]", []string{"a", "b", "c", "d"})
f("x[a-d]we", []string{"xawe", "xbwe", "xcwe", "xdwe"})
f("foo(bar|baz)", []string{"foobar", "foobaz"})
f("foo(ba[rz]|(xx|o))", []string{"foobar", "foobaz", "fooo", "fooxx"})
f("foo(?:bar|baz)x(qwe|rt)", []string{"foobarxqwe", "foobarxrt", "foobazxqwe", "foobazxrt"})
f("foo(bar||baz)", []string{"foo", "foobar", "foobaz"})
f("(a|b|c)(d|e|f|0|1|2)(g|h|k|x|y|z)", nil)
f("(?i)foo", nil)
f("(?i)(foo|bar)", nil)
f("^foo|bar$", nil)
f("^(foo|bar)$", nil)
f("^a(foo|b(?:a|r))$", nil)
f("^a(foo$|b(?:a$|r))$", nil)
f("^a(^foo|bar$)z$", nil)
}
func TestGetOrValuesPromRegex(t *testing.T) {
f := func(s string, valuesExpected []string) {
t.Helper()
values := GetOrValuesPromRegex(s)
if !reflect.DeepEqual(values, valuesExpected) {
t.Fatalf("unexpected values for s=%q; got %q; want %q", s, values, valuesExpected)
}
@ -46,10 +87,87 @@ func TestGetOrValues(t *testing.T) {
f("^a(^foo|bar$)z$", nil)
}
func TestSimplify(t *testing.T) {
func TestSimplifyRegex(t *testing.T) {
f := func(s, expectedPrefix, expectedSuffix string) {
t.Helper()
prefix, suffix := Simplify(s)
prefix, suffix := SimplifyRegex(s)
if prefix != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
}
if suffix != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
}
}
f("", "", "")
f(".*", "", "")
f(".*(.*).*", "", "")
f("foo.*", "foo", "")
f(".*foo.*", "", "foo")
f("^", "", "\\A")
f("$", "", "(?-m:$)")
f("^()$", "", "(?-m:\\A$)")
f("^(?:)$", "", "(?-m:\\A$)")
f("^foo|^bar$|baz", "", "(?-m:\\Afoo|\\Abar$|baz)")
f("^(foo$|^bar)$", "", "(?-m:\\A(?:foo$|\\Abar)$)")
f("^a(foo$|bar)$", "", "(?-m:\\Aa(?:foo$|bar)$)")
f("^a(^foo|bar$)z$", "", "(?-m:\\Aa(?:\\Afoo|bar$)z$)")
f("foobar", "foobar", "")
f("foo$|^foobar", "", "(?-m:foo$|\\Afoobar)")
f("^(foo$|^foobar)$", "", "(?-m:\\A(?:foo$|\\Afoobar)$)")
f("foobar|foobaz", "fooba", "[rz]")
f("(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x")
f("(тестЧЧ|тест)", "тест", "ЧЧ|")
f("foo(bar|baz|bana)", "fooba", "[rz]|na")
f("^foobar|foobaz", "", "\\Afoobar|foobaz")
f("^foobar|^foobaz$", "", "(?-m:\\Afoobar|\\Afoobaz$)")
f("foobar|foobaz", "fooba", "[rz]")
f("(?:^foobar|^foobaz)aa.*", "", "(?:\\Afoobar|\\Afoobaz)aa")
f("foo[bar]+", "foo", "[abr]+")
f("foo[a-z]+", "foo", "[a-z]+")
f("foo[bar]*", "foo", "[abr]*")
f("foo[a-z]*", "foo", "[a-z]*")
f("foo[x]+", "foo", "x+")
f("foo[^x]+", "foo", "[^x]+")
f("foo[x]*", "foo", "x*")
f("foo[^x]*", "foo", "[^x]*")
f("foo[x]*bar", "foo", "x*bar")
f("fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
f("foo.+bar", "foo", "(?s:.+bar)")
f("a(b|c.*).+", "a", "(?s:(?:b|c.*).+)")
f("ab|ac", "a", "[bc]")
f("(?i)xyz", "", "(?i:XYZ)")
f("(?i)foo|bar", "", "(?i:FOO|BAR)")
f("(?i)up.+x", "", "(?is:UP.+X)")
f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)")
// test invalid regexps
f("a(", "a(", "")
f("a[", "a[", "")
f("a[]", "a[]", "")
f("a{", "a{", "")
f("a{}", "a{}", "")
f("invalid(regexp", "invalid(regexp", "")
// The transformed regexp mustn't match aba
f("a?(^ba|c)", "", "a?(?:\\Aba|c)")
// The transformed regexp mustn't match barx
f("(foo|bar$)x*", "", "(?-m:(?:foo|bar$)x*)")
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297
f(".+;|;.+", "", "(?s:.+;|;.+)")
f("^(.+);|;(.+)$", "", "(?s-m:\\A.+;|;.+$)")
f("^(.+);$|^;(.+)$", "", "(?s-m:\\A.+;$|\\A;.+$)")
f(".*;|;.*", "", "(?s:.*;|;.*)")
f("^(.*);|;(.*)$", "", "(?s-m:\\A.*;|;.*$)")
f("^(.*);$|^;(.*)$", "", "(?s-m:\\A.*;$|\\A;.*$)")
}
func TestSimplifyPromRegex(t *testing.T) {
f := func(s, expectedPrefix, expectedSuffix string) {
t.Helper()
prefix, suffix := SimplifyPromRegex(s)
if prefix != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
}
@ -77,7 +195,7 @@ func TestSimplify(t *testing.T) {
f("^foobar|foobaz", "fooba", "[rz]")
f("^foobar|^foobaz$", "fooba", "[rz]")
f("foobar|foobaz", "fooba", "[rz]")
f("(?:^foobar|^foobaz)aa.*", "fooba", "(?-s:[rz]aa.*)")
f("(?:^foobar|^foobaz)aa.*", "fooba", "(?s:[rz]aa.*)")
f("foo[bar]+", "foo", "[abr]+")
f("foo[a-z]+", "foo", "[a-z]+")
f("foo[bar]*", "foo", "[abr]*")
@ -88,12 +206,12 @@ func TestSimplify(t *testing.T) {
f("foo[^x]*", "foo", "[^x]*")
f("foo[x]*bar", "foo", "x*bar")
f("fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
f("foo.+bar", "foo", "(?-s:.+bar)")
f("a(b|c.*).+", "a", "(?-s:(?:b|c.*).+)")
f("foo.+bar", "foo", "(?s:.+bar)")
f("a(b|c.*).+", "a", "(?s:(?:b|c.*).+)")
f("ab|ac", "a", "[bc]")
f("(?i)xyz", "", "(?i:XYZ)")
f("(?i)foo|bar", "", "(?i:FOO|BAR)")
f("(?i)up.+x", "", "(?i-s:UP.+X)")
f("(?i)up.+x", "", "(?is:UP.+X)")
f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)")
// test invalid regexps
@ -111,12 +229,12 @@ func TestSimplify(t *testing.T) {
f("(foo|bar$)x*", "", "(?-m:(?:foo|bar$)x*)")
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297
f(".+;|;.+", "", "(?-s:.+;|;.+)")
f("^(.+);|;(.+)$", "", "(?-s:.+;|;.+)")
f("^(.+);$|^;(.+)$", "", "(?-s:.+;|;.+)")
f(".*;|;.*", "", "(?-s:.*;|;.*)")
f("^(.*);|;(.*)$", "", "(?-s:.*;|;.*)")
f("^(.*);$|^;(.*)$", "", "(?-s:.*;|;.*)")
f(".+;|;.+", "", "(?s:.+;|;.+)")
f("^(.+);|;(.+)$", "", "(?s:.+;|;.+)")
f("^(.+);$|^;(.+)$", "", "(?s:.+;|;.+)")
f(".*;|;.*", "", "(?s:.*;|;.*)")
f("^(.*);|;(.*)$", "", "(?s:.*;|;.*)")
f("^(.*);$|^;(.*)$", "", "(?s:.*;|;.*)")
}
func TestRemoveStartEndAnchors(t *testing.T) {

View file

@ -548,7 +548,7 @@ func getRegexpFromCache(expr string) (*regexpCacheValue, error) {
}
sExpr := expr
orValues := regexutil.GetOrValues(sExpr)
orValues := regexutil.GetOrValuesPromRegex(sExpr)
var reMatch func(b []byte) bool
var reCost uint64
var literalSuffix string
@ -881,7 +881,7 @@ func simplifyRegexp(expr string) (string, string) {
// Make a copy of expr before using it,
// since it may be constructed via bytesutil.ToUnsafeString()
expr = string(append([]byte{}, expr...))
prefix, suffix := regexutil.Simplify(expr)
prefix, suffix := regexutil.SimplifyPromRegex(expr)
// Put the prefix and the suffix to the cache.
ps := &prefixSuffix{

View file

@ -1183,7 +1183,7 @@ func TestSimplifyRegexp(t *testing.T) {
f("ab|ad", "a", "[bd]")
f("(?i)xyz", "", "(?i:XYZ)")
f("(?i)foo|bar", "", "(?i:FOO|BAR)")
f("(?i)up.+x", "", "(?i-s:UP.+X)")
f("(?i)up.+x", "", "(?is:UP.+X)")
f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)")
// test invalid regexps