diff --git a/Makefile b/Makefile
index 551c587c7..a97c9f389 100644
--- a/Makefile
+++ b/Makefile
@@ -21,6 +21,7 @@ include package/release/Makefile
all: \
victoria-metrics-prod \
+ victoria-logs-prod \
vmagent-prod \
vmalert-prod \
vmauth-prod \
@@ -33,6 +34,7 @@ clean:
publish: docker-scan \
publish-victoria-metrics \
+ publish-victoria-logs \
publish-vmagent \
publish-vmalert \
publish-vmauth \
@@ -42,6 +44,7 @@ publish: docker-scan \
package: \
package-victoria-metrics \
+ package-victoria-logs \
package-vmagent \
package-vmalert \
package-vmauth \
@@ -178,6 +181,7 @@ publish-release:
release: \
release-victoria-metrics \
+ release-victoria-logs \
release-vmutils
release-victoria-metrics: \
@@ -191,7 +195,6 @@ release-victoria-metrics: \
release-victoria-metrics-openbsd-amd64 \
release-victoria-metrics-windows-amd64
-# adds i386 arch
release-victoria-metrics-linux-386:
GOOS=linux GOARCH=386 $(MAKE) release-victoria-metrics-goos-goarch
@@ -238,6 +241,63 @@ release-victoria-metrics-windows-goarch: victoria-metrics-windows-$(GOARCH)-prod
cd bin && rm -rf \
victoria-metrics-windows-$(GOARCH)-prod.exe
+release-victoria-logs: \
+ release-victoria-logs-linux-386 \
+ release-victoria-logs-linux-amd64 \
+ release-victoria-logs-linux-arm \
+ release-victoria-logs-linux-arm64 \
+ release-victoria-logs-darwin-amd64 \
+ release-victoria-logs-darwin-arm64 \
+ release-victoria-logs-freebsd-amd64 \
+ release-victoria-logs-openbsd-amd64 \
+ release-victoria-logs-windows-amd64
+
+release-victoria-logs-linux-386:
+ GOOS=linux GOARCH=386 $(MAKE) release-victoria-logs-goos-goarch
+
+release-victoria-logs-linux-amd64:
+ GOOS=linux GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch
+
+release-victoria-logs-linux-arm:
+ GOOS=linux GOARCH=arm $(MAKE) release-victoria-logs-goos-goarch
+
+release-victoria-logs-linux-arm64:
+ GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-logs-goos-goarch
+
+release-victoria-logs-darwin-amd64:
+ GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch
+
+release-victoria-logs-darwin-arm64:
+ GOOS=darwin GOARCH=arm64 $(MAKE) release-victoria-logs-goos-goarch
+
+release-victoria-logs-freebsd-amd64:
+ GOOS=freebsd GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch
+
+release-victoria-logs-openbsd-amd64:
+ GOOS=openbsd GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch
+
+release-victoria-logs-windows-amd64:
+ GOARCH=amd64 $(MAKE) release-victoria-logs-windows-goarch
+
+release-victoria-logs-goos-goarch: victoria-logs-$(GOOS)-$(GOARCH)-prod
+ cd bin && \
+ tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf victoria-logs-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
+ victoria-logs-$(GOOS)-$(GOARCH)-prod \
+ && sha256sum victoria-logs-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
+ victoria-logs-$(GOOS)-$(GOARCH)-prod \
+ | sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > victoria-logs-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
+ cd bin && rm -rf victoria-logs-$(GOOS)-$(GOARCH)-prod
+
+release-victoria-logs-windows-goarch: victoria-logs-windows-$(GOARCH)-prod
+ cd bin && \
+ zip victoria-logs-windows-$(GOARCH)-$(PKG_TAG).zip \
+ victoria-logs-windows-$(GOARCH)-prod.exe \
+ && sha256sum victoria-logs-windows-$(GOARCH)-$(PKG_TAG).zip \
+ victoria-logs-windows-$(GOARCH)-prod.exe \
+ > victoria-logs-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
+ cd bin && rm -rf \
+ victoria-logs-windows-$(GOARCH)-prod.exe
+
release-vmutils: \
release-vmutils-linux-386 \
release-vmutils-linux-amd64 \
diff --git a/app/victoria-logs/Makefile b/app/victoria-logs/Makefile
new file mode 100644
index 000000000..6b2170613
--- /dev/null
+++ b/app/victoria-logs/Makefile
@@ -0,0 +1,103 @@
+# All these commands must run from repository root.
+
+victoria-logs:
+ APP_NAME=victoria-logs $(MAKE) app-local
+
+victoria-logs-race:
+ APP_NAME=victoria-logs RACE=-race $(MAKE) app-local
+
+victoria-logs-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker
+
+victoria-logs-pure-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-pure
+
+victoria-logs-linux-amd64-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-amd64
+
+victoria-logs-linux-arm-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-arm
+
+victoria-logs-linux-arm64-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-arm64
+
+victoria-logs-linux-ppc64le-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-ppc64le
+
+victoria-logs-linux-386-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-386
+
+victoria-logs-darwin-amd64-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-darwin-amd64
+
+victoria-logs-darwin-arm64-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-darwin-arm64
+
+victoria-logs-freebsd-amd64-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-freebsd-amd64
+
+victoria-logs-openbsd-amd64-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-openbsd-amd64
+
+victoria-logs-windows-amd64-prod:
+ APP_NAME=victoria-logs $(MAKE) app-via-docker-windows-amd64
+
+package-victoria-logs:
+ APP_NAME=victoria-logs $(MAKE) package-via-docker
+
+package-victoria-logs-pure:
+ APP_NAME=victoria-logs $(MAKE) package-via-docker-pure
+
+package-victoria-logs-amd64:
+ APP_NAME=victoria-logs $(MAKE) package-via-docker-amd64
+
+package-victoria-logs-arm:
+ APP_NAME=victoria-logs $(MAKE) package-via-docker-arm
+
+package-victoria-logs-arm64:
+ APP_NAME=victoria-logs $(MAKE) package-via-docker-arm64
+
+package-victoria-logs-ppc64le:
+ APP_NAME=victoria-logs $(MAKE) package-via-docker-ppc64le
+
+package-victoria-logs-386:
+ APP_NAME=victoria-logs $(MAKE) package-via-docker-386
+
+publish-victoria-logs:
+ APP_NAME=victoria-logs $(MAKE) publish-via-docker
+
+victoria-logs-linux-amd64:
+ APP_NAME=victoria-logs CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch
+
+victoria-logs-linux-arm:
+ APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch
+
+victoria-logs-linux-arm64:
+ APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch
+
+victoria-logs-linux-ppc64le:
+ APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
+
+victoria-logs-linux-s390x:
+ APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
+
+victoria-logs-linux-386:
+ APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
+
+victoria-logs-darwin-amd64:
+ APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch
+
+victoria-logs-darwin-arm64:
+ APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch
+
+victoria-logs-freebsd-amd64:
+ APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
+
+victoria-logs-openbsd-amd64:
+ APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
+
+victoria-logs-windows-amd64:
+ GOARCH=amd64 APP_NAME=victoria-logs $(MAKE) app-local-windows-goarch
+
+victoria-logs-pure:
+ APP_NAME=victoria-logs $(MAKE) app-local-pure
diff --git a/app/victoria-logs/deployment/Dockerfile b/app/victoria-logs/deployment/Dockerfile
new file mode 100644
index 000000000..865964d71
--- /dev/null
+++ b/app/victoria-logs/deployment/Dockerfile
@@ -0,0 +1,8 @@
+ARG base_image
+FROM $base_image
+
+EXPOSE 9428
+
+ENTRYPOINT ["/victoria-logs-prod"]
+ARG src_binary
+COPY $src_binary ./victoria-logs-prod
diff --git a/app/victoria-logs/main.go b/app/victoria-logs/main.go
new file mode 100644
index 000000000..27150810f
--- /dev/null
+++ b/app/victoria-logs/main.go
@@ -0,0 +1,102 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "net/http"
+ "os"
+ "time"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert"
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vlselect"
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
+)
+
+var (
+ httpListenAddr = flag.String("httpListenAddr", ":9428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
+ useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
+ "See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
+ "With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
+ gogc = flag.Int("gogc", 100, "GOGC to use. See https://tip.golang.org/doc/gc-guide")
+)
+
+func main() {
+ // Write flags and help message to stdout, since it is easier to grep or pipe.
+ flag.CommandLine.SetOutput(os.Stdout)
+ flag.Usage = usage
+ envflag.Parse()
+ cgroup.SetGOGC(*gogc)
+ buildinfo.Init()
+ logger.Init()
+ pushmetrics.Init()
+
+ logger.Infof("starting VictoriaLogs at %q...", *httpListenAddr)
+ startTime := time.Now()
+
+ vlstorage.Init()
+ vlselect.Init()
+ vlinsert.Init()
+
+ go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
+ logger.Infof("started VictoriaLogs in %.3f seconds; see https://docs.victoriametrics.com/VictoriaLogs/", time.Since(startTime).Seconds())
+
+ sig := procutil.WaitForSigterm()
+ logger.Infof("received signal %s", sig)
+
+ logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
+ startTime = time.Now()
+ if err := httpserver.Stop(*httpListenAddr); err != nil {
+ logger.Fatalf("cannot stop the webservice: %s", err)
+ }
+ logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
+
+ vlinsert.Stop()
+ vlselect.Stop()
+ vlstorage.Stop()
+
+ fs.MustStopDirRemover()
+
+ logger.Infof("the VictoriaLogs has been stopped in %.3f seconds", time.Since(startTime).Seconds())
+}
+
+func requestHandler(w http.ResponseWriter, r *http.Request) bool {
+ if r.URL.Path == "/" {
+ if r.Method != http.MethodGet {
+ return false
+ }
+ w.Header().Add("Content-Type", "text/html; charset=utf-8")
+ fmt.Fprintf(w, "
Single-node VictoriaLogs
")
+ fmt.Fprintf(w, "See docs at https://docs.victoriametrics.com/VictoriaLogs/")
+ fmt.Fprintf(w, "Useful endpoints:")
+ httpserver.WriteAPIHelp(w, [][2]string{
+ {"metrics", "available service metrics"},
+ {"flags", "command-line flags"},
+ })
+ return true
+ }
+ if vlinsert.RequestHandler(w, r) {
+ return true
+ }
+ if vlselect.RequestHandler(w, r) {
+ return true
+ }
+ return false
+}
+
+func usage() {
+ const s = `
+victoria-logs is a log management and analytics service.
+
+See the docs at https://docs.victoriametrics.com/VictoriaLogs/
+`
+ flagutil.Usage(s)
+}
diff --git a/app/victoria-logs/multiarch/Dockerfile b/app/victoria-logs/multiarch/Dockerfile
new file mode 100644
index 000000000..220add3a4
--- /dev/null
+++ b/app/victoria-logs/multiarch/Dockerfile
@@ -0,0 +1,12 @@
+# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
+ARG certs_image
+ARG root_image
+FROM $certs_image as certs
+RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
+
+FROM $root_image
+COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+EXPOSE 9428
+ENTRYPOINT ["/victoria-logs-prod"]
+ARG TARGETARCH
+COPY victoria-logs-linux-${TARGETARCH}-prod ./victoria-logs-prod
diff --git a/app/vlinsert/elasticsearch/bulk_response.qtpl b/app/vlinsert/elasticsearch/bulk_response.qtpl
new file mode 100644
index 000000000..f2b499d0e
--- /dev/null
+++ b/app/vlinsert/elasticsearch/bulk_response.qtpl
@@ -0,0 +1,20 @@
+{% stripspace %}
+
+{% func BulkResponse(n int, tookMs int64) %}
+{
+ "took":{%dl tookMs %},
+ "errors":false,
+ "items":[
+ {% for i := 0; i < n; i++ %}
+ {
+ "create":{
+ "status":201
+ }
+ }
+ {% if i+1 < n %},{% endif %}
+ {% endfor %}
+ ]
+}
+{% endfunc %}
+
+{% endstripspace %}
diff --git a/app/vlinsert/elasticsearch/bulk_response.qtpl.go b/app/vlinsert/elasticsearch/bulk_response.qtpl.go
new file mode 100644
index 000000000..5bd6c5a58
--- /dev/null
+++ b/app/vlinsert/elasticsearch/bulk_response.qtpl.go
@@ -0,0 +1,69 @@
+// Code generated by qtc from "bulk_response.qtpl". DO NOT EDIT.
+// See https://github.com/valyala/quicktemplate for details.
+
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
+package elasticsearch
+
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
+import (
+ qtio422016 "io"
+
+ qt422016 "github.com/valyala/quicktemplate"
+)
+
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
+var (
+ _ = qtio422016.Copy
+ _ = qt422016.AcquireByteBuffer
+)
+
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
+func StreamBulkResponse(qw422016 *qt422016.Writer, n int, tookMs int64) {
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
+ qw422016.N().S(`{"took":`)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:5
+ qw422016.N().DL(tookMs)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:5
+ qw422016.N().S(`,"errors":false,"items":[`)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:8
+ for i := 0; i < n; i++ {
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:8
+ qw422016.N().S(`{"create":{"status":201}}`)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:14
+ if i+1 < n {
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:14
+ qw422016.N().S(`,`)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:14
+ }
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:15
+ }
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:15
+ qw422016.N().S(`]}`)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+}
+
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+func WriteBulkResponse(qq422016 qtio422016.Writer, n int, tookMs int64) {
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+ qw422016 := qt422016.AcquireWriter(qq422016)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+ StreamBulkResponse(qw422016, n, tookMs)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+ qt422016.ReleaseWriter(qw422016)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+}
+
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+func BulkResponse(n int, tookMs int64) string {
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+ qb422016 := qt422016.AcquireByteBuffer()
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+ WriteBulkResponse(qb422016, n, tookMs)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+ qs422016 := string(qb422016.B)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+ qt422016.ReleaseByteBuffer(qb422016)
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+ return qs422016
+//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
+}
diff --git a/app/vlinsert/elasticsearch/elasticsearch.go b/app/vlinsert/elasticsearch/elasticsearch.go
new file mode 100644
index 000000000..de4cf945c
--- /dev/null
+++ b/app/vlinsert/elasticsearch/elasticsearch.go
@@ -0,0 +1,410 @@
+package elasticsearch
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "io"
+ "math"
+ "net/http"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
+ "github.com/VictoriaMetrics/metrics"
+ "github.com/valyala/fastjson"
+)
+
+var (
+ maxLineSizeBytes = flagutil.NewBytes("insert.maxLineSizeBytes", 256*1024, "The maximum size of a single line, which can be read by /insert/* handlers")
+)
+
+// RequestHandler processes ElasticSearch insert requests
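+//
+// The /_bulk handler accepts the following optional query args (see the code below):
+//
+//   _time_field - the name of the field containing the log timestamp (defaults to "_time")
+//   _msg_field - the name of the field to be treated as the log message (renamed to "_msg")
+//   _stream_fields - comma-separated list of fields to be treated as log stream fields
+//   ignore_fields - comma-separated list of field names to be ignored during ingestion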
+func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
+ w.Header().Add("Content-Type", "application/json")
+ // This header is needed for Logstash
+ w.Header().Set("X-Elastic-Product", "Elasticsearch")
+
+ if strings.HasPrefix(path, "/_ilm/policy") {
+ // Return fake response for ElasticSearch ilm request.
+ fmt.Fprintf(w, `{}`)
+ return true
+ }
+ if strings.HasPrefix(path, "/_index_template") {
+ // Return fake response for ElasticSearch index template request.
+ fmt.Fprintf(w, `{}`)
+ return true
+ }
+ if strings.HasPrefix(path, "/_ingest") {
+ // Return fake response for ElasticSearch ingest pipeline request.
+ // See: https://www.elastic.co/guide/en/elasticsearch/reference/8.8/put-pipeline-api.html
+ fmt.Fprintf(w, `{}`)
+ return true
+ }
+ if strings.HasPrefix(path, "/_nodes") {
+ // Return fake response for ElasticSearch nodes discovery request.
+ // See: https://www.elastic.co/guide/en/elasticsearch/reference/8.8/cluster.html
+ fmt.Fprintf(w, `{}`)
+ return true
+ }
+ switch path {
+ case "/":
+ switch r.Method {
+ case http.MethodGet:
+ // Return fake response for ElasticSearch ping request.
+ // See the latest available version for ElasticSearch at https://github.com/elastic/elasticsearch/releases
+ fmt.Fprintf(w, `{
+ "version": {
+ "number": "8.8.0"
+ }
+ }`)
+ case http.MethodHead:
+ // Return empty response for Logstash ping request.
+ }
+
+ return true
+ case "/_license":
+ // Return fake response for ElasticSearch license request.
+ fmt.Fprintf(w, `{
+ "license": {
+ "uid": "cbff45e7-c553-41f7-ae4f-9205eabd80xx",
+ "type": "oss",
+ "status": "active",
+ "expiry_date_in_millis" : 4000000000000
+ }
+ }`)
+ return true
+ case "/_bulk":
+ startTime := time.Now()
+ bulkRequestsTotal.Inc()
+
+ // Extract tenantID
+ tenantID, err := logstorage.GetTenantIDFromRequest(r)
+ if err != nil {
+ httpserver.Errorf(w, r, "%s", err)
+ return true
+ }
+
+ // Extract time field name from _time_field query arg
+ var timeField = "_time"
+ if tf := r.FormValue("_time_field"); tf != "" {
+ timeField = tf
+ }
+
+ // Extract message field name from _msg_field query arg
+ var msgField = ""
+ if msgf := r.FormValue("_msg_field"); msgf != "" {
+ msgField = msgf
+ }
+
+ // Extract stream field names from _stream_fields query arg
+ var streamFields []string
+ if sfs := r.FormValue("_stream_fields"); sfs != "" {
+ streamFields = strings.Split(sfs, ",")
+ }
+
+ // Extract field names, which must be ignored
+ var ignoreFields []string
+ if ifs := r.FormValue("ignore_fields"); ifs != "" {
+ ignoreFields = strings.Split(ifs, ",")
+ }
+
+ lr := logstorage.GetLogRows(streamFields, ignoreFields)
+ processLogMessage := func(timestamp int64, fields []logstorage.Field) {
+ lr.MustAdd(tenantID, timestamp, fields)
+ if lr.NeedFlush() {
+ vlstorage.MustAddRows(lr)
+ lr.Reset()
+ }
+ }
+
+ isGzip := r.Header.Get("Content-Encoding") == "gzip"
+ n, err := readBulkRequest(r.Body, isGzip, timeField, msgField, processLogMessage)
+ if err != nil {
+ logger.Warnf("cannot decode log message #%d in /_bulk request: %s", n, err)
+ return true
+ }
+ vlstorage.MustAddRows(lr)
+ logstorage.PutLogRows(lr)
+
+ tookMs := time.Since(startTime).Milliseconds()
+ bw := bufferedwriter.Get(w)
+ defer bufferedwriter.Put(bw)
+ WriteBulkResponse(bw, n, tookMs)
+ _ = bw.Flush()
+ return true
+ default:
+ return false
+ }
+}
+
+var bulkRequestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/elasticsearch/_bulk"}`)
+
+func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string,
+ processLogMessage func(timestamp int64, fields []logstorage.Field),
+) (int, error) {
+ // See https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
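+ //
+ // The request body consists of (command, log message) line pairs such as:
+ //
+ //   {"create":{"_index":"filebeat-8.8.0"}}
+ //   {"@timestamp":"2023-06-06T04:48:11.735Z","message":"foobar"}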
+
+ if isGzip {
+ zr, err := common.GetGzipReader(r)
+ if err != nil {
+ return 0, fmt.Errorf("cannot read gzipped _bulk request: %w", err)
+ }
+ defer common.PutGzipReader(zr)
+ r = zr
+ }
+
+ wcr := writeconcurrencylimiter.GetReader(r)
+ defer writeconcurrencylimiter.PutReader(wcr)
+
+ lb := lineBufferPool.Get()
+ defer lineBufferPool.Put(lb)
+
+ lb.B = bytesutil.ResizeNoCopyNoOverallocate(lb.B, maxLineSizeBytes.IntN())
+ sc := bufio.NewScanner(wcr)
+ sc.Buffer(lb.B, len(lb.B))
+
+ n := 0
+ nCheckpoint := 0
+ for {
+ ok, err := readBulkLine(sc, timeField, msgField, processLogMessage)
+ wcr.DecConcurrency()
+ if err != nil || !ok {
+ rowsIngestedTotal.Add(n - nCheckpoint)
+ return n, err
+ }
+ n++
+ if batchSize := n - nCheckpoint; batchSize >= 1000 {
+ rowsIngestedTotal.Add(batchSize)
+ nCheckpoint = n
+ }
+ }
+}
+
+var lineBufferPool bytesutil.ByteBufferPool
+
+var rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="elasticsearch_bulk"}`)
+
+func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
+ processLogMessage func(timestamp int64, fields []logstorage.Field),
+) (bool, error) {
+ // Decode command, must be "create" or "index"
+ if !sc.Scan() {
+ if err := sc.Err(); err != nil {
+ if errors.Is(err, bufio.ErrTooLong) {
+ return false, fmt.Errorf(`cannot read "create" or "index" command, since its size exceeds -insert.maxLineSizeBytes=%d`, maxLineSizeBytes.IntN())
+ }
+ return false, err
+ }
+ return false, nil
+ }
+ line := sc.Bytes()
+ p := parserPool.Get()
+ v, err := p.ParseBytes(line)
+ if err != nil {
+ return false, fmt.Errorf(`cannot parse "create" or "index" command: %w`, err)
+ }
+ if v.GetObject("create") == nil && v.GetObject("index") == nil {
+ return false, fmt.Errorf(`unexpected command %q; expected "create" or "index"`, v)
+ }
+ parserPool.Put(p)
+
+ // Decode log message
+ if !sc.Scan() {
+ if err := sc.Err(); err != nil {
+ if errors.Is(err, bufio.ErrTooLong) {
+ return false, fmt.Errorf("cannot read log message, since its size exceeds -insert.maxLineSizeBytes=%d", maxLineSizeBytes.IntN())
+ }
+ return false, err
+ }
+ return false, fmt.Errorf(`missing log message after the "create" or "index" command`)
+ }
+ line = sc.Bytes()
+ pctx := getParserCtx()
+ if err := pctx.parseLogMessage(line); err != nil {
+ invalidJSONLineLogger.Warnf("cannot parse json-encoded log entry: %s", err)
+ return true, nil
+ }
+
+ timestamp, err := extractTimestampFromFields(timeField, pctx.fields)
+ if err != nil {
+ invalidTimestampLogger.Warnf("skipping the log entry because cannot parse timestamp: %s", err)
+ return true, nil
+ }
+ updateMessageFieldName(msgField, pctx.fields)
+ processLogMessage(timestamp, pctx.fields)
+ putParserCtx(pctx)
+ return true, nil
+}
+
+var parserPool fastjson.ParserPool
+
+var (
+ invalidTimestampLogger = logger.WithThrottler("invalidTimestampLogger", 5*time.Second)
+ invalidJSONLineLogger = logger.WithThrottler("invalidJSONLineLogger", 5*time.Second)
+)
+
+func extractTimestampFromFields(timeField string, fields []logstorage.Field) (int64, error) {
+ for i := range fields {
+ f := &fields[i]
+ if f.Name != timeField {
+ continue
+ }
+ timestamp, err := parseElasticsearchTimestamp(f.Value)
+ if err != nil {
+ return 0, err
+ }
+ f.Value = ""
+ return timestamp, nil
+ }
+ return time.Now().UnixNano(), nil
+}
+
+func updateMessageFieldName(msgField string, fields []logstorage.Field) {
+ if msgField == "" {
+ return
+ }
+ for i := range fields {
+ f := &fields[i]
+ if f.Name == msgField {
+ f.Name = "_msg"
+ return
+ }
+ }
+}
+
+type parserCtx struct {
+ p fastjson.Parser
+ buf []byte
+ prefixBuf []byte
+ fields []logstorage.Field
+}
+
+func (pctx *parserCtx) reset() {
+ pctx.buf = pctx.buf[:0]
+ pctx.prefixBuf = pctx.prefixBuf[:0]
+
+ fields := pctx.fields
+ for i := range fields {
+ lf := &fields[i]
+ lf.Name = ""
+ lf.Value = ""
+ }
+ pctx.fields = fields[:0]
+}
+
+func getParserCtx() *parserCtx {
+ v := parserCtxPool.Get()
+ if v == nil {
+ return &parserCtx{}
+ }
+ return v.(*parserCtx)
+}
+
+func putParserCtx(pctx *parserCtx) {
+ pctx.reset()
+ parserCtxPool.Put(pctx)
+}
+
+var parserCtxPool sync.Pool
+
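+// parseLogMessage parses the JSON-encoded log message msg into pctx.fields.
+//
+// Nested JSON objects are flattened; for example, {"foo":{"bar":"baz"}} becomes the "foo.bar":"baz" field.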
+func (pctx *parserCtx) parseLogMessage(msg []byte) error {
+ s := bytesutil.ToUnsafeString(msg)
+ v, err := pctx.p.Parse(s)
+ if err != nil {
+ return fmt.Errorf("cannot parse json: %w", err)
+ }
+ if t := v.Type(); t != fastjson.TypeObject {
+ return fmt.Errorf("expecting json dictionary; got %s", t)
+ }
+ pctx.reset()
+ pctx.fields, pctx.buf, pctx.prefixBuf = appendLogFields(pctx.fields, pctx.buf, pctx.prefixBuf, v)
+ return nil
+}
+
+func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]logstorage.Field, []byte, []byte) {
+ o := v.GetObject()
+ o.Visit(func(k []byte, v *fastjson.Value) {
+ t := v.Type()
+ switch t {
+ case fastjson.TypeNull:
+ // Skip nulls
+ case fastjson.TypeObject:
+ // Flatten nested JSON objects.
+ // For example, {"foo":{"bar":"baz"}} is converted to {"foo.bar":"baz"}
+ prefixLen := len(prefixBuf)
+ prefixBuf = append(prefixBuf, k...)
+ prefixBuf = append(prefixBuf, '.')
+ dst, dstBuf, prefixBuf = appendLogFields(dst, dstBuf, prefixBuf, v)
+ prefixBuf = prefixBuf[:prefixLen]
+ case fastjson.TypeArray, fastjson.TypeNumber, fastjson.TypeTrue, fastjson.TypeFalse:
+ // Convert JSON arrays, numbers, true and false values to their string representation
+ dstBufLen := len(dstBuf)
+ dstBuf = v.MarshalTo(dstBuf)
+ value := dstBuf[dstBufLen:]
+ dst, dstBuf = appendLogField(dst, dstBuf, prefixBuf, k, value)
+ case fastjson.TypeString:
+ // Decode JSON strings
+ dstBufLen := len(dstBuf)
+ dstBuf = append(dstBuf, v.GetStringBytes()...)
+ value := dstBuf[dstBufLen:]
+ dst, dstBuf = appendLogField(dst, dstBuf, prefixBuf, k, value)
+ default:
+ logger.Panicf("BUG: unexpected JSON type: %s", t)
+ }
+ })
+ return dst, dstBuf, prefixBuf
+}
+
+func appendLogField(dst []logstorage.Field, dstBuf, prefixBuf, k, value []byte) ([]logstorage.Field, []byte) {
+ dstBufLen := len(dstBuf)
+ dstBuf = append(dstBuf, prefixBuf...)
+ dstBuf = append(dstBuf, k...)
+ name := dstBuf[dstBufLen:]
+
+ dst = append(dst, logstorage.Field{
+ Name: bytesutil.ToUnsafeString(name),
+ Value: bytesutil.ToUnsafeString(value),
+ })
+ return dst, dstBuf
+}
+
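+// parseElasticsearchTimestamp parses s into Unix nanoseconds.
+//
+// The following formats are supported: Unix timestamp in milliseconds, YYYY-MM-DD date and RFC3339 timestamp.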
+func parseElasticsearchTimestamp(s string) (int64, error) {
+ if len(s) < len("YYYY-MM-DD") || s[len("YYYY")] != '-' {
+ // Try parsing timestamp in milliseconds
+ n, err := strconv.ParseInt(s, 10, 64)
+ if err != nil {
+ return 0, fmt.Errorf("cannot parse timestamp in milliseconds from %q: %w", s, err)
+ }
+ if n > int64(math.MaxInt64)/1e6 {
+ return 0, fmt.Errorf("too big timestamp in milliseconds: %d; mustn't exceed %d", n, int64(math.MaxInt64)/1e6)
+ }
+ if n < int64(math.MinInt64)/1e6 {
+ return 0, fmt.Errorf("too small timestamp in milliseconds: %d; must be bigger than %d", n, int64(math.MinInt64)/1e6)
+ }
+ n *= 1e6
+ return n, nil
+ }
+ if len(s) == len("YYYY-MM-DD") {
+ t, err := time.Parse("2006-01-02", s)
+ if err != nil {
+ return 0, fmt.Errorf("cannot parse date %q: %w", s, err)
+ }
+ return t.UnixNano(), nil
+ }
+ t, err := time.Parse(time.RFC3339, s)
+ if err != nil {
+ return 0, fmt.Errorf("cannot parse timestamp %q: %w", s, err)
+ }
+ return t.UnixNano(), nil
+}
diff --git a/app/vlinsert/elasticsearch/elasticsearch_test.go b/app/vlinsert/elasticsearch/elasticsearch_test.go
new file mode 100644
index 000000000..ff9a5a110
--- /dev/null
+++ b/app/vlinsert/elasticsearch/elasticsearch_test.go
@@ -0,0 +1,97 @@
+package elasticsearch
+
+import (
+ "bytes"
+ "compress/gzip"
+ "fmt"
+ "reflect"
+ "strings"
+ "testing"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
+)
+
+func TestReadBulkRequest(t *testing.T) {
+ f := func(data, timeField, msgField string, rowsExpected int, timestampsExpected []int64, resultExpected string) {
+ t.Helper()
+
+ var timestamps []int64
+ var result string
+ processLogMessage := func(timestamp int64, fields []logstorage.Field) {
+ timestamps = append(timestamps, timestamp)
+
+ a := make([]string, len(fields))
+ for i, f := range fields {
+ a[i] = fmt.Sprintf("%q:%q", f.Name, f.Value)
+ }
+ s := "{" + strings.Join(a, ",") + "}\n"
+ result += s
+ }
+
+ // Read the request without compression
+ r := bytes.NewBufferString(data)
+ rows, err := readBulkRequest(r, false, timeField, msgField, processLogMessage)
+ if err != nil {
+ t.Fatalf("unexpected error: %s", err)
+ }
+ if rows != rowsExpected {
+ t.Fatalf("unexpected rows read; got %d; want %d", rows, rowsExpected)
+ }
+
+ if !reflect.DeepEqual(timestamps, timestampsExpected) {
+ t.Fatalf("unexpected timestamps;\ngot\n%d\nwant\n%d", timestamps, timestampsExpected)
+ }
+ if result != resultExpected {
+ t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected)
+ }
+
+ // Read the request with compression
+ timestamps = nil
+ result = ""
+ compressedData := compressData(data)
+ r = bytes.NewBufferString(compressedData)
+ rows, err = readBulkRequest(r, true, timeField, msgField, processLogMessage)
+ if err != nil {
+ t.Fatalf("unexpected error: %s", err)
+ }
+ if rows != rowsExpected {
+ t.Fatalf("unexpected rows read; got %d; want %d", rows, rowsExpected)
+ }
+
+ if !reflect.DeepEqual(timestamps, timestampsExpected) {
+ t.Fatalf("unexpected timestamps;\ngot\n%d\nwant\n%d", timestamps, timestampsExpected)
+ }
+ if result != resultExpected {
+ t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected)
+ }
+ }
+
+ data := `{"create":{"_index":"filebeat-8.8.0"}}
+{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"}
+{"create":{"_index":"filebeat-8.8.0"}}
+{"@timestamp":"2023-06-06T04:48:12.735Z","message":"baz"}
+{"create":{"_index":"filebeat-8.8.0"}}
+{"message":"xyz","@timestamp":"2023-06-06T04:48:13.735Z","x":"y"}
+`
+ timeField := "@timestamp"
+ msgField := "message"
+ rowsExpected := 3
+ timestampsExpected := []int64{1686026891735000000, 1686026892735000000, 1686026893735000000}
+ resultExpected := `{"@timestamp":"","log.offset":"71770","log.file.path":"/var/log/auth.log","_msg":"foobar"}
+{"@timestamp":"","_msg":"baz"}
+{"_msg":"xyz","@timestamp":"","x":"y"}
+`
+ f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected)
+}
+
+func compressData(s string) string {
+ var bb bytes.Buffer
+ zw := gzip.NewWriter(&bb)
+ if _, err := zw.Write([]byte(s)); err != nil {
+ panic(fmt.Errorf("unexpected error when compressing data: %s", err))
+ }
+ if err := zw.Close(); err != nil {
+ panic(fmt.Errorf("unexpected error when closing gzip writer: %s", err))
+ }
+ return bb.String()
+}
diff --git a/app/vlinsert/elasticsearch/elasticsearch_timing_test.go b/app/vlinsert/elasticsearch/elasticsearch_timing_test.go
new file mode 100644
index 000000000..9a50fe0eb
--- /dev/null
+++ b/app/vlinsert/elasticsearch/elasticsearch_timing_test.go
@@ -0,0 +1,50 @@
+package elasticsearch
+
+import (
+ "bytes"
+ "fmt"
+ "testing"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
+)
+
+func BenchmarkReadBulkRequest(b *testing.B) {
+ b.Run("gzip:off", func(b *testing.B) {
+ benchmarkReadBulkRequest(b, false)
+ })
+ b.Run("gzip:on", func(b *testing.B) {
+ benchmarkReadBulkRequest(b, true)
+ })
+}
+
+func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
+ data := `{"create":{"_index":"filebeat-8.8.0"}}
+{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"}
+{"create":{"_index":"filebeat-8.8.0"}}
+{"@timestamp":"2023-06-06T04:48:12.735Z","message":"baz"}
+{"create":{"_index":"filebeat-8.8.0"}}
+{"message":"xyz","@timestamp":"2023-06-06T04:48:13.735Z","x":"y"}
+`
+ if isGzip {
+ data = compressData(data)
+ }
+ dataBytes := bytesutil.ToUnsafeBytes(data)
+
+ timeField := "@timestamp"
+ msgField := "message"
+ processLogMessage := func(timestamp int64, fields []logstorage.Field) {}
+
+ b.ReportAllocs()
+ b.SetBytes(int64(len(data)))
+ b.RunParallel(func(pb *testing.PB) {
+ r := &bytes.Reader{}
+ for pb.Next() {
+ r.Reset(dataBytes)
+ _, err := readBulkRequest(r, isGzip, timeField, msgField, processLogMessage)
+ if err != nil {
+ panic(fmt.Errorf("unexpected error: %s", err))
+ }
+ }
+ })
+}
diff --git a/app/vlinsert/main.go b/app/vlinsert/main.go
new file mode 100644
index 000000000..64157229f
--- /dev/null
+++ b/app/vlinsert/main.go
@@ -0,0 +1,34 @@
+package vlinsert
+
+import (
+ "net/http"
+ "strings"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/elasticsearch"
+)
+
+// Init initializes vlinsert
+func Init() {
+}
+
+// Stop stops vlinsert
+func Stop() {
+}
+
+// RequestHandler handles insert requests for VictoriaLogs
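+//
+// It routes requests by the /insert/ path prefix. For example, /insert/elasticsearch/_bulk
+// is passed to the elasticsearch handler as /_bulk. Requests without the /insert/ prefix
+// are not handled here and the function returns false.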
+func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
+ path := r.URL.Path
+ if !strings.HasPrefix(path, "/insert/") {
+ return false
+ }
+ path = strings.TrimPrefix(path, "/insert")
+ path = strings.ReplaceAll(path, "//", "/")
+
+ switch {
+ case strings.HasPrefix(path, "/elasticsearch/"):
+ path = strings.TrimPrefix(path, "/elasticsearch")
+ return elasticsearch.RequestHandler(path, w, r)
+ default:
+ return false
+ }
+}
diff --git a/app/vlselect/logsql/logsql.go b/app/vlselect/logsql/logsql.go
new file mode 100644
index 000000000..05d6f8830
--- /dev/null
+++ b/app/vlselect/logsql/logsql.go
@@ -0,0 +1,53 @@
+package logsql
+
+import (
+ "net/http"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
+)
+
+// ProcessQueryRequest handles /select/logsql/query request
+func ProcessQueryRequest(w http.ResponseWriter, r *http.Request, stopCh <-chan struct{}) {
+ // Extract tenantID
+ tenantID, err := logstorage.GetTenantIDFromRequest(r)
+ if err != nil {
+ httpserver.Errorf(w, r, "%s", err)
+ return
+ }
+
+ qStr := r.FormValue("query")
+ q, err := logstorage.ParseQuery(qStr)
+ if err != nil {
+ httpserver.Errorf(w, r, "cannot parse query [%s]: %s", qStr, err)
+ return
+ }
+ w.Header().Set("Content-Type", "application/stream+json; charset=utf-8")
+
+ bw := bufferedwriter.Get(w)
+ defer bufferedwriter.Put(bw)
+
+ tenantIDs := []logstorage.TenantID{tenantID}
+ vlstorage.RunQuery(tenantIDs, q, stopCh, func(columns []logstorage.BlockColumn) {
+ if len(columns) == 0 {
+ return
+ }
+ rowsCount := len(columns[0].Values)
+
+ bb := blockResultPool.Get()
+ for rowIdx := 0; rowIdx < rowsCount; rowIdx++ {
+ WriteJSONRow(bb, columns, rowIdx)
+ }
+ // Do not check for error here, since the only valid error is when the client
+ // closes the connection during the Write() call. There is no need to log this error,
+ // since it may be too verbose and it doesn't give any actionable info.
+ _, _ = bw.Write(bb.B)
+ blockResultPool.Put(bb)
+ })
+ _ = bw.Flush()
+}
+
+var blockResultPool bytesutil.ByteBufferPool
diff --git a/app/vlselect/logsql/query_response.qtpl b/app/vlselect/logsql/query_response.qtpl
new file mode 100644
index 000000000..c98b0c9bd
--- /dev/null
+++ b/app/vlselect/logsql/query_response.qtpl
@@ -0,0 +1,20 @@
+{% import (
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
+) %}
+
+{% stripspace %}
+
+// JSONRow creates JSON row from the given fields.
+{% func JSONRow(columns []logstorage.BlockColumn, rowIdx int) %}
+{
+ {% code c := &columns[0] %}
+ {%q= c.Name %}:{%q= c.Values[rowIdx] %}
+ {% code columns = columns[1:] %}
+ {% for colIdx := range columns %}
+ {% code c := &columns[colIdx] %}
+ ,{%q= c.Name %}:{%q= c.Values[rowIdx] %}
+ {% endfor %}
+}{% newline %}
+{% endfunc %}
+
+{% endstripspace %}
diff --git a/app/vlselect/logsql/query_response.qtpl.go b/app/vlselect/logsql/query_response.qtpl.go
new file mode 100644
index 000000000..d3d6cf1c1
--- /dev/null
+++ b/app/vlselect/logsql/query_response.qtpl.go
@@ -0,0 +1,90 @@
+// Code generated by qtc from "query_response.qtpl". DO NOT EDIT.
+// See https://github.com/valyala/quicktemplate for details.
+
+//line app/vlselect/logsql/query_response.qtpl:1
+package logsql
+
+//line app/vlselect/logsql/query_response.qtpl:1
+import (
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
+)
+
+// JSONRow creates JSON row from the given fields.
+
+//line app/vlselect/logsql/query_response.qtpl:8
+import (
+ qtio422016 "io"
+
+ qt422016 "github.com/valyala/quicktemplate"
+)
+
+//line app/vlselect/logsql/query_response.qtpl:8
+var (
+ _ = qtio422016.Copy
+ _ = qt422016.AcquireByteBuffer
+)
+
+//line app/vlselect/logsql/query_response.qtpl:8
+func StreamJSONRow(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
+//line app/vlselect/logsql/query_response.qtpl:8
+ qw422016.N().S(`{`)
+//line app/vlselect/logsql/query_response.qtpl:10
+ c := &columns[0]
+
+//line app/vlselect/logsql/query_response.qtpl:11
+ qw422016.N().Q(c.Name)
+//line app/vlselect/logsql/query_response.qtpl:11
+ qw422016.N().S(`:`)
+//line app/vlselect/logsql/query_response.qtpl:11
+ qw422016.N().Q(c.Values[rowIdx])
+//line app/vlselect/logsql/query_response.qtpl:12
+ columns = columns[1:]
+
+//line app/vlselect/logsql/query_response.qtpl:13
+ for colIdx := range columns {
+//line app/vlselect/logsql/query_response.qtpl:14
+ c := &columns[colIdx]
+
+//line app/vlselect/logsql/query_response.qtpl:14
+ qw422016.N().S(`,`)
+//line app/vlselect/logsql/query_response.qtpl:15
+ qw422016.N().Q(c.Name)
+//line app/vlselect/logsql/query_response.qtpl:15
+ qw422016.N().S(`:`)
+//line app/vlselect/logsql/query_response.qtpl:15
+ qw422016.N().Q(c.Values[rowIdx])
+//line app/vlselect/logsql/query_response.qtpl:16
+ }
+//line app/vlselect/logsql/query_response.qtpl:16
+ qw422016.N().S(`}`)
+//line app/vlselect/logsql/query_response.qtpl:17
+ qw422016.N().S(`
+`)
+//line app/vlselect/logsql/query_response.qtpl:18
+}
+
+//line app/vlselect/logsql/query_response.qtpl:18
+func WriteJSONRow(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
+//line app/vlselect/logsql/query_response.qtpl:18
+ qw422016 := qt422016.AcquireWriter(qq422016)
+//line app/vlselect/logsql/query_response.qtpl:18
+ StreamJSONRow(qw422016, columns, rowIdx)
+//line app/vlselect/logsql/query_response.qtpl:18
+ qt422016.ReleaseWriter(qw422016)
+//line app/vlselect/logsql/query_response.qtpl:18
+}
+
+//line app/vlselect/logsql/query_response.qtpl:18
+func JSONRow(columns []logstorage.BlockColumn, rowIdx int) string {
+//line app/vlselect/logsql/query_response.qtpl:18
+ qb422016 := qt422016.AcquireByteBuffer()
+//line app/vlselect/logsql/query_response.qtpl:18
+ WriteJSONRow(qb422016, columns, rowIdx)
+//line app/vlselect/logsql/query_response.qtpl:18
+ qs422016 := string(qb422016.B)
+//line app/vlselect/logsql/query_response.qtpl:18
+ qt422016.ReleaseByteBuffer(qb422016)
+//line app/vlselect/logsql/query_response.qtpl:18
+ return qs422016
+//line app/vlselect/logsql/query_response.qtpl:18
+}
diff --git a/app/vlselect/main.go b/app/vlselect/main.go
new file mode 100644
index 000000000..022740496
--- /dev/null
+++ b/app/vlselect/main.go
@@ -0,0 +1,140 @@
+package vlselect
+
+import (
+ "flag"
+ "fmt"
+ "net/http"
+ "strings"
+ "time"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vlselect/logsql"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
+ "github.com/VictoriaMetrics/metrics"
+)
+
+var (
+ maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. "+
+ "It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. "+
+ "See also -search.maxQueueDuration")
+ maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the search request waits for execution when -search.maxConcurrentRequests "+
+ "limit is reached; see also -search.maxQueryDuration")
+ maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution")
+)
+
+func getDefaultMaxConcurrentRequests() int {
+ n := cgroup.AvailableCPUs()
+ if n <= 4 {
+ n *= 2
+ }
+ if n > 16 {
+ // A single request can saturate all the CPU cores, so there is no sense
+ // in allowing higher number of concurrent requests - they will just contend
+ // for unavailable CPU time.
+ n = 16
+ }
+ return n
+}
+
+// Init initializes vlselect
+func Init() {
+ concurrencyLimitCh = make(chan struct{}, *maxConcurrentRequests)
+}
+
+// Stop stops vlselect
+func Stop() {
+}
+
+var concurrencyLimitCh chan struct{}
+
+var (
+ concurrencyLimitReached = metrics.NewCounter(`vl_concurrent_select_limit_reached_total`)
+ concurrencyLimitTimeout = metrics.NewCounter(`vl_concurrent_select_limit_timeout_total`)
+
+ _ = metrics.NewGauge(`vl_concurrent_select_capacity`, func() float64 {
+ return float64(cap(concurrencyLimitCh))
+ })
+ _ = metrics.NewGauge(`vl_concurrent_select_current`, func() float64 {
+ return float64(len(concurrencyLimitCh))
+ })
+)
+
+// RequestHandler handles select requests for VictoriaLogs
+func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
+ path := r.URL.Path
+ if !strings.HasPrefix(path, "/select/") {
+ return false
+ }
+ path = strings.TrimPrefix(path, "/select")
+ path = strings.ReplaceAll(path, "//", "/")
+
+ // Limit the number of concurrent queries.
+ startTime := time.Now()
+ stopCh := r.Context().Done()
+ select {
+ case concurrencyLimitCh <- struct{}{}:
+ defer func() { <-concurrencyLimitCh }()
+ default:
+ // Wait for a while before giving up. This should absorb short bursts of requests.
+ concurrencyLimitReached.Inc()
+ d := getMaxQueryDuration(r)
+ if d > *maxQueueDuration {
+ d = *maxQueueDuration
+ }
+ t := timerpool.Get(d)
+ select {
+ case concurrencyLimitCh <- struct{}{}:
+ timerpool.Put(t)
+ defer func() { <-concurrencyLimitCh }()
+ case <-stopCh:
+ timerpool.Put(t)
+ remoteAddr := httpserver.GetQuotedRemoteAddr(r)
+ requestURI := httpserver.GetRequestURI(r)
+ logger.Infof("client has cancelled the request after %.3f seconds: remoteAddr=%s, requestURI: %q",
+ time.Since(startTime).Seconds(), remoteAddr, requestURI)
+ return true
+ case <-t.C:
+ timerpool.Put(t)
+ concurrencyLimitTimeout.Inc()
+ err := &httpserver.ErrorWithStatusCode{
+ Err: fmt.Errorf("couldn't start executing the request in %.3f seconds, since -search.maxConcurrentRequests=%d concurrent requests "+
+ "are executed. Possible solutions: to reduce query load; to add more compute resources to the server; "+
+ "to increase -search.maxQueueDuration=%s; to increase -search.maxQueryDuration; to increase -search.maxConcurrentRequests",
+ d.Seconds(), *maxConcurrentRequests, maxQueueDuration),
+ StatusCode: http.StatusServiceUnavailable,
+ }
+ httpserver.Errorf(w, r, "%s", err)
+ return true
+ }
+ }
+
+ switch {
+ case path == "/logsql/query":
+ logsqlQueryRequests.Inc()
+ httpserver.EnableCORS(w, r)
+ logsql.ProcessQueryRequest(w, r, stopCh)
+ return true
+ default:
+ return false
+ }
+}
+
+// getMaxQueryDuration returns the maximum duration for query from r.
+func getMaxQueryDuration(r *http.Request) time.Duration {
+ dms, err := httputils.GetDuration(r, "timeout", 0)
+ if err != nil {
+ dms = 0
+ }
+ d := time.Duration(dms) * time.Millisecond
+ if d <= 0 || d > *maxQueryDuration {
+ d = *maxQueryDuration
+ }
+ return d
+}
+
+var (
+ logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
+)
diff --git a/app/vlstorage/main.go b/app/vlstorage/main.go
new file mode 100644
index 000000000..afaa78f1d
--- /dev/null
+++ b/app/vlstorage/main.go
@@ -0,0 +1,149 @@
+package vlstorage
+
+import (
+ "flag"
+ "fmt"
+ "sync"
+ "time"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
+ "github.com/VictoriaMetrics/metrics"
+)
+
+var (
+ retentionPeriod = flagutil.NewDuration("retentionPeriod", "7d", "Log entries with timestamps older than now-retentionPeriod are automatically deleted; "+
+ "log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); "+
+ "see https://docs.victoriametrics.com/VictoriaLogs/#retention")
+ futureRetention = flagutil.NewDuration("futureRetention", "2d", "Log entries with timestamps bigger than now+futureRetention are rejected during data ingestion; "+
+ "see https://docs.victoriametrics.com/VictoriaLogs/#retention")
+ storageDataPath = flag.String("storageDataPath", "victoria-logs-data", "Path to directory with the VictoriaLogs data; "+
+ "see https://docs.victoriametrics.com/VictoriaLogs/#storage")
+ inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
+ "The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. "+
+ "Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
+ "Smaller intervals increase disk IO load. Minimum supported value is 1s")
+ logNewStreams = flag.Bool("logNewStreams", false, "Whether to log creation of new streams; this can be useful for debugging of high cardinality issues with log streams; "+
+ "see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields ; see also -logIngestedRows")
+ logIngestedRows = flag.Bool("logIngestedRows", false, "Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; "+
+ "see https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion ; see also -logNewStreams")
+)
+
+// Init initializes vlstorage.
+//
+// Stop must be called when vlstorage is no longer needed
+func Init() {
+ if strg != nil {
+ logger.Panicf("BUG: Init() has been already called")
+ }
+
+ if retentionPeriod.Msecs < 24*3600*1000 {
+ logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s", retentionPeriod)
+ }
+ cfg := &logstorage.StorageConfig{
+ Retention: time.Millisecond * time.Duration(retentionPeriod.Msecs),
+ FlushInterval: *inmemoryDataFlushInterval,
+ FutureRetention: time.Millisecond * time.Duration(futureRetention.Msecs),
+ LogNewStreams: *logNewStreams,
+ LogIngestedRows: *logIngestedRows,
+ }
+ strg = logstorage.MustOpenStorage(*storageDataPath, cfg)
+ storageMetrics = initStorageMetrics(strg)
+ metrics.RegisterSet(storageMetrics)
+}
+
+// Stop stops vlstorage.
+func Stop() {
+ metrics.UnregisterSet(storageMetrics)
+ storageMetrics = nil
+
+ strg.MustClose()
+ strg = nil
+}
+
+var strg *logstorage.Storage
+var storageMetrics *metrics.Set
+
+// MustAddRows adds lr to vlstorage
+func MustAddRows(lr *logstorage.LogRows) {
+ strg.MustAddRows(lr)
+}
+
+// RunQuery runs the given q and calls processBlock for the returned data blocks
+func RunQuery(tenantIDs []logstorage.TenantID, q *logstorage.Query, stopCh <-chan struct{}, processBlock func(columns []logstorage.BlockColumn)) {
+ strg.RunQuery(tenantIDs, q, stopCh, processBlock)
+}
+
+func initStorageMetrics(strg *logstorage.Storage) *metrics.Set {
+ ssCache := &logstorage.StorageStats{}
+ var ssCacheLock sync.Mutex
+ var lastUpdateTime time.Time
+
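+ // m returns cached storage stats in order to avoid calling strg.UpdateStats()
+ // more frequently than once per second during metrics scrapes.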
+ m := func() *logstorage.StorageStats {
+ ssCacheLock.Lock()
+ defer ssCacheLock.Unlock()
+ if time.Since(lastUpdateTime) < time.Second {
+ return ssCache
+ }
+ var ss logstorage.StorageStats
+ strg.UpdateStats(&ss)
+ ssCache = &ss
+ lastUpdateTime = time.Now()
+ return ssCache
+ }
+
+ ms := metrics.NewSet()
+
+ ms.NewGauge(fmt.Sprintf(`vl_free_disk_space_bytes{path=%q}`, *storageDataPath), func() float64 {
+ return float64(fs.MustGetFreeSpace(*storageDataPath))
+ })
+
+ ms.NewGauge(`vl_rows{type="inmemory"}`, func() float64 {
+ return float64(m().InmemoryRowsCount)
+ })
+ ms.NewGauge(`vl_rows{type="file"}`, func() float64 {
+ return float64(m().FileRowsCount)
+ })
+ ms.NewGauge(`vl_parts{type="inmemory"}`, func() float64 {
+ return float64(m().InmemoryParts)
+ })
+ ms.NewGauge(`vl_parts{type="file"}`, func() float64 {
+ return float64(m().FileParts)
+ })
+ ms.NewGauge(`vl_blocks{type="inmemory"}`, func() float64 {
+ return float64(m().InmemoryBlocks)
+ })
+ ms.NewGauge(`vl_blocks{type="file"}`, func() float64 {
+ return float64(m().FileBlocks)
+ })
+ ms.NewGauge(`vl_partitions`, func() float64 {
+ return float64(m().PartitionsCount)
+ })
+ ms.NewGauge(`vl_streams_created_total`, func() float64 {
+ return float64(m().StreamsCreatedTotal)
+ })
+
+ ms.NewGauge(`vl_compressed_data_size_bytes{type="inmemory"}`, func() float64 {
+ return float64(m().CompressedInmemorySize)
+ })
+ ms.NewGauge(`vl_compressed_data_size_bytes{type="file"}`, func() float64 {
+ return float64(m().CompressedFileSize)
+ })
+ ms.NewGauge(`vl_uncompressed_data_size_bytes{type="inmemory"}`, func() float64 {
+ return float64(m().UncompressedInmemorySize)
+ })
+ ms.NewGauge(`vl_uncompressed_data_size_bytes{type="file"}`, func() float64 {
+ return float64(m().UncompressedFileSize)
+ })
+
+ ms.NewGauge(`vlinsert_rows_dropped_total{reason="too_big_timestamp"}`, func() float64 {
+ return float64(m().RowsDroppedTooBigTimestamp)
+ })
+ ms.NewGauge(`vlinsert_rows_dropped_total{reason="too_small_timestamp"}`, func() float64 {
+ return float64(m().RowsDroppedTooSmallTimestamp)
+ })
+
+ return ms
+}
diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md
new file mode 100644
index 000000000..edb7e2b2b
--- /dev/null
+++ b/docs/VictoriaLogs/LogsQL.md
@@ -0,0 +1,1087 @@
+# LogsQL
+
+LogsQL is a simple yet powerful query language for VictoriaLogs. It provides the following features:
+
+- Full-text search across [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+ See [word filter](#word-filter), [phrase filter](#phrase-filter) and [prefix filter](#prefix-filter).
+- Ability to combine filters into arbitrary complex [logical filters](#logical-filter).
+- Ability to extract structured fields from unstructured logs at query time. See [these docs](#transformations).
+- Ability to calculate various stats over the selected log entries. See [these docs](#stats).
+
+## LogsQL tutorial
+
+If you aren't familiar with VictoriaLogs, then start with [key concepts docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html).
+
+Then follow these docs:
+- [How to run VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/#how-to-run-victorialogs).
+- [How to ingest data into VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/#querying).
+
+The simplest LogsQL query is just a [word](#word), which must be found in the [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+For example, the following query finds all the logs with `error` word:
+
+```logsql
+error
+```
+
+This query matches logs with any [timestamp](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field),
+e.g. it may return logs from the previous year alongside recently ingested logs.
+
+If the queried [word](#word) clashes with LogsQL keywords, then just wrap it into quotes.
+For example, the following query finds all the log messages with `and` [word](#word):
+
+```logsql
+"and"
+```
+
+It is OK to wrap any word into quotes. For example:
+
+```logsql
+"error"
+```
+
+Moreover, it is possible to wrap phrases containing multiple words in quotes. For example, the following query
+finds log messages with the `error: cannot find file` phrase:
+
+```logsql
+"error: cannot find file"
+```
+
+Usually logs from the previous year aren't as interesting as the recently ingested logs,
+so it is recommended to add a [time filter](#time-filter) to the query.
+For example, the following query returns logs with the `error` [word](#word),
+which were ingested into VictoriaLogs during the last 5 minutes:
+
+```logsql
+error AND _time:[now-5m,now]
+```
+
+This query consists of two [filters](#filters) joined with `AND` [operator](#logical-filter):
+
+- The filter on the `error` [word](#word).
+- The filter on the [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field).
+
+The `AND` operator means that the [log entry](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must match both filters in order to be selected.
+
+A typical LogsQL query consists of multiple [filters](#filters) joined with the `AND` operator. Typing and then reading all these `AND` words may be tiresome,
+so LogsQL allows omitting them. For example, the following query is equivalent to the query above:
+
+```logsql
+error _time:[now-5m,now]
+```
+
+The query returns the following [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) by default:
+
+- [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
+- [`_stream` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
+- [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field)
+
+Logs may contain an arbitrary number of other fields. If you need some of these fields in query results,
+then just refer to them in the query with the `field_name:*` [filter](#any-value-filter).
+For example, the following query returns the `host.hostname` field in addition to the `_msg`, `_stream` and `_time` fields:
+
+```logsql
+error _time:[now-5m,now] host.hostname:*
+```
+
+Suppose the query above selects too many rows because some buggy app pushes invalid error logs to VictoriaLogs. Suppose the app adds `buggy_app` [word](#word) to every log line.
+Then the following query removes all the logs from the buggy app, allowing us to pay attention to the real errors:
+
+```logsql
+_time:[now-5m,now] error NOT buggy_app
+```
+
+This query uses the `NOT` [operator](#logical-filter) to remove log lines from the buggy app. The `NOT` operator is used frequently, so it can be substituted with the `!` char.
+So the following query is equivalent to the previous one:
+
+```logsql
+_time:[now-5m,now] error !buggy_app
+```
+
+Suppose another buggy app starts pushing invalid error logs to VictoriaLogs - it adds `foobar` [word](#word) to every emitted log line.
+No problem - just add `!foobar` to the query in order to remove these buggy logs:
+
+```logsql
+_time:[now-5m,now] error !buggy_app !foobar
+```
+
+This query can be rewritten into a clearer one with the `OR` [operator](#logical-filter) inside parentheses:
+
+```logsql
+_time:[now-5m,now] error !(buggy_app OR foobar)
+```
+
+Note that the parentheses are required here, since otherwise the query won't return the expected results.
+The query `error !buggy_app OR foobar` is interpreted as `(error AND NOT buggy_app) OR foobar`. This query may return error logs
+from the buggy app if they contain the `foobar` [word](#word). It also keeps returning all the error logs from the second buggy app.
+This happens because of the different priorities of the `NOT`, `AND` and `OR` operators.
+Read [these docs](#logical-filter) for more details. There is no need to remember all these priority rules -
+just wrap the needed query parts into explicit parentheses if you aren't sure about them.
+As an additional bonus, explicit parentheses make queries easier to read and maintain.
+
+The queries above assume that the `error` [word](#word) is stored in the [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+This word may be stored in another [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) such as `log.level`.
+How to select error logs in this case? Just add the `log.level:` prefix in front of the `error` word:
+
+```logsql
+_time:[now-5m,now] log.level:error !(buggy_app OR foobar)
+```
+
+The field name can be wrapped into quotes if it contains special chars or keywords, which may clash with LogsQL syntax.
+Any [word](#word) can also be wrapped into quotes. So the following query is equivalent to the previous one:
+
+```logsql
+"_time":[now-5m,now] "log.level":"error" !("buggy_app" OR "foobar")
+```
+
+What if the application identifier - such as `buggy_app` and `foobar` - is stored in the `app` field? Then just add the `app:` prefix in front of `buggy_app` and `foobar`:
+
+```logsql
+_time:[now-5m,now] log.level:error !(app:buggy_app OR app:foobar)
+```
+
+The query can be simplified by moving the `app:` prefix outside the parentheses:
+
+```logsql
+_time:[now-5m,now] log.level:error !app:(buggy_app OR foobar)
+```
+
+The `app` field uniquely identifies the application instance if a single instance runs per unique `app` value.
+In this case it is recommended associating the `app` field with [log stream fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion). This usually improves both the compression rate
+and the query performance when querying the needed streams via the [`_stream` filter](#stream-filter).
+If the `app` field is associated with the log stream, then the query above can be rewritten into a more performant one:
+
+```logsql
+_time:[now-5m,now] log.level:error _stream:{app!~"buggy_app|foobar"}
+```
+
+This query completely skips scanning for logs from `buggy_app` and `foobar` apps, thus significantly reducing disk read IO and CPU time
+needed for performing the query.
+
+Finally, it is recommended reading [performance tips](#performance-tips).
+
+Now you are familiar with LogsQL basics. Read [query syntax](#query-syntax) if you want to continue learning LogsQL.
+
+### Key concepts
+
+#### Word
+
+LogsQL splits all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) into words
+delimited by non-word chars such as whitespace, parens, punctuation chars, etc. For example, the `foo: (bar,"тест")!` string
+is split into `foo`, `bar` and `тест` words. Words can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8) chars.
+These words are taken into account by full-text search filters such as
+[word filter](#word-filter), [phrase filter](#phrase-filter) and [prefix filter](#prefix-filter).
+
+#### Query syntax
+
+LogsQL query consists of the following parts delimited by `|`:
+
+- [Filters](#filters), which select log entries for further processing. This part is required in LogsQL. Other parts are optional.
+- Optional [stream context](#stream-context), which allows selecting surrounding log lines for the matching log lines.
+- Optional [transformations](#transformations) for the selected log fields.
+  For example, additional fields can be extracted or constructed from existing fields.
+- Optional [post-filters](#post-filters) for post-filtering of the selected results. For example, post-filtering can filter
+ results based on the fields constructed by [transformations](#transformations).
+- Optional [stats](#stats) transformations, which can calculate various stats across selected results.
+- Optional [sorting](#sorting), which can sort the results by the specified fields.
+- Optional [limiters](#limiters), which can apply various limits on the selected results.
+
+## Filters
+
+LogsQL supports various filters for searching for log messages (see below).
+They can be combined into arbitrarily complex queries via [logical filters](#logical-filter).
+
+Filters are applied to [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) by default.
+If the filter must be applied to other [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model),
+then its name followed by a colon must be put in front of the filter. For example, if `error` [word filter](#word-filter) must be applied
+to the `log.level` field, then use `log.level:error` query.
+
+Field names and filter args can be put into quotes if they contain special chars, which may clash with LogsQL syntax. LogsQL supports quoting via double quotes `"`,
+single quotes `'` and backticks:
+
+```logsql
+"some 'field':123":i('some("value")') AND `other"value'`
+```
+
+If in doubt, it is recommended quoting field names and filter args.
+
+
+The list of LogsQL filters:
+
+- [Time filter](#time-filter) - matches logs with [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) in the given time range
+- [Stream filter](#stream-filter) - matches logs, which belong to the given [streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
+- [Word filter](#word-filter) - matches logs with the given [word](#word)
+- [Phrase filter](#phrase-filter) - matches logs with the given phrase
+- [Prefix filter](#prefix-filter) - matches logs with the given word prefix or phrase prefix
+- [Empty value filter](#empty-value-filter) - matches logs without the given [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+- [Any value filter](#any-value-filter) - matches logs with the given non-empty [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+- [Exact filter](#exact-filter) - matches logs with the exact value
+- [Exact prefix filter](#exact-prefix-filter) - matches logs starting with the given prefix
+- [Multi-exact filter](#multi-exact-filter) - matches logs with at least one of the specified exact values
+- [Case-insensitive filter](#case-insensitive-filter) - matches logs with the given case-insensitive word, phrase or prefix
+- [Sequence filter](#sequence-filter) - matches logs with the given sequence of words or phrases
+- [Regexp filter](#regexp-filter) - matches logs for the given regexp
+- [Range filter](#range-filter) - matches logs with numeric [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in the given range
+- [IPv4 range filter](#ipv4-range-filter) - matches logs with ip address [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in the given range
+- [String range filter](#string-range-filter) - matches logs with [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in the given string range
+- [Length range filter](#length-range-filter) - matches logs with [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) of the given length range
+- [Logical filter](#logical-filter) - allows combining other filters
+
+
+### Time filter
+
+VictoriaLogs scans all the logs per query if it doesn't contain a filter on the [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field).
+It uses various optimizations in order to speed up full scan queries without the `_time` filter,
+but such queries can be slow if the storage contains a large number of logs over a long time range. The easiest way to optimize queries
+is to narrow down the search with a filter on the [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field).
+
+For example, the following query returns [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
+ingested into VictoriaLogs during the last hour, which contain the `error` [word](#word):
+
+```logsql
+_time:(now-1h, now) AND error
+```
+
+The following formats are supported for `_time` filter:
+
+- Fixed time:
+ - `_time:YYYY-MM-DD` - matches all the log messages for the particular day. For example, `_time:2023-04-25` matches all the log messages for April 25, 2023 by UTC.
+ - `_time:YYYY-MM` - matches all the log messages for the particular month. For example, `_time:2023-02` matches all the log messages for February, 2023 by UTC.
+ - `_time:YYYY` - matches all the log messages for the particular year. For example, `_time:2023` matches all the log messages for 2023 by UTC.
+ - `_time:YYYY-MM-DDTHH` - matches all the log messages for the particular hour. For example, `_time:2023-04-25T22` matches all the log messages from `22:00` to `23:00`
+ on April 25, 2023 by UTC.
+ - `_time:YYYY-MM-DDTHH:MM` - matches all the log messages for the particular minute. For example, `_time:2023-04-25T22:45` matches all the log messages from `22:45` to `22:46`
+ on April 25, 2023 by UTC.
+ - `_time:YYYY-MM-DDTHH:MM:SS` - matches all the log messages for the particular second. For example, `_time:2023-04-25T22:45:59` matches all the log messages
+ from `22:45:59` to `22:46:00` on April 25, 2023 by UTC.
+
+- Time range:
+ - `_time:[min_time, max_time]` - matches log messages on the time range `[min_time, max_time]`, including both `min_time` and `max_time`.
+ The `min_time` and `max_time` can contain any format specified [here](https://docs.victoriametrics.com/#timestamp-formats).
+ For example, `_time:[2023-04-01, 2023-04-30]` matches log messages for the whole April, 2023 by UTC, i.e. it is equivalent to `_time:2023-04`.
+ - `_time:[min_time, max_time)` - matches log messages on the time range `[min_time, max_time)`, not including `max_time`.
+ The `min_time` and `max_time` can contain any format specified [here](https://docs.victoriametrics.com/#timestamp-formats).
+ For example, `_time:[2023-02-01, 2023-03-01)` matches log messages for the whole February, 2023 by UTC, i.e. it is equivalent to `_time:2023-02`.
+
+It is possible to specify the time zone offset for all the absolute time formats by appending the `+hh:mm` or `-hh:mm` suffix.
+For example, `_time:2023-04-25+05:30` matches all the log messages on April 25, 2023 by India time zone,
+while `_time:2023-02-08:00` matches all the log messages from February, 2023 by California time zone.
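+
+For instance, the time zone offset can be combined with other filters in the usual way; a query like the following one should select logs with the `error` [word](#word) for April 25, 2023 by India time zone:
+
+```logsql
+_time:2023-04-25+05:30 AND error
+```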
+
+Performance tips:
+
+- It is recommended specifying the smallest possible time range during the search, since it reduces the amount of log entries, which need to be scanned during the query.
+ For example, `_time:[now-1h, now]` is usually faster than `_time:[now-5h, now]`.
+
+- While LogsQL supports arbitrary number of `_time:...` filters at any level of [logical filters](#logical-filter),
+ it is recommended specifying a single `_time` filter at the top level of the query.
+
+- See [other performance tips](#performance-tips).
+
+See also:
+
+- [Stream filter](#stream-filter)
+- [Word filter](#word-filter)
+
+### Stream filter
+
+VictoriaLogs provides an optimized way to select log entries, which belong to particular [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
+This can be done via `_stream:{...}` filter. The `{...}` may contain arbitrary [Prometheus-compatible label selector](https://docs.victoriametrics.com/keyConcepts.html#filtering)
+over fields associated with [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
+For example, the following query selects [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+with `app` field equal to `nginx`:
+
+```logsql
+_stream:{app="nginx"}
+```
+
+This query is equivalent to the following [exact()](#exact-filter) query, but the query above usually works much faster:
+
+```logsql
+app:exact("nginx")
+```
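+
+The `{...}` selector may contain multiple label filters in the usual Prometheus-like syntax. For instance, assuming the log streams are also labeled with an `env` field (just an illustration - substitute it with a real stream field), a query like the following one would select only `nginx` logs from streams with `env="prod"`:
+
+```logsql
+_stream:{app="nginx",env="prod"}
+```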
+
+Performance tips:
+
+- It is recommended using the most specific `_stream:{...}` filter matching the smallest number of log streams,
+ which need to be scanned by the rest of the filters in the query.
+
+- While LogsQL supports arbitrary number of `_stream:{...}` filters at any level of [logical filters](#logical-filter),
+ it is recommended specifying a single `_stream:...` filter at the top level of the query.
+
+- See [other performance tips](#performance-tips).
+
+See also:
+
+- [Time filter](#time-filter)
+- [Exact filter](#exact-filter)
+
+### Word filter
+
+The simplest LogsQL query consists of a single [word](#word) to search in log messages. For example, the following query matches
+[log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with `error` [word](#word) inside them:
+
+```logsql
+error
+```
+
+This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
+
+- `error`
+- `an error happened`
+- `error: cannot open file`
+
+This query doesn't match the following log messages:
+
+- `ERROR`, since the filter is case-sensitive by default. Use `i(error)` for this case. See [these docs](#case-insensitive-filter) for details.
+- `multiple errors occurred`, since the `errors` word doesn't match the `error` word. Use `error*` for this case. See [these docs](#prefix-filter) for details.
+
+By default the given [word](#word) is searched in the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the word and put a colon after it
+if it must be searched in the given field. For example, the following query returns log entries containing the `error` [word](#word) in the `log.level` field:
+
+```logsql
+log.level:error
+```
+
+Both the field name and the word in the query can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example:
+
+```logsql
+поле:значение
+```
+
+Both the field name and the word in the query can be put inside quotes if they contain special chars, which may clash with the query syntax.
+For example, the following query searches for the ip `1.2.3.45` in the field `ip:remote`:
+
+```logsql
+"ip:remote":"1.2.3.45"
+```
+
+See also:
+
+- [Phrase filter](#phrase-filter)
+- [Exact filter](#exact-filter)
+- [Prefix filter](#prefix-filter)
+- [Logical filter](#logical-filter)
+
+
+### Phrase filter
+
+If you need to search for log messages with the specific phrase inside them, then just wrap the phrase in quotes.
+The phrase can contain any chars, including whitespace, punctuation, parens, etc. They are taken into account during the search.
+For example, the following query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with `cannot open file` phrase inside them:
+
+```logsql
+"cannot open file"
+```
+
+This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
+
+- `ERROR: cannot open file /foo/bar/baz`
+- `cannot open file: permission denied`
+
+This query doesn't match the following log messages:
+
+- `cannot  open  file`, since the number of whitespace chars between words doesn't match the number of whitespace chars in the search phrase.
+ Use `seq("cannot", "open", "file")` query instead. See [these docs](#sequence-filter) for details.
+- `open file: cannot do this`, since the message doesn't contain the full phrase requested in the query. If you need matching a message
+ with all the [words](#word) listed in the query, then use `cannot AND open AND file` query. See [these docs](#logical-filter) for details.
+- `cannot open files`, since the message ends with `files` [word](#word) instead of `file` word. Use `"cannot open file"*` query for this case.
+ See [these docs](#prefix-filter) for details.
+- `Cannot open file: failure`, since the `Cannot` word starts with capital letter. Use `i("cannot open file")` for this case.
+ See [these docs](#case-insensitive-filter) for details.
+
+By default the given phrase is searched in the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the phrase and put a colon after it
+if it must be searched in the given field. For example, the following query returns log entries containing the `cannot open file` phrase in the `event.original` field:
+
+```logsql
+event.original:"cannot open file"
+```
+
+Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example:
+
+```logsql
+сообщение:"невозможно открыть файл"
+```
+
+The field name can be put inside quotes if it contains special chars, which may clash with the query syntax.
+For example, the following query searches for the `cannot open file` phrase in the field `some:message`:
+
+```logsql
+"some:message":"cannot open file"
+```
+
+See also:
+
+- [Exact filter](#exact-filter)
+- [Word filter](#word-filter)
+- [Prefix filter](#prefix-filter)
+- [Logical filter](#logical-filter)
+
+
+### Prefix filter
+
+If you need to search for log messages with [words](#word) / phrases containing some prefix, then just add `*` char to the end of the [word](#word) / phrase in the query.
+For example, the following query returns [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), which contain [words](#word) with `err` prefix:
+
+```logsql
+err*
+```
+
+This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
+
+- `err: foobar`
+- `cannot open file: error occurred`
+
+This query doesn't match the following log messages:
+
+- `Error: foobar`, since the `Error` [word](#word) starts with capital letter. Use `i(err*)` for this case. See [these docs](#case-insensitive-filter) for details.
+- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `re("err")` for this case. See [these docs](#regexp-filter) for details.
+
+Prefix filter can be applied to [phrases](#phrase-filter). For example, the following query matches
+[log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing phrases with `unexpected fail` prefix:
+
+```logsql
+"unexpected fail"*
+```
+
+This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
+
+- `unexpected fail: IO error`
+- `error:unexpected failure`
+
+This query doesn't match the following log messages:
+
+- `unexpectedly failed`, since the `unexpectedly` doesn't match `unexpected` [word](#word). Use `unexpected* AND fail*` for this case.
+ See [these docs](#logical-filter) for details.
+- `failed to open file: unexpected EOF`, since `failed` [word](#word) occurs before the `unexpected` word. Use `unexpected AND fail*` for this case.
+ See [these docs](#logical-filter) for details.
+
+By default the prefix filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the prefix filter
+in order to apply it to the given field. For example, the following query matches `log.level` field containing any word with the `err` prefix:
+
+```logsql
+log.level:err*
+```
+
+If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query.
+For example, the following query matches the `log:level` field containing any word with the `err` prefix:
+
+```logsql
+"log:level":err*
+```
+
+Performance tips:
+
+- Prefer using [word filters](#word-filter) and [phrase filters](#phrase-filter) combined via [logical filter](#logical-filter)
+ instead of prefix filter.
+- Prefer moving [word filters](#word-filter) and [phrase filters](#phrase-filter) in front of the prefix filter when using [logical filter](#logical-filter) - see the example after these tips.
+- See [other performance tips](#performance-tips).
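+
+For instance, if both a [word filter](#word-filter) and a prefix filter are needed in the same query, then it is better to put the word filter first - a query like the following one should usually be faster than `unexpected* error` (the `unexpected*` prefix here is just an illustration):
+
+```logsql
+error unexpected*
+```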
+
+See also:
+
+- [Exact prefix filter](#exact-prefix-filter)
+- [Word filter](#word-filter)
+- [Phrase filter](#phrase-filter)
+- [Exact filter](#exact-filter)
+- [Logical filter](#logical-filter)
+
+
+### Empty value filter
+
+Sometimes it is needed to find log entries without the given [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+This can be performed with `log_field:""` syntax. For example, the following query matches log entries without `host.hostname` field:
+
+```logsql
+host.hostname:""
+```
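+
+The empty value filter is usually combined with other filters. For instance, a query like the following one should return logs with the `error` [word](#word), which have no `host.hostname` field:
+
+```logsql
+error host.hostname:""
+```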
+
+See also:
+
+- [Any value filter](#any-value-filter)
+- [Word filter](#word-filter)
+- [Logical filter](#logical-filter)
+
+
+### Any value filter
+
+Sometimes it is needed to find log entries containing any non-empty value for the given [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+This can be performed with `log_field:*` syntax. For example, the following query matches log entries with non-empty `host.hostname` field:
+
+```logsql
+host.hostname:*
+```
+
+See also:
+
+- [Empty value filter](#empty-value-filter)
+- [Prefix filter](#prefix-filter)
+- [Logical filter](#logical-filter)
+
+
+### Exact filter
+
+The [word filter](#word-filter) and [phrase filter](#phrase-filter) return [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
+which contain the given word or phrase inside them. The message may contain additional text other than the requested word or phrase. If you need searching for log messages
+or [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) with the exact value, then use the `exact(...)` filter.
+For example, the following query returns log messages with the exact value `fatal error: cannot find /foo/bar`:
+
+```logsql
+exact("fatal error: cannot find /foo/bar")
+```
+
+The query doesn't match the following log messages:
+
+- `fatal error: cannot find /foo/bar/baz` or `some-text fatal error: cannot find /foo/bar`, since they contain additional text
+ other than the text specified in the `exact()` filter. Use the `"fatal error: cannot find /foo/bar"` query in this case. See [these docs](#phrase-filter) for details.
+
+- `FATAL ERROR: cannot find /foo/bar`, since the `exact()` filter is case-sensitive. Use `i("fatal error: cannot find /foo/bar")` in this case.
+ See [these docs](#case-insensitive-filter) for details.
+
+By default the `exact()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact()` filter and put a colon after it
+if it must be searched in the given field. For example, the following query returns log entries with the exact `error` value at `log.level` field:
+
+```logsql
+log.level:exact("error")
+```
+
+Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example:
+
+```logsql
+log.уровень:exact("ошибка")
+```
+
+The field name can be put inside quotes if it contains special chars, which may clash with the query syntax.
+For example, the following query matches the `error` value in the field `log:level`:
+
+```logsql
+"log:level":exact("error")
+```
+
+See also:
+
+- [Exact prefix filter](#exact-prefix-filter)
+- [Multi-exact filter](#multi-exact-filter)
+- [Word filter](#word-filter)
+- [Phrase filter](#phrase-filter)
+- [Prefix filter](#prefix-filter)
+- [Logical filter](#logical-filter)
+
+
+### Exact prefix filter
+
+Sometimes it is needed to find log messages starting with some prefix. This can be done with the `exact_prefix(...)` filter.
+For example, the following query matches log messages, which start with the `Processing request` prefix:
+
+```logsql
+exact_prefix("Processing request")
+```
+
+This filter matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
+
+- `Processing request foobar`
+- `Processing requests from ...`
+
+It doesn't match the following log messages:
+
+- `processing request foobar`, since the log message starts with lowercase `p`. Use `exact_prefix("processing request") OR exact_prefix("Processing request")`
+ query in this case. See [these docs](#logical-filter) for details.
+- `start: Processing request`, since the log message doesn't start with `Processing request`. Use `"Processing request"` query in this case.
+ See [these docs](#phrase-filter) for details.
+
+By default the `exact_prefix()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact_prefix()` filter and put a colon after it
+if it must be searched in the given field. For example, the following query returns log entries with `log.level` field, which starts with `err` prefix:
+
+```logsql
+log.level:exact_prefix("err")
+```
+
+Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example:
+
+```logsql
+log.уровень:exact_prefix("ошиб")
+```
+
+The field name can be put inside quotes if it contains special chars, which may clash with the query syntax.
+For example, the following query matches `log:level` values starting with `err` prefix:
+
+```logsql
+"log:level":exact_prefix("err")
+```
+
+See also:
+
+- [Exact filter](#exact-filter)
+- [Prefix filter](#prefix-filter)
+- [Word filter](#word-filter)
+- [Phrase filter](#phrase-filter)
+- [Logical filter](#logical-filter)
+
+
+### Multi-exact filter
+
+Sometimes it is needed to locate log messages with a field containing at least one of the given values. This can be done with multiple [exact filters](#exact-filter)
+combined into a single [logical filter](#logical-filter). For example, the following query matches log messages with `log.level` field
+containing either `error` or `fatal` exact values:
+
+```logsql
+log.level:(exact("error") OR exact("fatal"))
+```
+
+While this solution works OK, LogsQL provides a simpler and faster solution for this case - the `in()` filter:
+
+```logsql
+log.level:in("error", "fatal")
+```
+
+It works very fast for long lists passed to `in()`.
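+
+The list inside `in()` may contain an arbitrary number of values. For instance, a query like the following one should match logs with any of the listed `log.level` values (the particular values here are just an illustration):
+
+```logsql
+log.level:in("error", "fatal", "warn")
+```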
+
+Future VictoriaLogs versions will allow passing arbitrary [queries](#query-syntax) into the `in()` filter.
+For example, the following query selects all the logs for the last hour for users who visited pages with the `admin` [word](#word) in the `path` field
+during the last day:
+
+```logsql
+_time:[now-1h,now] AND user_id:in(_time:[now-1d,now] AND path:admin | fields user_id)
+```
+
+See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
+
+See also:
+
+- [Exact filter](#exact-filter)
+- [Word filter](#word-filter)
+- [Phrase filter](#phrase-filter)
+- [Prefix filter](#prefix-filter)
+- [Logical filter](#logical-filter)
+
+
+### Case-insensitive filter
+
+Case-insensitive filter can be applied to any word, phrase or prefix by wrapping the corresponding [word filter](#word-filter),
+[phrase filter](#phrase-filter) or [prefix filter](#prefix-filter) into `i()`. For example, the following query returns
+log messages with `error` word in any case:
+
+```logsql
+i(error)
+```
+
+The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
+
+- `unknown error happened`
+- `ERROR: cannot read file`
+- `Error: unknown arg`
+- `An ErRoR occurred`
+
+The query doesn't match the following log messages:
+
+- `FooError`, since the `FooError` [word](#word) has the superfluous prefix `Foo`. Use `re("(?i)error")` for this case. See [these docs](#regexp-filter) for details.
+- `too many Errors`, since the `Errors` [word](#word) has the superfluous suffix `s`. Use `i(error*)` for this case.
+
+By default the `i()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter
+in order to apply it to the given field. For example, the following query matches `log.level` field containing `error` [word](#word) in any case:
+
+```logsql
+log.level:i(error)
+```
+
+If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query.
+For example, the following query matches the `log:level` field containing the `error` [word](#word) in any case:
+
+```logsql
+"log:level":i("error")
+```
+
+Performance tips:
+
+- Prefer using case-sensitive filter over case-insensitive filter.
+- Prefer moving [word filter](#word-filter), [phrase filter](#phrase-filter) and [prefix filter](#prefix-filter) in front of the case-insensitive filter
+ when using [logical filter](#logical-filter) - see the example after these tips.
+- See [other performance tips](#performance-tips).
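+
+For instance, the following query puts the `error` [word filter](#word-filter) in front of the case-insensitive `i(fail*)` filter, so the faster filter is applied first (the `fail*` prefix here is just an illustration):
+
+```logsql
+error i(fail*)
+```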
+
+
+See also:
+
+- [Word filter](#word-filter)
+- [Phrase filter](#phrase-filter)
+- [Exact filter](#exact-filter)
+- [Logical filter](#logical-filter)
+
+
+### Sequence filter
+
+Sometimes it is needed to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
+with [words](#word) or phrases in a particular order. For example, if log messages with `error` word followed by `open file` phrase
+must be found, then the following LogsQL query can be used:
+
+```logsql
+seq("error", "open file")
+```
+
+This query matches `some error: cannot open file /foo/bar` message, since the `open file` phrase goes after the `error` [word](#word).
+The query doesn't match the `cannot open file: error` message, since the `open file` phrase is located in front of the `error` [word](#word).
+If you need matching log messages with both `error` word and `open file` phrase, then use `error AND "open file"` query. See [these docs](#logical-filter)
+for details.
+
+By default the `seq()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter
+in order to apply it to the given field. For example, the following query matches `event.original` field containing `(error, "open file")` sequence:
+
+```logsql
+event.original:seq(error, "open file")
+```
+
+If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query.
+For example, the following query matches `event:original` field containing `(error, "open file")` sequence:
+
+```logsql
+"event.original":seq(error, "open file")
+```
+
+See also:
+
+- [Word filter](#word-filter)
+- [Phrase filter](#phrase-filter)
+- [Exact filter](#exact-filter)
+- [Logical filter](#logical-filter)
+
+
+### Regexp filter
+
+LogsQL supports a regular expression filter with [re2 syntax](https://github.com/google/re2/wiki/Syntax) via the `re(...)` expression.
+For example, the following query returns all the log messages containing `error` or `warn` substrings:
+
+```logsql
+re("error|warn")
+```
+
+The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
+
+- `error: cannot read data`
+- `A warning has been raised`
+
+By default the `re()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter
+in order to apply it to the given field. For example, the following query matches `event.original` field containing either `error` or `warn` substrings:
+
+```logsql
+event.original:re("error|warn")
+```
+
+If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query.
+For example, the following query matches `event:original` field containing either `error` or `warn` substrings:
+
+```logsql
+"event.original":re("error|warn")
+```
+
+Performance tips:
+
+- Prefer combining simple [word filter](#word-filter) with [logical filter](#logical-filter) instead of using regexp filter.
+ For example, the `re("error|warning")` query can be substituted with `error OR warning` query, which usually works much faster.
+ See also [multi-exact filter](#multi-exact-filter).
+- Prefer moving the regexp filter to the end of the [logical filter](#logical-filter), so that lighter filters are executed first.
+- Prefer using `exact_prefix("some prefix")` instead of `re("^some prefix")`, since the [exact_prefix()](#exact-prefix-filter) works much faster than the `re()` filter.
+- See [other performance tips](#performance-tips).
+
+See also:
+
+- [Case-insensitive filter](#case-insensitive-filter)
+- [Logical filter](#logical-filter)
+
+
+### Range filter
+
+If you need to filter log messages by some field containing only numeric values, then the `range()` filter can be used.
+For example, if the `request.duration` field contains the request duration in seconds, then the following LogsQL query can be used
+for searching for log entries with request durations exceeding 4.2 seconds:
+
+```logsql
+request.duration:range(4.2, Inf)
+```
+
+The lower and the upper bounds of the range are excluded by default. If they must be included, then substitute the corresponding
+parentheses with square brackets. For example:
+
+- `range[1, 10)` includes `1` in the matching range
+- `range(1, 10]` includes `10` in the matching range
+- `range[1, 10]` includes `1` and `10` in the matching range
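+
+For instance, a query like the following one should match `request.duration` values starting from `4.2` (inclusive) and up to `10` (exclusive):
+
+```logsql
+request.duration:range[4.2, 10)
+```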
+
+Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds`
+[log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text.
+Extract the numeric value from the message with the `parse(_msg, "the request took <request_duration> seconds")` [transformation](#transformations)
+and then apply the `range()` [post-filter](#post-filters) to the extracted `request_duration` field.
+
+Performance tips:
+
+- It is better to query pure numeric [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+ instead of extracting numeric field from text field via [transformations](#transformations) at query time.
+- See [other performance tips](#performance-tips).
+
+See also:
+
+- [IPv4 range filter](#ipv4-range-filter)
+- [String range filter](#string-range-filter)
+- [Length range filter](#length-range-filter)
+- [Logical filter](#logical-filter)
+
+
+### IPv4 range filter
+
+If you need to filter log messages by some field containing only [IPv4](https://en.wikipedia.org/wiki/Internet_Protocol_version_4) addresses such as `1.2.3.4`,
+then the `ipv4_range()` filter can be used. For example, the following query matches log entries with `user.ip` address in the range `[127.0.0.0 - 127.255.255.255]`:
+
+```logsql
+user.ip:ipv4_range(127.0.0.0, 127.255.255.255)
+```
+
+The `ipv4_range()` filter also accepts IPv4 subnetworks in [CIDR notation](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation).
+For example, the following query is equivalent to the query above:
+
+```logsql
+user.ip:ipv4_range("127.0.0.0/8")
+```
+
+If you need matching a single IPv4 address, then just put it inside `ipv4_range()`. For example, the following query matches `1.2.3.4` IP
+at `user.ip` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model):
+
+```logsql
+user.ip:ipv4_range("1.2.3.4")
+```
+
+Note that the `ipv4_range()` doesn't match a string with IPv4 address if this string contains other text. For example, `ipv4_range("127.0.0.0/24")`
+doesn't match `request from 127.0.0.1: done` [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
+since the `127.0.0.1` IP is surrounded by other text. Extract the IP from the message with the `parse(_msg, "request from <ip>: done")` [transformation](#transformations)
+and then apply the `ipv4_range()` [post-filter](#post-filters) to the extracted `ip` field.
+
+Hints:
+
+- If you need searching for [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the given `X.Y.Z.Q` IPv4 address,
+ then `"X.Y.Z.Q"` query can be used. See [these docs](#phrase-filter) for details.
+- If you need searching for [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing
+ at least a single IPv4 address out of the given list, then `"ip1" OR "ip2" OR ... "ipN"` query can be used. See [these docs](#logical-filter) for details.
+- If you need finding log entries with the `ip` field in multiple ranges, then use the `ip:(ipv4_range(range1) OR ipv4_range(range2) ... OR ipv4_range(rangeN))` query - see the example after these hints.
+ See [these docs](#logical-filter) for details.
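+
+For instance, a query like the following one should match logs with the `user.ip` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in either the `127.0.0.0/8` or the `10.0.0.0/8` subnetwork (the particular subnetworks here are just an illustration):
+
+```logsql
+user.ip:(ipv4_range("127.0.0.0/8") OR ipv4_range("10.0.0.0/8"))
+```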
+
+Performance tips:
+
+- It is better querying pure IPv4 [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+ instead of extracting IPv4 from text field via [transformations](#transformations) at query time.
+- See [other performance tips](#performance-tips).
+
+See also:
+
+- [Range filter](#range-filter)
+- [String range filter](#string-range-filter)
+- [Length range filter](#length-range-filter)
+- [Logical filter](#logical-filter)
+
+
+### String range filter
+
+If you need to filter log messages by some field with string values in some range, then the `string_range()` filter can be used.
+For example, the following LogsQL query matches log entries with the `user.name` field starting with `A` or `B` chars:
+
+```logsql
+user.name:string_range(A, C)
+```
+
+The `string_range()` includes the lower bound, while excluding the upper bound. This simplifies querying distinct sets of logs.
+For example, the `user.name:string_range(C, E)` would match `user.name` fields, which start with `C` or `D` chars.
+
+See also:
+
+- [Range filter](#range-filter)
+- [IPv4 range filter](#ipv4-range-filter)
+- [Length range filter](#length-range-filter)
+- [Logical filter](#logical-filter)
+
+
+### Length range filter
+
+If you need to filter log messages by their length, then the `len_range()` filter can be used.
+For example, the following LogsQL query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
+with lengths in the range `[5, 10]` chars:
+
+```logsql
+len_range(5, 10)
+```
+
+This query matches the following log messages, since their length is in the requested range:
+
+- `foobar`
+- `foo bar`
+
+This query doesn't match the following log messages:
+
+- `foo`, since it is too short
+- `foo bar baz abc`, since it is too long
+
+By default the `len_range()` is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
+Put the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `len_range()` in order to apply
+the filter to the needed field. For example, the following query matches log entries with the `foo` field length in the range `[10, 20]` chars:
+
+```logsql
+foo:len_range(10, 20)
+```
+
+See also:
+
+- [Range filter](#range-filter)
+- [Logical filter](#logical-filter)
+
+
+### Logical filter
+
+Simpler LogsQL [filters](#filters) can be combined into more complex filters with the following logical operations:
+
+- `q1 AND q2` - matches common log entries returned by both `q1` and `q2`. Arbitrary number of [filters](#filters) can be combined with `AND` operation.
+ For example, `error AND file AND app` matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
+ which simultaneously contain `error`, `file` and `app` [words](#word).
+ The `AND` operation is frequently used in LogsQL queries, so it is allowed to skip the `AND` word.
+ For example, `error file app` is equivalent to `error AND file AND app`.
+
+- `q1 OR q2` - merges log entries returned by both `q1` and `q2`. Arbitrary number of [filters](#filters) can be combined with `OR` operation.
+ For example, `error OR warning OR info` matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
+ which contain at least one of `error`, `warning` or `info` [words](#word).
+
+- `NOT q` - returns all the log entries except those which match `q`. For example, `NOT info` returns all the
+ [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
+ which do not contain `info` [word](#word). The `NOT` operation is frequently used in LogsQL queries, so it is allowed substituting `NOT` with `!` in queries.
+ For example, `!info` is equivalent to `NOT info`.
+
+The `NOT` operation has the highest priority, `AND` has the middle priority and `OR` has the lowest priority.
+The priority order can be changed with parentheses. For example, `NOT info OR debug` is interpreted as `(NOT info) OR debug`,
+so it matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
+which do not contain `info` [word](#word), while it also matches messages with `debug` word (which may contain the `info` word).
+This is not what most users expect. In this case the query can be rewritten to `NOT (info OR debug)`,
+which correctly returns log messages without `info` and `debug` [words](#word).
+
+LogsQL supports arbitrary complex logical queries with arbitrary mix of `AND`, `OR` and `NOT` operations and parentheses.
+
+By default logical filters apply to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
+unless the inner filters explicitly specify the needed [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) via `field_name:filter` syntax.
+For example, `(error OR warn) AND host.hostname:host123` is interpreted as `(_msg:error OR _msg:warn) AND host.hostname:host123`.
+
+It is possible to specify a single [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) for multiple filters
+with the following syntax:
+
+```logsql
+field_name:(q1 OR q2 OR ... qN)
+```
+
+For example, `log.level:error OR log.level:warning OR log.level:info` can be substituted with the shorter query: `log.level:(error OR warning OR info)`.
+
+Performance tips:
+
+- VictoriaLogs executes logical operations from the left to the right, so it is recommended moving the most specific
+ and the fastest filters (such as [word filter](#word-filter) and [phrase filter](#phrase-filter)) to the left,
+ while moving less specific and the slowest filters (such as [regexp filter](#regexp-filter) and [case-insensitive filter](#case-insensitive-filter))
+ to the right. For example, if you need to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
+ with the `error` word, which match some `/foo/(bar|baz)` regexp,
+ it is better from performance PoV to use the query `error re("/foo/(bar|baz)")` instead of `re("/foo/(bar|baz)") error`.
+
+ The most specific filter means that it matches the lowest number of log entries comparing to other filters.
+
+- See [other performance tips](#performance-tips).
+
+## Stream context
+
+LogsQL will support the ability to select the given number of surrounding log lines for the selected log lines
+on a [per-stream](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) basis.
+
+See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
+
+## Transformations
+
+It is possible to perform various transformations on the [selected log entries](#filters) at client side
+with Unix commands such as `jq`, `awk`, `cut`, etc. according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line).
+
+LogsQL will support the following transformations for the [selected](#filters) log entries:
+
+- Extracting the specified fields from text [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided pattern.
+- Extracting the specified fields from JSON strings stored inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+- Extracting the specified fields from [logfmt](https://brandur.org/logfmt) strings stored
+ inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+- Creating a new field from existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+ according to the provided format.
+- Creating a new field according to math calculations over existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+- Copying of the existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+- Parsing duration strings into floating-point seconds for further [stats calculations](#stats).
+- Creating a boolean field with the result of arbitrary [post-filters](#post-filters) applied to the current fields.
+ Boolean fields may be useful for [conditional stats calculation](#stats).
+- Creating an integer field with the length of the given field value. This can be useful for [stats calculations](#stats).
+
+See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
+
+## Post-filters
+
+It is possible to perform post-filtering on the [selected log entries](#filters) at client side with `grep` or similar Unix commands
+according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line).
+
+LogsQL will support post-filtering on the original [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+and fields created by various [transformations](#transformations). The following post-filters will be supported:
+
+- Full-text [filtering](#filters).
+- [Logical filtering](#logical-filter).
+
+See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
+
+## Stats
+
+It is possible to perform stats calculations on the [selected log entries](#filters) at client side with Unix commands such as `sort`, `uniq`, etc.
+according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line).
+
+LogsQL will support calculating the following stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+and fields created by [transformations](#transformations):
+
+- The number of selected logs.
+- The number of non-empty values for the given field.
+- The number of unique values for the given field.
+- The min, max, avg, and sum for the given field.
+- The median and [percentile](https://en.wikipedia.org/wiki/Percentile) for the given field.
+
+It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats.
+For example, `sumIf(response_size, is_admin:true)` calculates the total response size for admins only.
+
+It will be possible to group stats by the specified [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+and by the specified time buckets.
+
+See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
+
+## Sorting
+
+By default VictoriaLogs doesn't sort the returned results because of performance and efficiency concerns
+described [here](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line).
+
+It is possible to sort the [selected log entries](#filters) at client side with `sort` Unix command
+according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line).
+
+LogsQL will support sorting of the results by the given set of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+
+See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
+
+## Limiters
+
+It is possible to limit the returned results with Unix commands such as `head`, `tail`, `less`, etc.
+according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line).
+
+LogsQL will support the ability to limit the number of returned results alongside the ability to page the returned results.
+Additionally, LogsQL will provide the ability to select fields, which must be returned in the response.
+
+See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
+
+## Performance tips
+
+- It is highly recommended specifying [time filter](#time-filter) in order to narrow down the search to specific time range.
+- It is highly recommended specifying [stream filter](#stream-filter) in order to narrow down the search
+ to specific [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
+- Move faster filters such as [word filter](#word-filter) and [phrase filter](#phrase-filter) to the beginning of the query.
+ This rule doesn't apply to [time filter](#time-filter) and [stream filter](#stream-filter), which can be put at any place of the query.
+- Move more specific filters, which match lower number of log entries, to the beginning of the query.
+ This rule doesn't apply to [time filter](#time-filter) and [stream filter](#stream-filter), which can be put at any place of the query.
diff --git a/docs/VictoriaLogs/README.md b/docs/VictoriaLogs/README.md
new file mode 100644
index 000000000..8cc1d660e
--- /dev/null
+++ b/docs/VictoriaLogs/README.md
@@ -0,0 +1,481 @@
+# VictoriaLogs
+
+VictoriaLogs is a log management and log analytics system from [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/).
+
+It provides the following key features:
+
+- VictoriaLogs can accept logs from popular log collectors, which support
+ [ElasticSearch data ingestion format](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html). See [these docs](#data-ingestion).
+ [Grafana Loki data ingestion format](https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki) will be supported in the near future -
+ see [the Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html).
+- VictoriaLogs is much easier to set up and operate compared to ElasticSearch and Grafana Loki. See [these docs](#operation).
+- VictoriaLogs provides an easy yet powerful query language with full-text search capabilities across
+ all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) -
+ see [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html).
+- VictoriaLogs can be seamlessly combined with good old Unix tools for log analysis such as `grep`, `less`, `sort`, `jq`, etc.
+ See [these docs](#querying-via-command-line) for details.
+- VictoriaLogs' capacity and performance scale linearly with the available resources (CPU, RAM, disk IO, disk space).
+ It runs smoothly on both a Raspberry Pi and a beefy server with hundreds of CPU cores and terabytes of RAM.
+- VictoriaLogs can handle much bigger data volumes than ElasticSearch and Grafana Loki when running on comparable hardware.
+ A single-node VictoriaLogs instance can substitute a large ElasticSearch cluster.
+
+## Operation
+
+### How to run VictoriaLogs
+
+Check out the VictoriaLogs source code. It is located in the VictoriaMetrics repository:
+
+```bash
+git clone https://github.com/VictoriaMetrics/VictoriaMetrics
+cd VictoriaMetrics
+```
+
+Then build VictoriaLogs. The build command requires [Go 1.20](https://golang.org/doc/install).
+
+```bash
+make victoria-logs
+```
+
+Then run the built binary:
+
+```bash
+bin/victoria-logs
+```
+
+VictoriaLogs is now ready to [receive logs](#data-ingestion) and to [query logs](#querying) at TCP port `9428`!
+It has no external dependencies, so it may run in various environments without additional setup and configuration.
+VictoriaLogs automatically adapts to the available CPU and RAM resources. It also automatically sets up and creates
+the needed indexes during [data ingestion](#data-ingestion).
+
+It is possible to change the TCP port via `-httpListenAddr` command-line flag. For example, the following command
+starts VictoriaLogs, which accepts incoming requests at port `9200` (aka ElasticSearch HTTP API port):
+
+```bash
+/path/to/victoria-logs -httpListenAddr=:9200
+```
+
+VictoriaLogs stores the ingested data to the `victoria-logs-data` directory by default. The directory can be changed
+via `-storageDataPath` command-line flag. See [these docs](#storage) for details.
+
+By default VictoriaLogs stores log entries with timestamps in the time range `[now-7d, now]`, while dropping logs outside the given time range.
+I.e. it uses a retention of 7 days. Read [these docs](#retention) on how to control the retention for the [ingested](#data-ingestion) logs.
+
+It is recommended setting up monitoring of VictoriaLogs according to [these docs](#monitoring).
+
+### Data ingestion
+
+VictoriaLogs supports the following data ingestion techniques:
+
+- Via [Filebeat](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-overview.html). See [these docs](#filebeat-setup).
+- Via [Logstash](https://www.elastic.co/guide/en/logstash/current/introduction.html). See [these docs](#logstash-setup).
+
+The ingested log entries can be queried according to [these docs](#querying).
+
+#### Data ingestion troubleshooting
+
+VictoriaLogs provides the following command-line flags, which can help debugging data ingestion issues:
+
+- `-logNewStreams` - if this flag is passed to VictoriaLogs, then it logs all the newly
+ registered [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
+ This may help debugging [high cardinality issues](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality).
+- `-logIngestedRows` - if this flag is passed to VictoriaLogs, then it logs all the ingested
+ [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+
+VictoriaLogs exposes various [metrics](#monitoring), which may help debugging data ingestion issues:
+
+- `vl_rows_ingested_total` - the number of ingested [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+ since the last VictoriaLogs restart. If this number increases over time, then logs are successfully ingested into VictoriaLogs.
+ The ingested logs can be inspected in logs by passing `-logIngestedRows` command-line flag to VictoriaLogs.
+- `vl_streams_created_total` - the number of created [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
+ since the last VictoriaLogs restart. If this metric grows rapidly during extended periods of time, then this may lead
+ to [high cardinality issues](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality).
+ The newly created log streams can be inspected in logs by passing `-logNewStreams` command-line flag to VictoriaLogs.
+
+#### Filebeat setup
+
+Specify the [`output.elasticsearch`](https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html) section in the `filebeat.yml`
+for sending the collected logs to VictoriaLogs:
+
+```yml
+output.elasticsearch:
+ hosts: ["http://localhost:9428/insert/elasticsearch/"]
+ parameters:
+ _msg_field: "message"
+ _time_field: "@timestamp"
+ _stream_fields: "host.hostname,log.file.path"
+```
+
+Substitute the `localhost:9428` address inside `hosts` section with the real TCP address of VictoriaLogs.
+
+The `_msg_field` parameter must contain the field name with the log message generated by Filebeat. This is usually `message` field.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) for details.
+
+The `_time_field` parameter must contain the field name with the log timestamp generated by Filebeat. This is usually `@timestamp` field.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) for details.
+
+It is recommended specifying comma-separated list of field names, which uniquely identify every log stream collected by Filebeat, in the `_stream_fields` parameter.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) for details.
+
+If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) aren't needed,
+then VictoriaLogs can be instructed to ignore them during data ingestion - just pass `ignore_fields` parameter with comma-separated list of fields to ignore.
+For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
+
+```yml
+output.elasticsearch:
+ hosts: ["http://localhost:9428/insert/elasticsearch/"]
+ parameters:
+ _msg_field: "message"
+ _time_field: "@timestamp"
+ _stream_fields: "host.name,log.file.path"
+ ignore_fields: "log.offset,event.original"
+```
+
+If Filebeat ingests logs into VictoriaLogs at a high rate, then it may be needed to tune the `worker` and `bulk_max_size` options.
+For example, the following config is optimized for higher than usual ingestion rate:
+
+```yml
+output.elasticsearch:
+ hosts: ["http://localhost:9428/insert/elasticsearch/"]
+ parameters:
+ _msg_field: "message"
+ _time_field: "@timestamp"
+ _stream_fields: "host.name,log.file.path"
+ worker: 8
+ bulk_max_size: 1000
+```
+
+If Filebeat sends logs to VictoriaLogs in another datacenter, then it may be useful enabling data compression via the `compression_level` option.
+This usually allows saving network bandwidth and costs by up to 5 times:
+
+```yml
+output.elasticsearch:
+ hosts: ["http://localhost:9428/insert/elasticsearch/"]
+ parameters:
+ _msg_field: "message"
+ _time_field: "@timestamp"
+ _stream_fields: "host.name,log.file.path"
+ compression_level: 1
+```
+
+By default the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy).
+If you need to store logs in another tenant, then specify the needed tenant via `headers` in the `output.elasticsearch` section.
+For example, the following `filebeat.yml` config instructs Filebeat to store the data in the `(AccountID=12, ProjectID=34)` tenant:
+
+```yml
+output.elasticsearch:
+ hosts: ["http://localhost:9428/insert/elasticsearch/"]
+ headers:
+ AccountID: 12
+ ProjectID: 34
+ parameters:
+ _msg_field: "message"
+ _time_field: "@timestamp"
+ _stream_fields: "host.name,log.file.path"
+```
+
+The ingested log entries can be queried according to [these docs](#querying).
+
+See also [data ingestion troubleshooting](#data-ingestion-troubleshooting) docs.
+
+#### Logstash setup
+
+Specify [`output.elasticsearch`](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html) section in the `logstash.conf` file
+for sending the collected logs to VictoriaLogs:
+
+```conf
+output {
+ elasticsearch {
+ hosts => ["http://localhost:9428/insert/elasticsearch/"]
+ parameters => {
+ "_msg_field" => "message"
+ "_time_field" => "@timestamp"
+ "_stream_fields" => "host.name,process.name"
+ }
+ }
+}
+```
+
+Substitute `localhost:9428` address inside `hosts` with the real TCP address of VictoriaLogs.
+
+The `_msg_field` parameter must contain the field name with the log message generated by Logstash. This is usually `message` field.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) for details.
+
+The `_time_field` parameter must contain the field name with the log timestamp generated by Logstash. This is usually `@timestamp` field.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) for details.
+
+It is recommended to specify a comma-separated list of field names, which uniquely identify every log stream collected by Logstash, in the `_stream_fields` parameter.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) for details.
+
+If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) aren't needed,
+then VictoriaLogs can be instructed to ignore them during data ingestion - just pass the `ignore_fields` parameter containing a comma-separated list of fields to ignore.
+For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
+
+```conf
+output {
+ elasticsearch {
+ hosts => ["http://localhost:9428/insert/elasticsearch/"]
+ parameters => {
+ "_msg_field" => "message"
+ "_time_field" => "@timestamp"
+ "_stream_fields" => "host.hostname,process.name"
+ "ignore_fields" => "log.offset,event.original"
+ }
+ }
+}
+```
+
+If Logstash sends logs to VictoriaLogs in another datacenter, then it may be useful to enable data compression via the `http_compression => true` option.
+This usually reduces network bandwidth usage and costs by up to 5 times:
+
+```conf
+output {
+ elasticsearch {
+ hosts => ["http://localhost:9428/insert/elasticsearch/"]
+ parameters => {
+ "_msg_field" => "message"
+ "_time_field" => "@timestamp"
+ "_stream_fields" => "host.hostname,process.name"
+ }
+ http_compression => true
+ }
+}
+```
+
+By default the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy).
+If you need to store logs in another tenant, then specify the needed tenant via `custom_headers` in the `output.elasticsearch` section.
+For example, the following `logstash.conf` config instructs Logstash to store the data in the `(AccountID=12, ProjectID=34)` tenant:
+
+```conf
+output {
+ elasticsearch {
+ hosts => ["http://localhost:9428/insert/elasticsearch/"]
+ custom_headers => {
+ "AccountID" => "1"
+ "ProjectID" => "2"
+ }
+ parameters => {
+ "_msg_field" => "message"
+ "_time_field" => "@timestamp"
+ "_stream_fields" => "host.hostname,process.name"
+ }
+ }
+}
+```
+
+The ingested log entries can be queried according to [these docs](#querying).
+
+See also [data ingestion troubleshooting](#data-ingestion-troubleshooting) docs.
+
+### Querying
+
+VictoriaLogs can be queried at the `/select/logsql/query` endpoint. The [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html)
+query must be passed via `query` argument. For example, the following query returns all the log entries with the `error` word:
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=error'
+```
+
+The `query` argument can be passed either in the request url itself (aka an HTTP GET request) or via the request body
+with the `x-www-form-urlencoded` encoding (aka an HTTP POST request). HTTP POST is useful for sending long queries
+that do not fit the maximum url length supported by the clients and proxies in use.
+
+See [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) for details on what can be passed to the `query` arg.
+The `query` arg must be properly encoded with [percent encoding](https://en.wikipedia.org/wiki/URL_encoding) when passing it to `curl`
+or similar tools.
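+
+For example, the following two commands are equivalent ways of sending the `error AND "cannot open file"` query used later in these docs.
+This is only an illustration of the encoding rules above; it assumes VictoriaLogs listens on the default `localhost:9428` address:
+
+```bash
+# HTTP GET: the query is a part of the url, so it must be percent-encoded manually
+curl 'http://localhost:9428/select/logsql/query?query=error%20AND%20%22cannot%20open%20file%22'
+
+# HTTP POST: curl percent-encodes the query and sends it in the request body
+curl http://localhost:9428/select/logsql/query --data-urlencode 'query=error AND "cannot open file"'
+```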
+
+The `/select/logsql/query` endpoint returns [a stream of JSON lines](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON),
+where each line contains a JSON-encoded log entry in the form `{"field1":"value1",...,"fieldN":"valueN"}`.
+Example response:
+
+```
+{"_msg":"error: disconnect from 19.54.37.22: Auth fail [preauth]","_stream":"{}","_time":"2023-01-01T13:32:13Z"}
+{"_msg":"some other error","_stream":"{}","_time":"2023-01-01T13:32:15Z"}
+```
+
+The matching lines are sent to the response stream as soon as they are found in VictoriaLogs storage.
+This means that the returned response may contain billions of lines for queries matching too many log entries.
+The response can be interrupted at any time by closing the connection to VictoriaLogs server.
+This allows post-processing the returned lines at the client side with the usual Unix commands such as `grep`, `jq`, `less`, `head`, etc.
+See [these docs](#querying-via-command-line) for more details.
+
+The returned lines aren't sorted by default, since sorting disables the ability to send matching log entries to response stream as soon as they are found.
+Query results can be sorted either at VictoriaLogs side according [to these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#sorting)
+or at client side with the usual `sort` command according to [these docs](#querying-via-command-line).
+
+By default the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy) is queried.
+If you need to query another tenant, then specify the needed tenant via HTTP request headers. For example, the following query searches
+for log messages in the `(AccountID=12, ProjectID=34)` tenant:
+
+```bash
+curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID: 34' -d 'query=error'
+```
+
+The number of requests to `/select/logsql/query` can be [monitored](#monitoring) with `vl_http_requests_total{path="/select/logsql/query"}` metric.
+
+#### Querying via command-line
+
+VictoriaLogs provides good integration with `curl` and other command-line tools because of the following features:
+
+- VictoriaLogs sends the matching log entries to the response stream as soon as they are found.
+ This allows forwarding the response stream to arbitrary [Unix pipes](https://en.wikipedia.org/wiki/Pipeline_(Unix)).
+- VictoriaLogs automatically adjusts query execution speed to the speed of the client, which reads the response stream.
+ For example, if the response stream is piped to `less` command, then the query is suspended
+ until the `less` command reads the next block from the response stream.
+- VictoriaLogs automatically cancels query execution when the client closes the response stream.
+ For example, if the query response is piped to `head` command, then VictoriaLogs stops executing the query
+ when the `head` command closes the response stream.
+
+These features allow executing queries from the command line, which may potentially select billions of rows,
+without the risk of high resource usage (CPU, RAM, disk IO) on the VictoriaLogs server.
+
+For example, the following query can return very big number of matching log entries (e.g. billions) if VictoriaLogs contains
+many log messages with the `error` [word](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#word):
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=error'
+```
+
+If the command returns "never-ending" response, then just press `ctrl+C` at any time in order to cancel the query.
+VictoriaLogs notices that the response stream is closed, so it cancels the query and instantly stops consuming CPU, RAM and disk IO for this query.
+
+Then just use `head` command for investigating the returned log messages and narrowing down the query:
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=error' | head -10
+```
+
+The `head -10` command reads only the first 10 log messages from the response and then closes the response stream.
+This automatically cancels the query at VictoriaLogs side, so it stops consuming CPU, RAM and disk IO resources.
+
+Sometimes it may be more convenient to use `less` command instead of `head` during the investigation of the returned response:
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=error' | less
+```
+
+The `less` command reads the response stream on demand, when the user scrolls down the output.
+VictoriaLogs suspends query execution when `less` stops reading the response stream.
+It doesn't consume CPU and disk IO resources during this time. It resumes query execution
+when the `less` continues reading the response stream.
+
+Suppose that the initial investigation of the returned query results helped determine that the needed log messages contain
+`cannot open file` [phrase](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#phrase-filter).
+Then the query can be narrowed down to `error AND "cannot open file"`
+(see [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#logical-filter) about `AND` operator).
+Then run the updated command in order to continue the investigation:
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=error AND "cannot open file"' | head
+```
+
+Note that the `query` arg must be properly encoded with [percent encoding](https://en.wikipedia.org/wiki/URL_encoding) when passing it to `curl`
+or similar tools.
+
+The `pipe the query to "head" or "less" -> investigate the results -> refine the query` iteration
+can be repeated multiple times until the needed log messages are found.
+
+The returned VictoriaLogs query response can be post-processed with any combination of Unix commands,
+which are usually used for log analysis - `grep`, `jq`, `awk`, `sort`, `uniq`, `wc`, etc.
+
+For example, the following command uses `wc -l` Unix command for counting the number of log messages
+with the `error` [word](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#word)
+received from [streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) with `app="nginx"` field
+during the last 5 minutes:
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=_stream:{app="nginx"} AND _time:[now-5m,now] AND error' | wc -l
+```
+
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter) about `_stream` filter,
+[these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) about `_time` filter
+and [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#logical-filter) about `AND` operator.
+
+The following example shows how to sort query results by the [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field):
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=error' | jq -r '._time + " " + ._msg' | sort | less
+```
+
+This command uses `jq` for extracting [`_time`](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field)
+and [`_msg`](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) fields from the returned results,
+and piping them to `sort` command.
+
+Note that the `sort` command needs to read all the response stream before returning the sorted results. So the command above
+can take non-trivial amounts of time if the `query` returns too many results. The solution is to narrow down the `query`
+before sorting the results. See [these tips](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#performance-tips)
+on how to narrow down query results.
+
+The following example calculates stats on the number of log messages received during the last 5 minutes
+grouped by `log.level` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model):
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=_time:[now-5m,now] log.level:*' | jq -r '."log.level"' | sort | uniq -c
+```
+
+The query selects all the log messages with non-empty `log.level` field via ["any value" filter](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#any-value-filter),
+then pipes them to `jq` command, which extracts the `log.level` field value from the returned JSON stream, then the extracted `log.level` values
+are sorted with `sort` command and, finally, they are passed to `uniq -c` command for calculating the needed stats.
+
+See also:
+
+- [Key concepts](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html).
+- [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html).
+
+
+### Monitoring
+
+VictoriaLogs exposes internal metrics in Prometheus exposition format at `http://localhost:9428/metrics` page.
+It is recommended to set up monitoring of these metrics via VictoriaMetrics
+(see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter)),
+vmagent (see [these docs](https://docs.victoriametrics.com/vmagent.html#how-to-collect-metrics-in-prometheus-format)) or via Prometheus.
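+
+Before wiring up a scraper it may be useful to manually check that the metrics endpoint is reachable,
+e.g. with the following command (a minimal sketch, assuming the default `localhost:9428` listen address):
+
+```bash
+curl -s http://localhost:9428/metrics | head
+```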
+
+VictoriaLogs emits its own logs to stdout. It is recommended to investigate these logs during troubleshooting.
+
+
+### Retention
+
+By default VictoriaLogs stores log entries with timestamps in the time range `[now-7d, now]`, while dropping logs outside the given time range.
+I.e. it uses a retention of 7 days by default. The retention can be configured with the `-retentionPeriod` command-line flag.
+This flag accepts values starting from `1d` (one day) up to `100y` (100 years). See [these docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-durations)
+for the supported duration formats.
+
+For example, the following command starts VictoriaLogs with the retention of 8 weeks:
+
+```bash
+/path/to/victoria-logs -retentionPeriod=8w
+```
+
+VictoriaLogs stores the [ingested](#data-ingestion) logs in per-day partition directories. It automatically drops partition directories
+outside the configured retention.
+
+VictoriaLogs automatically drops logs at [data ingestion](#data-ingestion) stage if they have timestamps outside the configured retention.
+A sample of the dropped logs is logged with a `WARN` message in order to simplify troubleshooting.
+The `vlinsert_rows_dropped_total` [metric](#monitoring) is incremented each time an ingested log entry is dropped because of a timestamp outside the retention.
+It is recommended setting up the following alerting rule at [vmalert](https://docs.victoriametrics.com/vmalert.html) in order to be notified
+when logs with wrong timestamps are ingested into VictoriaLogs:
+
+```metricsql
+rate(vlinsert_rows_dropped_total[5m]) > 0
+```
+
+By default VictoriaLogs doesn't accept log entries with timestamps bigger than `now+2d`, i.e. 2 days in the future.
+If you need to accept logs with bigger timestamps, then specify the desired "future retention" via the `-futureRetention` command-line flag.
+This flag accepts values starting from `1d`. See [these docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-durations)
+for the supported duration formats.
+
+For example, the following command starts VictoriaLogs, which accepts logs with timestamps up to a year in the future:
+
+```bash
+/path/to/victoria-logs -futureRetention=1y
+```
+
+### Storage
+
+VictoriaLogs stores all its data in a single directory - `victoria-logs-data`. The path to the directory can be changed via `-storageDataPath` command-line flag.
+For example, the following command starts VictoriaLogs, which stores the data at `/var/lib/victoria-logs`:
+
+```bash
+/path/to/victoria-logs -storageDataPath=/var/lib/victoria-logs
+```
+
+VictoriaLogs automatically creates the `-storageDataPath` directory on the first run if it is missing.
diff --git a/docs/VictoriaLogs/Roadmap.md b/docs/VictoriaLogs/Roadmap.md
new file mode 100644
index 000000000..1fb685399
--- /dev/null
+++ b/docs/VictoriaLogs/Roadmap.md
@@ -0,0 +1,37 @@
+# VictoriaLogs roadmap
+
+The VictoriaLogs Preview is ready for evaluation in production. It is recommended to run it alongside existing solutions
+such as Elasticsearch and Grafana Loki and to compare their resource usage and usability.
+It isn't recommended to migrate from existing solutions to the VictoriaLogs Preview yet.
+
+The following functionality is available in VictoriaLogs Preview:
+
+- [Data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+- [Querying](https://docs.victoriametrics.com/VictoriaLogs/#querying).
+- [Querying via command-line](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line).
+
+See [operation docs](https://docs.victoriametrics.com/VictoriaLogs/#operation) for details.
+
+The following functionality is planned in the future versions of VictoriaLogs:
+
+- Support for [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) from popular log collectors and formats:
+ - Promtail (aka Grafana Loki)
+ - Vector.dev
+ - Fluentbit
+ - Fluentd
+ - Syslog
+- Add missing functionality to [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html):
+ - [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context).
+ - [Transformation functions](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#transformations).
+ - [Post-filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#post-filters).
+ - [Stats calculations](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stats).
+ - [Sorting](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#sorting).
+ - [Limiters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#limiters).
+ - The ability to use subqueries inside [in()](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#multi-exact-filter) function.
+- Live tailing for [LogsQL filters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#filters) aka `tail -f`.
+- Web UI with the following abilities:
+ - Explore the ingested logs.
+ - Build graphs over time for the ingested logs.
+- Ability to make instant snapshots and backups in the way [similar to VictoriaMetrics](https://docs.victoriametrics.com/#how-to-work-with-snapshots).
+- Cluster version of VictoriaLogs.
+- Ability to store data to object storage (such as S3, GCS, Minio).
diff --git a/docs/VictoriaLogs/keyConcepts.md b/docs/VictoriaLogs/keyConcepts.md
new file mode 100644
index 000000000..685fe7749
--- /dev/null
+++ b/docs/VictoriaLogs/keyConcepts.md
@@ -0,0 +1,219 @@
+# VictoriaLogs key concepts
+
+## Data model
+
+VictoriaLogs works with structured logs. Every log entry may contain arbitrary number of `key=value` pairs (aka fields).
+A single log entry can be expressed as a single-level [JSON](https://www.json.org/json-en.html) object with string keys and values.
+For example:
+
+```json
+{
+ "job": "my-app",
+ "instance": "host123:4567",
+ "level": "error",
+ "client_ip": "1.2.3.4",
+ "trace_id": "1234-56789-abcdef",
+ "_msg": "failed to serve the client request"
+}
+```
+
+VictoriaLogs automatically transforms multi-level JSON (aka nested JSON) into single-level JSON
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) according to the following rules:
+
+- Nested dictionaries are flattened by concatenating dictionary keys with `.` char. For example, the following multi-level JSON
+ is transformed into the following single-level JSON:
+
+ ```json
+ {
+ "host": {
+ "name": "foobar"
+ "os": {
+ "version": "1.2.3"
+ }
+ }
+ }
+ ```
+
+ ```json
+ {
+ "host.name": "foobar",
+ "host.os.version": "1.2.3"
+ }
+ ```
+
+- Arrays, numbers and boolean values are converted into strings. This simplifies [full-text search](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) over such values.
+ For example, the following JSON with an array, a number and a boolean value is converted into the following JSON with string values:
+
+ ```json
+ {
+ "tags": ["foo", "bar"],
+ "offset": 12345,
+ "is_error": false
+ }
+ ```
+
+ ```json
+ {
+ "tags": "[\"foo\", \"bar\"]",
+ "offset": "12345",
+ "is_error": "false"
+ }
+ ```
+
+Both the field name and the field value may contain arbitrary chars. Such chars must be encoded
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion)
+according to [JSON string encoding](https://www.rfc-editor.org/rfc/rfc7159.html#section-7).
+Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding:
+
+```json
+{
+ "label with whitepsace": "value\nwith\nnewlines",
+ "Поле": "价值",
+}
+```
+
+VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) logs.
+This enables [full-text search](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) across all the fields.
+
+VictoriaLogs supports the following field types:
+
+* [`_msg` field](#message-field)
+* [`_time` field](#time-field)
+* [`_stream` fields](#stream-fields)
+* [other fields](#other-fields)
+
+
+### Message field
+
+Every ingested [log entry](#data-model) must contain at least a `_msg` field with the actual log message. For example, this is the minimal
+log entry, which can be ingested into VictoriaLogs:
+
+```json
+{
+ "_msg": "some log message"
+}
+```
+
+If the actual log message is stored in a field other than `_msg`, then the name of this field can be specified
+via the `_msg_field` query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+For example, if the log message is located in the `event.original` field, then specify the `_msg_field=event.original` query arg
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
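+
+For illustration, below is a hypothetical `curl` sketch. It assumes that the Elasticsearch-compatible bulk endpoint
+accepts the `_msg_field` query arg in the same way as the log collectors from the
+[data ingestion docs](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) pass it via url parameters:
+
+```bash
+# Hypothetical sketch: ingest a single log entry whose message is stored in the `event.original` field.
+# The `_msg_field` query arg tells VictoriaLogs where the log message is located.
+# See the data ingestion docs for the officially supported ingestion setups.
+curl 'http://localhost:9428/insert/elasticsearch/_bulk?_msg_field=event.original' \
+  --data-binary '{"create":{}}
+{"@timestamp":"2023-04-12T06:38:11.095Z","event.original":"some log message"}
+'
+```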
+
+### Time field
+
+The ingested [log entries](#data-model) may contain `_time` field with the timestamp of the ingested log entry.
+For example:
+
+```json
+{
+ "_msg": "some log message",
+ "_time": "2023-04-12T06:38:11.095Z"
+}
+```
+
+If the actual timestamp is stored in a field other than `_time`, then the name of this field can be specified
+via the `_time_field` query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+For example, if the timestamp is located in the `event.created` field, then specify the `_time_field=event.created` query arg
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+
+If the `_time` field is missing, then the data ingestion time is used as the log entry timestamp.
+
+The log entry timestamp allows quickly narrowing down the search to a particular time range.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) for details.
+
+### Stream fields
+
+Some [structured logging](#data-model) fields may uniquely identify the application instance, which generates log entries.
+This may be either a single field such as `instance=host123:456` or a set of fields such as
+`(datacenter=..., env=..., job=..., instance=...)` or
+`(kubernetes.namespace=..., kubernetes.node.name=..., kubernetes.pod.name=..., kubernetes.container.name=...)`.
+
+Log entries received from a single application instance form a log stream in VictoriaLogs.
+VictoriaLogs optimizes storing and querying of individual log streams. This provides the following benefits:
+
+- Reduced disk space usage, since a log stream from a single application instance is usually compressed better
+ than a mixed log stream from multiple distinct applications.
+
+- Increased query performance, since VictoriaLogs needs to scan smaller amounts of data
+ when [searching by stream labels](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter).
+
+VictoriaLogs cannot automatically determine which fields uniquely identify every log stream,
+so it stores all the received log entries in a single default stream - `{}`.
+This may lead to suboptimal resource usage and query performance.
+
+Therefore it is recommended to specify stream-level fields via the `_stream_fields` query arg
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+For example, if logs from Kubernetes containers have the following fields:
+
+```json
+{
+ "kubernetes.namespace": "some-namespace",
+ "kubernetes.node.name": "some-node",
+ "kubernetes.pod.name": "some-pod",
+ "kubernetes.container.name": "some-container",
+ "_msg": "some log message"
+}
+```
+
+then specify `_stream_fields=kubernetes.namespace,kubernetes.node.name,kubernetes.pod.name,kubernetes.container.name`
+query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) in order to properly store
+per-container logs into distinct streams.
+
+#### How to determine which fields must be associated with log streams?
+
+[Log streams](#stream-fields) can be associated with fields, which simultaneously meet the following conditions:
+
+- Fields, which remain constant across log entries received from a single application instance.
+- Fields, which uniquely identify the application instance. For example, `instance`, `host`, `container`, etc.
+
+Sometimes a single application instance may generate multiple log streams and store them into distinct log files.
+In this case it is OK to associate the log stream with filepath fields such as `log.file.path` in addition to instance-specific fields.
+
+Structured logs may contain a big number of fields, which do not change across log entries received from a single application instance.
+There is no need to associate all these fields with a log stream - it is enough to associate only those fields, which uniquely identify
+the application instance across all the ingested logs. Additionally, some fields such as `datacenter`, `environment`, `namespace`, `job` or `app`
+can be associated with a log stream in order to optimize searching by these fields with [stream filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter).
+
+Never associate log streams with fields, which may change across log entries of the same application instance. See [these docs](#high-cardinality) for details.
+
+#### High cardinality
+
+Some fields in the [ingested logs](#data-model) may contain a big number of unique values across log entries.
+For example, fields with names such as `ip`, `user_id` or `trace_id` tend to contain a big number of unique values.
+VictoriaLogs works perfectly with such fields unless they are associated with [log streams](#stream-fields).
+
+Never associate high-cardinality fields with [log streams](#stream-fields), since this may result
+in the following issues:
+
+- Performance degradation during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion)
+ and [querying](https://docs.victoriametrics.com/VictoriaLogs/#querying)
+- Increased memory usage
+- Increased CPU usage
+- Increased disk space usage
+- Increased disk read / write IO
+
+VictoriaLogs exposes `vl_streams_created_total` [metric](https://docs.victoriametrics.com/VictoriaLogs/#monitoring),
+which shows the number of created streams since the last VictoriaLogs restart. If this metric grows at a rapid rate
+during a long period of time, then there is a high chance of the high cardinality issues mentioned above.
+VictoriaLogs can log all the newly registered streams when the `-logNewStreams` command-line flag is passed to it.
+This can help narrow down and eliminate high-cardinality fields from [log streams](#stream-fields).
+
+### Other fields
+
+The rest of [structured logging](#data-model) fields are optional. They can be used for simplifying and optimizing search queries.
+For example, it is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside a long log message.
+For instance, the `trace_id:XXXX-YYYY-ZZZZ` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query; see the sketch below.
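+
+As a minimal `curl` sketch, such a search can be performed via the [querying endpoint](https://docs.victoriametrics.com/VictoriaLogs/#querying)
+(the `XXXX-YYYY-ZZZZ` value is just a placeholder and the default `localhost:9428` address is assumed):
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=trace_id:XXXX-YYYY-ZZZZ'
+```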
+
+See [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) for more details.
+
+## Multitenancy
+
+VictoriaLogs supports multitenancy. A tenant is identified by `(AccountID, ProjectID)` pair, where `AccountID` and `ProjectID` are arbitrary 32-bit unsigned integers.
+The `AccountID` and `ProjectID` fields can be set during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion)
+and [querying](https://docs.victoriametrics.com/VictoriaLogs/#querying) via `AccountID` and `ProjectID` request headers.
+
+If `AccountID` and/or `ProjectID` request headers aren't set, then the default `0` value is used.
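+
+For example, the following command (mirroring the [querying docs](https://docs.victoriametrics.com/VictoriaLogs/#querying))
+searches for logs in the `(AccountID=12, ProjectID=34)` tenant by setting the corresponding request headers:
+
+```bash
+curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID: 34' -d 'query=error'
+```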
+
+VictoriaLogs has very low overhead for per-tenant management, so it is OK to have thousands of tenants in a single VictoriaLogs instance.
+
+VictoriaLogs doesn't perform per-tenant authorization. Use [vmauth](https://docs.victoriametrics.com/vmauth.html) or similar tools for per-tenant authorization.
diff --git a/lib/logstorage/arena.go b/lib/logstorage/arena.go
new file mode 100644
index 000000000..ab68b747b
--- /dev/null
+++ b/lib/logstorage/arena.go
@@ -0,0 +1,31 @@
+package logstorage
+
+import (
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+)
+
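+// arena holds a single underlying byte buffer, which is used for storing multiple byte slices.
+//
+// This reduces memory fragmentation compared to allocating every slice separately.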
+type arena struct {
+ b []byte
+}
+
+func (a *arena) reset() {
+ a.b = a.b[:0]
+}
+
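+// copyBytes stores a copy of b in the arena and returns the stored copy.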
+func (a *arena) copyBytes(b []byte) []byte {
+ ab := a.b
+ abLen := len(ab)
+ ab = append(ab, b...)
+ result := ab[abLen:]
+ a.b = ab
+ return result
+}
+
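+// newBytes grows the arena by size bytes and returns the newly added bytes, which the caller is expected to fill.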
+func (a *arena) newBytes(size int) []byte {
+ ab := a.b
+ abLen := len(ab)
+ ab = bytesutil.ResizeWithCopyMayOverallocate(ab, abLen+size)
+ result := ab[abLen:]
+ a.b = ab
+ return result
+}
diff --git a/lib/logstorage/block.go b/lib/logstorage/block.go
new file mode 100644
index 000000000..83834c895
--- /dev/null
+++ b/lib/logstorage/block.go
@@ -0,0 +1,650 @@
+package logstorage
+
+import (
+ "fmt"
+ "sort"
+ "sync"
+ "time"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// block represents a block of log entries.
+type block struct {
+ // timestamps contains timestamps for log entries.
+ timestamps []int64
+
+ // columns contains values for fields seen in log entries.
+ columns []column
+
+ // constColumns contains fields with constant values across all the block entries.
+ constColumns []Field
+}
+
+func (b *block) reset() {
+ b.timestamps = b.timestamps[:0]
+
+ cs := b.columns
+ for i := range cs {
+ cs[i].reset()
+ }
+ b.columns = cs[:0]
+
+ ccs := b.constColumns
+ for i := range ccs {
+ ccs[i].Reset()
+ }
+ b.constColumns = ccs[:0]
+}
+
+// uncompressedSizeBytes returns the total size of the original log entries stored in b.
+//
+// It is supposed that every log entry has the following format:
+//
+// 2006-01-02T15:04:05.999999999Z07:00 field1=value1 ... fieldN=valueN
+func (b *block) uncompressedSizeBytes() uint64 {
+ rowsCount := uint64(b.Len())
+
+ // Take into account timestamps
+ n := rowsCount * uint64(len(time.RFC3339Nano))
+
+ // Take into account columns
+ cs := b.columns
+ for i := range cs {
+ c := &cs[i]
+ nameLen := uint64(len(c.name))
+ if nameLen == 0 {
+ nameLen = uint64(len("_msg"))
+ }
+ for _, v := range c.values {
+ if len(v) > 0 {
+ n += nameLen + 2 + uint64(len(v))
+ }
+ }
+ }
+
+ // Take into account constColumns
+ ccs := b.constColumns
+ for i := range ccs {
+ cc := &ccs[i]
+ nameLen := uint64(len(cc.Name))
+ if nameLen == 0 {
+ nameLen = uint64(len("_msg"))
+ }
+ n += rowsCount * (2 + nameLen + uint64(len(cc.Value)))
+ }
+
+ return n
+}
+
+// uncompressedRowsSizeBytes returns the size of the uncompressed rows.
+//
+// It is supposed that every row has the following format:
+//
+// 2006-01-02T15:04:05.999999999Z07:00 field1=value1 ... fieldN=valueN
+func uncompressedRowsSizeBytes(rows [][]Field) uint64 {
+ n := uint64(0)
+ for _, fields := range rows {
+ n += uncompressedRowSizeBytes(fields)
+ }
+ return n
+}
+
+// uncompressedRowSizeBytes returns the size of uncompressed row.
+//
+// It is supposed that the row has the following format:
+//
+// 2006-01-02T15:04:05.999999999Z07:00 field1=value1 ... fieldN=valueN
+func uncompressedRowSizeBytes(fields []Field) uint64 {
+ n := uint64(len(time.RFC3339Nano)) // log timestamp
+ for _, f := range fields {
+ nameLen := len(f.Name)
+ if nameLen == 0 {
+ nameLen = len("_msg")
+ }
+ n += uint64(2 + nameLen + len(f.Value))
+ }
+ return n
+}
+
+// column contains values for the given field name seen in log entries.
+type column struct {
+ // name is the field name
+ name string
+
+ // values is the values seen for the given log entries.
+ values []string
+}
+
+func (c *column) reset() {
+ c.name = ""
+
+ values := c.values
+ for i := range values {
+ values[i] = ""
+ }
+ c.values = values[:0]
+}
+
+func (c *column) areSameValues() bool {
+ values := c.values
+ if len(values) < 2 {
+ return true
+ }
+ value := values[0]
+ for _, v := range values[1:] {
+ if value != v {
+ return false
+ }
+ }
+ return true
+}
+
+func (c *column) resizeValues(valuesLen int) []string {
+ values := c.values
+ if n := valuesLen - cap(values); n > 0 {
+ values = append(values[:cap(values)], make([]string, n)...)
+ }
+ values = values[:valuesLen]
+ c.values = values
+ return values
+}
+
+// mustWriteTo writes c to sw and updates ch accordingly.
+func (c *column) mustWriteTo(ch *columnHeader, sw *streamWriters) {
+ ch.reset()
+
+ valuesWriter := &sw.fieldValuesWriter
+ bloomFilterWriter := &sw.fieldBloomFilterWriter
+ if c.name == "" {
+ valuesWriter = &sw.messageValuesWriter
+ bloomFilterWriter = &sw.messageBloomFilterWriter
+ }
+
+ ch.name = c.name
+
+ // encode values
+ ve := getValuesEncoder()
+ ch.valueType, ch.minValue, ch.maxValue = ve.encode(c.values, &ch.valuesDict)
+
+ bb := longTermBufPool.Get()
+ defer longTermBufPool.Put(bb)
+
+ // marshal values
+ bb.B = marshalStringsBlock(bb.B[:0], ve.values)
+ putValuesEncoder(ve)
+ ch.valuesSize = uint64(len(bb.B))
+ if ch.valuesSize > maxValuesBlockSize {
+ logger.Panicf("BUG: too valuesSize: %d bytes; mustn't exceed %d bytes", ch.valuesSize, maxValuesBlockSize)
+ }
+ ch.valuesOffset = valuesWriter.bytesWritten
+ valuesWriter.MustWrite(bb.B)
+
+ // create and marshal bloom filter for c.values
+ if ch.valueType != valueTypeDict {
+ tokensBuf := getTokensBuf()
+ tokensBuf.A = tokenizeStrings(tokensBuf.A[:0], c.values)
+ bb.B = bloomFilterMarshal(bb.B[:0], tokensBuf.A)
+ putTokensBuf(tokensBuf)
+ } else {
+ // there is no need in encoding the bloom filter for the dictionary type,
+ // since it isn't used during querying - all the dictionary values are available in ch.valuesDict
+ bb.B = bb.B[:0]
+ }
+ ch.bloomFilterSize = uint64(len(bb.B))
+ if ch.bloomFilterSize > maxBloomFilterBlockSize {
+ logger.Panicf("BUG: too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", ch.bloomFilterSize, maxBloomFilterBlockSize)
+ }
+ ch.bloomFilterOffset = bloomFilterWriter.bytesWritten
+ bloomFilterWriter.MustWrite(bb.B)
+}
+
+func (b *block) assertValid() {
+ // Check that timestamps are in ascending order
+ timestamps := b.timestamps
+ for i := 1; i < len(timestamps); i++ {
+ if timestamps[i-1] > timestamps[i] {
+ logger.Panicf("BUG: log entries must be sorted by timestamp; got the previous entry with bigger timestamp %d than the current entry with timestamp %d",
+ timestamps[i-1], timestamps[i])
+ }
+ }
+
+ // Check that the number of items in each column matches the number of items in the block.
+ itemsCount := len(timestamps)
+ columns := b.columns
+ for _, c := range columns {
+ if len(c.values) != itemsCount {
+ logger.Panicf("BUG: unexpected number of values for column %q: got %d; want %d", c.name, len(c.values), itemsCount)
+ }
+ }
+}
+
+// MustInitFromRows initializes b from the given timestamps and rows.
+//
+// It is expected that timestamps are sorted.
+func (b *block) MustInitFromRows(timestamps []int64, rows [][]Field) {
+ b.reset()
+
+ assertTimestampsSorted(timestamps)
+ b.timestamps = append(b.timestamps, timestamps...)
+ b.mustInitFromRows(rows)
+ b.sortColumnsByName()
+}
+
+func (b *block) mustInitFromRows(rows [][]Field) {
+ rowsLen := len(rows)
+ if rowsLen == 0 {
+ // Nothing to do
+ return
+ }
+
+ if areSameFieldsInRows(rows) {
+ // Fast path - all the log entries have the same fields
+ fields := rows[0]
+ for i := range fields {
+ f := &fields[i]
+ if areSameValuesForColumn(rows, i) {
+ cc := b.extendConstColumns()
+ cc.Name = f.Name
+ cc.Value = f.Value
+ } else {
+ c := b.extendColumns()
+ c.name = f.Name
+ values := c.resizeValues(rowsLen)
+ for j := range rows {
+ values[j] = rows[j][i].Value
+ }
+ }
+ }
+ return
+ }
+
+ // Slow path - log entries contain different set of fields
+
+ // Determine indexes for columns
+ columnIdxs := getColumnIdxs()
+ for i := range rows {
+ fields := rows[i]
+ for j := range fields {
+ name := fields[j].Name
+ if _, ok := columnIdxs[name]; !ok {
+ columnIdxs[name] = len(columnIdxs)
+ }
+ }
+ }
+
+ // Initialize columns
+ cs := b.resizeColumns(len(columnIdxs))
+ for name, idx := range columnIdxs {
+ c := &cs[idx]
+ c.name = name
+ c.resizeValues(rowsLen)
+ }
+
+ // Write rows to block
+ for i := range rows {
+ for _, f := range rows[i] {
+ idx := columnIdxs[f.Name]
+ cs[idx].values[i] = f.Value
+ }
+ }
+ putColumnIdxs(columnIdxs)
+
+ // Detect const columns
+ for i := len(cs) - 1; i >= 0; i-- {
+ c := &cs[i]
+ if !c.areSameValues() {
+ continue
+ }
+ cc := b.extendConstColumns()
+ cc.Name = c.name
+ cc.Value = c.values[0]
+
+ c.reset()
+ if i < len(cs)-1 {
+ swapColumns(c, &cs[len(cs)-1])
+ }
+ cs = cs[:len(cs)-1]
+ }
+ b.columns = cs
+}
+
+func swapColumns(a, b *column) {
+ *a, *b = *b, *a
+}
+
+func areSameValuesForColumn(rows [][]Field, colIdx int) bool {
+ if len(rows) < 2 {
+ return true
+ }
+ value := rows[0][colIdx].Value
+ rows = rows[1:]
+ for i := range rows {
+ if value != rows[i][colIdx].Value {
+ return false
+ }
+ }
+ return true
+}
+
+func assertTimestampsSorted(timestamps []int64) {
+ for i := range timestamps {
+ if i > 0 && timestamps[i-1] > timestamps[i] {
+ logger.Panicf("BUG: log entries must be sorted by timestamp; got the previous entry with bigger timestamp %d than the current entry with timestamp %d",
+ timestamps[i-1], timestamps[i])
+ }
+ }
+}
+
+func (b *block) extendConstColumns() *Field {
+ ccs := b.constColumns
+ if cap(ccs) > len(ccs) {
+ ccs = ccs[:len(ccs)+1]
+ } else {
+ ccs = append(ccs, Field{})
+ }
+ b.constColumns = ccs
+ return &ccs[len(ccs)-1]
+}
+
+func (b *block) extendColumns() *column {
+ cs := b.columns
+ if cap(cs) > len(cs) {
+ cs = cs[:len(cs)+1]
+ } else {
+ cs = append(cs, column{})
+ }
+ b.columns = cs
+ return &cs[len(cs)-1]
+}
+
+func (b *block) resizeColumns(columnsLen int) []column {
+ cs := b.columns[:0]
+ if n := columnsLen - cap(cs); n > 0 {
+ cs = append(cs[:cap(cs)], make([]column, n)...)
+ }
+ cs = cs[:columnsLen]
+ b.columns = cs
+ return cs
+}
+
+func (b *block) sortColumnsByName() {
+ if len(b.columns)+len(b.constColumns) > maxColumnsPerBlock {
+ logger.Panicf("BUG: too big number of columns detected in the block: %d; the number of columns mustn't exceed %d",
+ len(b.columns)+len(b.constColumns), maxColumnsPerBlock)
+ }
+
+ cs := getColumnsSorter()
+ cs.columns = b.columns
+ sort.Sort(cs)
+ putColumnsSorter(cs)
+
+ ccs := getConstColumnsSorter()
+ ccs.columns = b.constColumns
+ sort.Sort(ccs)
+ putConstColumnsSorter(ccs)
+}
+
+// Len returns the number of log entries in b.
+func (b *block) Len() int {
+ return len(b.timestamps)
+}
+
+// InitFromBlockData unmarshals bd to b.
+//
+// sbu and vd are used as a temporary storage for unmarshaled column values.
+//
+// The b becomes outdated after sbu or vd is reset.
+func (b *block) InitFromBlockData(bd *blockData, sbu *stringsBlockUnmarshaler, vd *valuesDecoder) error {
+ b.reset()
+
+ if bd.rowsCount > maxRowsPerBlock {
+ return fmt.Errorf("too many entries found in the block: %d; mustn't exceed %d", bd.rowsCount, maxRowsPerBlock)
+ }
+ rowsCount := int(bd.rowsCount)
+
+ // unmarshal timestamps
+ td := &bd.timestampsData
+ var err error
+ b.timestamps, err = encoding.UnmarshalTimestamps(b.timestamps[:0], td.data, td.marshalType, td.minTimestamp, rowsCount)
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal timestamps: %w", err)
+ }
+
+ // unmarshal columns
+ cds := bd.columnsData
+ cs := b.resizeColumns(len(cds))
+ for i := range cds {
+ cd := &cds[i]
+ c := &cs[i]
+ c.name = cd.name
+ c.values, err = sbu.unmarshal(c.values[:0], cd.valuesData, uint64(rowsCount))
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal column %d: %w", i, err)
+ }
+ if err = vd.decodeInplace(c.values, cd.valueType, &cd.valuesDict); err != nil {
+ return fmt.Errorf("cannot decode column values: %w", err)
+ }
+ }
+
+ // unmarshal constColumns
+ b.constColumns = append(b.constColumns[:0], bd.constColumns...)
+
+ return nil
+}
+
+// mustWriteTo writes b with the given sid to sw and updates bh accordingly
+func (b *block) mustWriteTo(sid *streamID, bh *blockHeader, sw *streamWriters) {
+ // Do not store the version used for encoding directly in the block data, since:
+ // - all the blocks in the same part use the same encoding
+ // - the block encoding version can be put in metadata file for the part (aka metadataFilename)
+
+ b.assertValid()
+ bh.reset()
+
+ bh.streamID = *sid
+ bh.uncompressedSizeBytes = b.uncompressedSizeBytes()
+ bh.rowsCount = uint64(b.Len())
+
+ // Marshal timestamps
+ mustWriteTimestampsTo(&bh.timestampsHeader, b.timestamps, sw)
+
+ // Marshal columns
+ cs := b.columns
+ csh := getColumnsHeader()
+ chs := csh.resizeColumnHeaders(len(cs))
+ for i := range cs {
+ cs[i].mustWriteTo(&chs[i], sw)
+ }
+ csh.constColumns = append(csh.constColumns[:0], b.constColumns...)
+
+ bb := longTermBufPool.Get()
+ bb.B = csh.marshal(bb.B)
+ putColumnsHeader(csh)
+ bh.columnsHeaderOffset = sw.columnsHeaderWriter.bytesWritten
+ bh.columnsHeaderSize = uint64(len(bb.B))
+ if bh.columnsHeaderSize > maxColumnsHeaderSize {
+ logger.Panicf("BUG: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", bh.columnsHeaderSize, maxColumnsHeaderSize)
+ }
+ sw.columnsHeaderWriter.MustWrite(bb.B)
+ longTermBufPool.Put(bb)
+}
+
+// appendRows appends log entries from b to dst.
+func (b *block) appendRows(dst *rows) {
+ // copy timestamps
+ dst.timestamps = append(dst.timestamps, b.timestamps...)
+
+ // copy columns
+ fieldsBuf := dst.fieldsBuf
+ ccs := b.constColumns
+ cs := b.columns
+ for i := range b.timestamps {
+ fieldsLen := len(fieldsBuf)
+ // copy const columns
+ for j := range ccs {
+ cc := &ccs[j]
+ fieldsBuf = append(fieldsBuf, Field{
+ Name: cc.Name,
+ Value: cc.Value,
+ })
+ }
+ // copy other columns
+ for j := range cs {
+ c := &cs[j]
+ value := c.values[i]
+ if len(value) == 0 {
+ continue
+ }
+ fieldsBuf = append(fieldsBuf, Field{
+ Name: c.name,
+ Value: value,
+ })
+ }
+ dst.rows = append(dst.rows, fieldsBuf[fieldsLen:])
+ }
+ dst.fieldsBuf = fieldsBuf
+}
+
+func areSameFieldsInRows(rows [][]Field) bool {
+ if len(rows) < 2 {
+ return true
+ }
+ fields := rows[0]
+ rows = rows[1:]
+ for i := range rows {
+ leFields := rows[i]
+ if len(fields) != len(leFields) {
+ return false
+ }
+ for j := range leFields {
+ if leFields[j].Name != fields[j].Name {
+ return false
+ }
+ }
+ }
+ return true
+}
+
+var columnIdxsPool sync.Pool
+
+func getColumnIdxs() map[string]int {
+ v := columnIdxsPool.Get()
+ if v == nil {
+ return make(map[string]int)
+ }
+ return v.(map[string]int)
+}
+
+func putColumnIdxs(m map[string]int) {
+ for k := range m {
+ delete(m, k)
+ }
+ columnIdxsPool.Put(m)
+}
+
+func getBlock() *block {
+ v := blockPool.Get()
+ if v == nil {
+ return &block{}
+ }
+ return v.(*block)
+}
+
+func putBlock(b *block) {
+ b.reset()
+ blockPool.Put(b)
+}
+
+var blockPool sync.Pool
+
+type columnsSorter struct {
+ columns []column
+}
+
+func (cs *columnsSorter) reset() {
+ cs.columns = nil
+}
+
+func (cs *columnsSorter) Len() int {
+ return len(cs.columns)
+}
+
+func (cs *columnsSorter) Less(i, j int) bool {
+ columns := cs.columns
+ return columns[i].name < columns[j].name
+}
+
+func (cs *columnsSorter) Swap(i, j int) {
+ columns := cs.columns
+ columns[i], columns[j] = columns[j], columns[i]
+}
+
+func getColumnsSorter() *columnsSorter {
+ v := columnsSorterPool.Get()
+ if v == nil {
+ return &columnsSorter{}
+ }
+ return v.(*columnsSorter)
+}
+
+func putColumnsSorter(cs *columnsSorter) {
+ cs.reset()
+ columnsSorterPool.Put(cs)
+}
+
+var columnsSorterPool sync.Pool
+
+type constColumnsSorter struct {
+ columns []Field
+}
+
+func (ccs *constColumnsSorter) reset() {
+ ccs.columns = nil
+}
+
+func (ccs *constColumnsSorter) Len() int {
+ return len(ccs.columns)
+}
+
+func (ccs *constColumnsSorter) Less(i, j int) bool {
+ columns := ccs.columns
+ return columns[i].Name < columns[j].Name
+}
+
+func (ccs *constColumnsSorter) Swap(i, j int) {
+ columns := ccs.columns
+ columns[i], columns[j] = columns[j], columns[i]
+}
+
+func getConstColumnsSorter() *constColumnsSorter {
+ v := constColumnsSorterPool.Get()
+ if v == nil {
+ return &constColumnsSorter{}
+ }
+ return v.(*constColumnsSorter)
+}
+
+func putConstColumnsSorter(ccs *constColumnsSorter) {
+ ccs.reset()
+ constColumnsSorterPool.Put(ccs)
+}
+
+var constColumnsSorterPool sync.Pool
+
+// mustWriteTimestampsTo writes timestamps to sw and updates th accordingly
+func mustWriteTimestampsTo(th *timestampsHeader, timestamps []int64, sw *streamWriters) {
+ th.reset()
+
+ bb := longTermBufPool.Get()
+ bb.B, th.marshalType, th.minTimestamp = encoding.MarshalTimestamps(bb.B[:0], timestamps, 64)
+ if len(bb.B) > maxTimestampsBlockSize {
+ logger.Panicf("BUG: too big block with timestamps: %d bytes; the maximum supported size is %d bytes", len(bb.B), maxTimestampsBlockSize)
+ }
+ th.maxTimestamp = timestamps[len(timestamps)-1]
+ th.blockOffset = sw.timestampsWriter.bytesWritten
+ th.blockSize = uint64(len(bb.B))
+ sw.timestampsWriter.MustWrite(bb.B)
+ longTermBufPool.Put(bb)
+}
diff --git a/lib/logstorage/block_data.go b/lib/logstorage/block_data.go
new file mode 100644
index 000000000..6eeb14a30
--- /dev/null
+++ b/lib/logstorage/block_data.go
@@ -0,0 +1,383 @@
+package logstorage
+
+import (
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// blockData contains packed data for a single block.
+//
+// The main purpose of this struct is to reduce the work needed during background merge of parts.
+// If the block is full, then the blockData can be written to the destination part
+// without the need to unpack it.
+type blockData struct {
+ // streamID is id of the stream for the data
+ streamID streamID
+
+ // uncompressedSizeBytes is the original (uncompressed) size of log entries stored in the block
+ uncompressedSizeBytes uint64
+
+ // rowsCount is the number of log entries in the block
+ rowsCount uint64
+
+ // timestampsData contains the encoded timestamps data for the block
+ timestampsData timestampsData
+
+ // columnsData contains packed per-column data.
+ columnsData []columnData
+
+ // constColumns contains data for const columns across the block.
+ constColumns []Field
+
+ // a is used for storing byte slices for timestamps and columns.
+ //
+ // It reduces fragmentation for them.
+ a arena
+}
+
+// reset resets bd for subsequent re-use
+func (bd *blockData) reset() {
+ bd.streamID.reset()
+ bd.uncompressedSizeBytes = 0
+ bd.rowsCount = 0
+ bd.timestampsData.reset()
+
+ cds := bd.columnsData
+ for i := range cds {
+ cds[i].reset()
+ }
+ bd.columnsData = cds[:0]
+
+ ccs := bd.constColumns
+ for i := range ccs {
+ ccs[i].Reset()
+ }
+ bd.constColumns = ccs[:0]
+
+ bd.a.reset()
+}
+
+func (bd *blockData) resizeColumnsData(columnsDataLen int) []columnData {
+ cds := bd.columnsData
+ if n := columnsDataLen - cap(cds); n > 0 {
+ cds = append(cds[:cap(cds)], make([]columnData, n)...)
+ }
+ cds = cds[:columnsDataLen]
+ bd.columnsData = cds
+ return cds
+}
+
+// copyFrom copies src to bd.
+func (bd *blockData) copyFrom(src *blockData) {
+ bd.reset()
+
+ bd.streamID = src.streamID
+ bd.uncompressedSizeBytes = src.uncompressedSizeBytes
+ bd.rowsCount = src.rowsCount
+ bd.timestampsData.copyFrom(&src.timestampsData, &bd.a)
+
+ cdsSrc := src.columnsData
+ cds := bd.resizeColumnsData(len(cdsSrc))
+ for i := range cds {
+ cds[i].copyFrom(&cdsSrc[i], &bd.a)
+ }
+ bd.columnsData = cds
+
+ bd.constColumns = append(bd.constColumns[:0], src.constColumns...)
+}
+
+// unmarshalRows appends unmarshaled from bd log entries to dst.
+//
+// The returned log entries remain valid as long as sbu and vd aren't reset.
+func (bd *blockData) unmarshalRows(dst *rows, sbu *stringsBlockUnmarshaler, vd *valuesDecoder) error {
+ b := getBlock()
+ defer putBlock(b)
+
+ if err := b.InitFromBlockData(bd, sbu, vd); err != nil {
+ return err
+ }
+ b.appendRows(dst)
+ return nil
+}
+
+// mustWriteTo writes bd with the given sid to sw and updates bh accordingly
+func (bd *blockData) mustWriteTo(bh *blockHeader, sw *streamWriters) {
+ // Do not store the version used for encoding directly in the block data, since:
+ // - all the blocks in the same part use the same encoding
+ // - the block encoding version can be put in metadata file for the part (aka metadataFilename)
+
+ bh.reset()
+
+ bh.streamID = bd.streamID
+ bh.uncompressedSizeBytes = bd.uncompressedSizeBytes
+ bh.rowsCount = bd.rowsCount
+
+ // Marshal timestamps
+ bd.timestampsData.mustWriteTo(&bh.timestampsHeader, sw)
+
+ // Marshal columns
+ cds := bd.columnsData
+ csh := getColumnsHeader()
+ chs := csh.resizeColumnHeaders(len(cds))
+ for i := range cds {
+ cds[i].mustWriteTo(&chs[i], sw)
+ }
+ csh.constColumns = append(csh.constColumns[:0], bd.constColumns...)
+
+ bb := longTermBufPool.Get()
+ bb.B = csh.marshal(bb.B)
+ putColumnsHeader(csh)
+ bh.columnsHeaderOffset = sw.columnsHeaderWriter.bytesWritten
+ bh.columnsHeaderSize = uint64(len(bb.B))
+ if bh.columnsHeaderSize > maxColumnsHeaderSize {
+ logger.Panicf("BUG: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", bh.columnsHeaderSize, maxColumnsHeaderSize)
+ }
+ sw.columnsHeaderWriter.MustWrite(bb.B)
+ longTermBufPool.Put(bb)
+}
+
+// mustReadFrom reads block data associated with bh from sr to bd.
+func (bd *blockData) mustReadFrom(bh *blockHeader, sr *streamReaders) {
+ bd.reset()
+
+ bd.streamID = bh.streamID
+ bd.uncompressedSizeBytes = bh.uncompressedSizeBytes
+ bd.rowsCount = bh.rowsCount
+
+ // Read timestamps
+ bd.timestampsData.mustReadFrom(&bh.timestampsHeader, sr, &bd.a)
+
+ // Read columns
+ if bh.columnsHeaderOffset != sr.columnsHeaderReader.bytesRead {
+ logger.Panicf("FATAL: %s: unexpected columnsHeaderOffset=%d; must equal to the number of bytes read: %d",
+ sr.columnsHeaderReader.Path(), bh.columnsHeaderOffset, sr.columnsHeaderReader.bytesRead)
+ }
+ columnsHeaderSize := bh.columnsHeaderSize
+ if columnsHeaderSize > maxColumnsHeaderSize {
+ logger.Panicf("BUG: %s: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", sr.columnsHeaderReader.Path(), columnsHeaderSize, maxColumnsHeaderSize)
+ }
+ bb := longTermBufPool.Get()
+ bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(columnsHeaderSize))
+ sr.columnsHeaderReader.MustReadFull(bb.B)
+
+ csh := getColumnsHeader()
+ if err := csh.unmarshal(bb.B); err != nil {
+ logger.Panicf("FATAL: %s: cannot unmarshal columnsHeader: %s", sr.columnsHeaderReader.Path(), err)
+ }
+ longTermBufPool.Put(bb)
+ chs := csh.columnHeaders
+ cds := bd.resizeColumnsData(len(chs))
+ for i := range chs {
+ cds[i].mustReadFrom(&chs[i], sr, &bd.a)
+ }
+ bd.constColumns = append(bd.constColumns[:0], csh.constColumns...)
+ putColumnsHeader(csh)
+}
+
+// timestampsData contains the encoded timestamps data.
+type timestampsData struct {
+ // data contains packed timestamps data.
+ data []byte
+
+ // marshalType is the marshal type for timestamps
+ marshalType encoding.MarshalType
+
+ // minTimestamp is the minimum timestamp in the timestamps data
+ minTimestamp int64
+
+ // maxTimestamp is the maximum timestamp in the timestamps data
+ maxTimestamp int64
+}
+
+// reset resets td for subsequent re-use
+func (td *timestampsData) reset() {
+ td.data = nil
+ td.marshalType = 0
+ td.minTimestamp = 0
+ td.maxTimestamp = 0
+}
+
+// copyFrom copies src to td.
+func (td *timestampsData) copyFrom(src *timestampsData, a *arena) {
+ td.reset()
+
+ td.data = a.copyBytes(src.data)
+ td.marshalType = src.marshalType
+ td.minTimestamp = src.minTimestamp
+ td.maxTimestamp = src.maxTimestamp
+}
+
+// mustWriteTo writes td to sw and updates th accordingly
+func (td *timestampsData) mustWriteTo(th *timestampsHeader, sw *streamWriters) {
+ th.reset()
+
+ th.marshalType = td.marshalType
+ th.minTimestamp = td.minTimestamp
+ th.maxTimestamp = td.maxTimestamp
+ th.blockOffset = sw.timestampsWriter.bytesWritten
+ th.blockSize = uint64(len(td.data))
+ if th.blockSize > maxTimestampsBlockSize {
+ logger.Panicf("BUG: too big timestampsHeader.blockSize: %d bytes; mustn't exceed %d bytes", th.blockSize, maxTimestampsBlockSize)
+ }
+ sw.timestampsWriter.MustWrite(td.data)
+}
+
+// mustReadFrom reads timestamps data associated with th from sr to td.
+func (td *timestampsData) mustReadFrom(th *timestampsHeader, sr *streamReaders, a *arena) {
+ td.reset()
+
+ td.marshalType = th.marshalType
+ td.minTimestamp = th.minTimestamp
+ td.maxTimestamp = th.maxTimestamp
+
+ timestampsReader := &sr.timestampsReader
+ if th.blockOffset != timestampsReader.bytesRead {
+ logger.Panicf("FATAL: %s: unexpected timestampsHeader.blockOffset=%d; must equal to the number of bytes read: %d",
+ timestampsReader.Path(), th.blockOffset, timestampsReader.bytesRead)
+ }
+ timestampsBlockSize := th.blockSize
+ if timestampsBlockSize > maxTimestampsBlockSize {
+ logger.Panicf("FATAL: %s: too big timestamps block with %d bytes; the maximum supported block size is %d bytes",
+ timestampsReader.Path(), timestampsBlockSize, maxTimestampsBlockSize)
+ }
+ td.data = a.newBytes(int(timestampsBlockSize))
+ timestampsReader.MustReadFull(td.data)
+}
+
+// columnData contains packed data for a single column.
+type columnData struct {
+ // name is the column name
+ name string
+
+ // valueType is the type of values stored in valuesData
+ valueType valueType
+
+ // minValue is the minimum encoded uint* or float64 value in the columnHeader
+ //
+ // It is used for fast detection of whether the given columnHeader contains values in the given range
+ minValue uint64
+
+ // maxValue is the maximum encoded uint* or float64 value in the columnHeader
+ //
+ // It is used for fast detection of whether the given columnHeader contains values in the given range
+ maxValue uint64
+
+ // valuesDict contains unique values for valueType = valueTypeDict
+ valuesDict valuesDict
+
+ // valuesData contains packed values data for the given column
+ valuesData []byte
+
+ // bloomFilterData contains packed bloomFilter data for the given column
+ bloomFilterData []byte
+}
+
+// reset resets cd for subsequent re-use
+func (cd *columnData) reset() {
+ cd.name = ""
+ cd.valueType = 0
+
+ cd.minValue = 0
+ cd.maxValue = 0
+ cd.valuesDict.reset()
+
+ cd.valuesData = nil
+ cd.bloomFilterData = nil
+}
+
+// copyFrom copies src to cd.
+func (cd *columnData) copyFrom(src *columnData, a *arena) {
+ cd.reset()
+
+ cd.name = src.name
+ cd.valueType = src.valueType
+
+ cd.minValue = src.minValue
+ cd.maxValue = src.maxValue
+ cd.valuesDict.copyFrom(&src.valuesDict)
+
+ cd.valuesData = a.copyBytes(src.valuesData)
+ cd.bloomFilterData = a.copyBytes(src.bloomFilterData)
+}
+
+// mustWriteTo writes cd to sw and updates ch accordingly.
+func (cd *columnData) mustWriteTo(ch *columnHeader, sw *streamWriters) {
+ ch.reset()
+
+ valuesWriter := &sw.fieldValuesWriter
+ bloomFilterWriter := &sw.fieldBloomFilterWriter
+ if cd.name == "" {
+ valuesWriter = &sw.messageValuesWriter
+ bloomFilterWriter = &sw.messageBloomFilterWriter
+ }
+
+ ch.name = cd.name
+ ch.valueType = cd.valueType
+
+ ch.minValue = cd.minValue
+ ch.maxValue = cd.maxValue
+ ch.valuesDict.copyFrom(&cd.valuesDict)
+
+ // marshal values
+ ch.valuesSize = uint64(len(cd.valuesData))
+ if ch.valuesSize > maxValuesBlockSize {
+ logger.Panicf("BUG: too big valuesSize: %d bytes; mustn't exceed %d bytes", ch.valuesSize, maxValuesBlockSize)
+ }
+ ch.valuesOffset = valuesWriter.bytesWritten
+ valuesWriter.MustWrite(cd.valuesData)
+
+ // marshal bloom filter
+ ch.bloomFilterSize = uint64(len(cd.bloomFilterData))
+ if ch.bloomFilterSize > maxBloomFilterBlockSize {
+ logger.Panicf("BUG: too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", ch.bloomFilterSize, maxBloomFilterBlockSize)
+ }
+ ch.bloomFilterOffset = bloomFilterWriter.bytesWritten
+ bloomFilterWriter.MustWrite(cd.bloomFilterData)
+}
+
+// mustReadFrom reads the column data associated with ch from sr to cd.
+func (cd *columnData) mustReadFrom(ch *columnHeader, sr *streamReaders, a *arena) {
+ cd.reset()
+
+ valuesReader := &sr.fieldValuesReader
+ bloomFilterReader := &sr.fieldBloomFilterReader
+ if ch.name == "" {
+ valuesReader = &sr.messageValuesReader
+ bloomFilterReader = &sr.messageBloomFilterReader
+ }
+
+ cd.name = ch.name
+ cd.valueType = ch.valueType
+
+ cd.minValue = ch.minValue
+ cd.maxValue = ch.maxValue
+ cd.valuesDict.copyFrom(&ch.valuesDict)
+
+ // read values
+ if ch.valuesOffset != valuesReader.bytesRead {
+ logger.Panicf("FATAL: %s: unexpected columnHeader.valuesOffset=%d; must equal to the number of bytes read: %d",
+ valuesReader.Path(), ch.valuesOffset, valuesReader.bytesRead)
+ }
+ valuesSize := ch.valuesSize
+ if valuesSize > maxValuesBlockSize {
+ logger.Panicf("FATAL: %s: values block size cannot exceed %d bytes; got %d bytes", valuesReader.Path(), maxValuesBlockSize, valuesSize)
+ }
+ cd.valuesData = a.newBytes(int(valuesSize))
+ valuesReader.MustReadFull(cd.valuesData)
+
+ // read bloom filter
+ // bloom filter is missing in valueTypeDict.
+ if ch.valueType != valueTypeDict {
+ if ch.bloomFilterOffset != bloomFilterReader.bytesRead {
+ logger.Panicf("FATAL: %s: unexpected columnHeader.bloomFilterOffset=%d; must equal to the number of bytes read: %d",
+ bloomFilterReader.Path(), ch.bloomFilterOffset, bloomFilterReader.bytesRead)
+ }
+ bloomFilterSize := ch.bloomFilterSize
+ if bloomFilterSize > maxBloomFilterBlockSize {
+ logger.Panicf("FATAL: %s: bloom filter block size cannot exceed %d bytes; got %d bytes", bloomFilterReader.Path(), maxBloomFilterBlockSize, bloomFilterSize)
+ }
+ cd.bloomFilterData = a.newBytes(int(bloomFilterSize))
+ bloomFilterReader.MustReadFull(cd.bloomFilterData)
+ }
+}
diff --git a/lib/logstorage/block_data_test.go b/lib/logstorage/block_data_test.go
new file mode 100644
index 000000000..975d8c486
--- /dev/null
+++ b/lib/logstorage/block_data_test.go
@@ -0,0 +1,106 @@
+package logstorage
+
+import (
+ "reflect"
+ "testing"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+)
+
+func TestBlockDataReset(t *testing.T) {
+ bd := &blockData{
+ streamID: streamID{
+ tenantID: TenantID{
+ AccountID: 123,
+ ProjectID: 432,
+ },
+ },
+ uncompressedSizeBytes: 2344,
+ rowsCount: 134,
+ timestampsData: timestampsData{
+ data: []byte("foo"),
+ marshalType: encoding.MarshalTypeDeltaConst,
+ minTimestamp: 1234,
+ maxTimestamp: 23443,
+ },
+ columnsData: []columnData{
+ {
+ name: "foo",
+ valueType: valueTypeUint16,
+ valuesData: []byte("aaa"),
+ bloomFilterData: []byte("bsdf"),
+ },
+ },
+ constColumns: []Field{
+ {
+ Name: "foo",
+ Value: "bar",
+ },
+ },
+ }
+ bd.reset()
+ bdZero := &blockData{
+ columnsData: []columnData{},
+ constColumns: []Field{},
+ }
+ if !reflect.DeepEqual(bd, bdZero) {
+ t.Fatalf("unexpected non-zero blockData after reset: %v", bd)
+ }
+}
+
+func TestBlockDataCopyFrom(t *testing.T) {
+ f := func(bd *blockData) {
+ t.Helper()
+ var bd2 blockData
+ bd2.copyFrom(bd)
+ bd2.a.b = nil
+ if !reflect.DeepEqual(bd, &bd2) {
+ t.Fatalf("unexpected blockData copy\ngot\n%v\nwant\n%v", &bd2, bd)
+ }
+
+ // Try copying it again to the same destination
+ bd2.copyFrom(bd)
+ bd2.a.b = nil
+ if !reflect.DeepEqual(bd, &bd2) {
+ t.Fatalf("unexpected blockData copy to the same destination\ngot\n%v\nwant\n%v", &bd2, bd)
+ }
+ }
+ f(&blockData{})
+
+ bd := &blockData{
+ streamID: streamID{
+ tenantID: TenantID{
+ AccountID: 123,
+ ProjectID: 432,
+ },
+ },
+ uncompressedSizeBytes: 8943,
+ rowsCount: 134,
+ timestampsData: timestampsData{
+ data: []byte("foo"),
+ marshalType: encoding.MarshalTypeDeltaConst,
+ minTimestamp: 1234,
+ maxTimestamp: 23443,
+ },
+ columnsData: []columnData{
+ {
+ name: "foo",
+ valueType: valueTypeUint16,
+ valuesData: []byte("aaa"),
+ bloomFilterData: []byte("bsdf"),
+ },
+ {
+ name: "bar",
+ valuesData: []byte("aaa"),
+ bloomFilterData: []byte("bsdf"),
+ },
+ },
+ constColumns: []Field{
+ {
+ Name: "foobar",
+ Value: "baz",
+ },
+ },
+ }
+ f(bd)
+}
diff --git a/lib/logstorage/block_header.go b/lib/logstorage/block_header.go
new file mode 100644
index 000000000..490c79216
--- /dev/null
+++ b/lib/logstorage/block_header.go
@@ -0,0 +1,766 @@
+package logstorage
+
+import (
+ "fmt"
+ "math"
+ "sync"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// blockHeader contains information about a single block.
+//
+// blockHeader is stored in the indexFilename file.
+type blockHeader struct {
+ // streamID is a stream id for entries in the block
+ streamID streamID
+
+ // uncompressedSizeBytes is the original (uncompressed) size of log entries stored in the block
+ uncompressedSizeBytes uint64
+
+ // rowsCount is the number of log entries stored in the block
+ rowsCount uint64
+
+ // timestampsHeader contains information about timestamps for log entries in the block
+ timestampsHeader timestampsHeader
+
+ // columnsHeaderOffset is the offset of columnsHeader at columnsHeaderFilename
+ columnsHeaderOffset uint64
+
+ // columnsHeaderSize is the size of columnsHeader at columnsHeaderFilename
+ columnsHeaderSize uint64
+}
+
+// reset resets bh, so it can be re-used.
+func (bh *blockHeader) reset() {
+ bh.streamID.reset()
+ bh.uncompressedSizeBytes = 0
+ bh.rowsCount = 0
+ bh.timestampsHeader.reset()
+ bh.columnsHeaderOffset = 0
+ bh.columnsHeaderSize = 0
+}
+
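+// copyFrom copies src to bh.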
+func (bh *blockHeader) copyFrom(src *blockHeader) {
+ bh.reset()
+
+ bh.streamID = src.streamID
+ bh.uncompressedSizeBytes = src.uncompressedSizeBytes
+ bh.rowsCount = src.rowsCount
+ bh.timestampsHeader.copyFrom(&src.timestampsHeader)
+ bh.columnsHeaderOffset = src.columnsHeaderOffset
+ bh.columnsHeaderSize = src.columnsHeaderSize
+}
+
+// marshal appends the marshaled bh to dst and returns the result.
+func (bh *blockHeader) marshal(dst []byte) []byte {
+ // Do not store the encoding version directly in the block header, since:
+ // - all the block headers in the same part use the same encoding
+ // - the block header encoding version can be stored in the part's metadata file (aka metadataFilename)
+
+ dst = bh.streamID.marshal(dst)
+ dst = encoding.MarshalVarUint64(dst, bh.uncompressedSizeBytes)
+ dst = encoding.MarshalVarUint64(dst, bh.rowsCount)
+ dst = bh.timestampsHeader.marshal(dst)
+ dst = encoding.MarshalVarUint64(dst, bh.columnsHeaderOffset)
+ dst = encoding.MarshalVarUint64(dst, bh.columnsHeaderSize)
+
+ return dst
+}
+
+// unmarshal unmarshals bh from src and returns the remaining tail.
+func (bh *blockHeader) unmarshal(src []byte) ([]byte, error) {
+ bh.reset()
+
+ srcOrig := src
+
+ // unmarshal bh.streamID
+ tail, err := bh.streamID.unmarshal(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal streamID: %w", err)
+ }
+ src = tail
+
+ // unmarshal bh.uncompressedSizeBytes
+ tail, n, err := encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal uncompressedSizeBytes: %w", err)
+ }
+ bh.uncompressedSizeBytes = n
+ src = tail
+
+ // unmarshal bh.rowsCount
+ tail, n, err = encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal rowsCount: %w", err)
+ }
+ if n > maxRowsPerBlock {
+ return srcOrig, fmt.Errorf("too big value for rowsCount: %d; mustn't exceed %d", n, maxRowsPerBlock)
+ }
+ bh.rowsCount = n
+ src = tail
+
+ // unmarshal bh.timestampsHeader
+ tail, err = bh.timestampsHeader.unmarshal(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal timestampsHeader: %w", err)
+ }
+ src = tail
+
+ // unmarshal columnsHeaderOffset
+ tail, n, err = encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal columnsHeaderOffset: %w", err)
+ }
+ bh.columnsHeaderOffset = n
+ src = tail
+
+ // unmarshal columnsHeaderSize
+ tail, n, err = encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal columnsHeaderSize: %w", err)
+ }
+ if n > maxColumnsHeaderSize {
+ return srcOrig, fmt.Errorf("too big value for columnsHeaderSize: %d; mustn't exceed %d", n, maxColumnsHeaderSize)
+ }
+ bh.columnsHeaderSize = n
+ src = tail
+
+ return src, nil
+}
+
+func getBlockHeader() *blockHeader {
+ v := blockHeaderPool.Get()
+ if v == nil {
+ return &blockHeader{}
+ }
+ return v.(*blockHeader)
+}
+
+func putBlockHeader(bh *blockHeader) {
+ bh.reset()
+ blockHeaderPool.Put(bh)
+}
+
+var blockHeaderPool sync.Pool
+
+// unmarshalBlockHeaders appends blockHeader entries unmarshaled from src to dst and returns the result.
+func unmarshalBlockHeaders(dst []blockHeader, src []byte) ([]blockHeader, error) {
+ dstOrig := dst
+ for len(src) > 0 {
+ if len(dst) < cap(dst) {
+ dst = dst[:len(dst)+1]
+ } else {
+ dst = append(dst, blockHeader{})
+ }
+ bh := &dst[len(dst)-1]
+ tail, err := bh.unmarshal(src)
+ if err != nil {
+ return dstOrig, fmt.Errorf("cannot unmarshal blockHeader entries: %w", err)
+ }
+ src = tail
+ }
+ if err := validateBlockHeaders(dst[len(dstOrig):]); err != nil {
+ return dstOrig, err
+ }
+ return dst, nil
+}
+
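+// validateBlockHeaders verifies that the given blockHeader entries are sorted by streamID and, within a single stream, by minTimestamp.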
+func validateBlockHeaders(bhs []blockHeader) error {
+ for i := 1; i < len(bhs); i++ {
+ bhCurr := &bhs[i]
+ bhPrev := &bhs[i-1]
+ if bhCurr.streamID.less(&bhPrev.streamID) {
+ return fmt.Errorf("unexpected blockHeader with smaller streamID=%s after bigger streamID=%s at position %d", &bhCurr.streamID, &bhPrev.streamID, i)
+ }
+ if !bhCurr.streamID.equal(&bhPrev.streamID) {
+ continue
+ }
+ thCurr := bhCurr.timestampsHeader
+ thPrev := bhPrev.timestampsHeader
+ if thCurr.minTimestamp < thPrev.minTimestamp {
+ return fmt.Errorf("unexpected blockHeader with smaller timestamp=%d after bigger timestamp=%d at position %d", thCurr.minTimestamp, thPrev.minTimestamp, i)
+ }
+ }
+ return nil
+}
+
+func resetBlockHeaders(bhs []blockHeader) []blockHeader {
+ for i := range bhs {
+ bhs[i].reset()
+ }
+ return bhs[:0]
+}
+
+func getColumnsHeader() *columnsHeader {
+ v := columnsHeaderPool.Get()
+ if v == nil {
+ return &columnsHeader{}
+ }
+ return v.(*columnsHeader)
+}
+
+func putColumnsHeader(csh *columnsHeader) {
+ csh.reset()
+ columnsHeaderPool.Put(csh)
+}
+
+var columnsHeaderPool sync.Pool
+
+// columnsHeader contains information about columns in a single block.
+//
+// columnsHeader is stored in the columnsHeaderFilename file.
+type columnsHeader struct {
+ // columnHeaders contains the information about every column seen in the block.
+ columnHeaders []columnHeader
+
+ // constColumns contain fields with constant values across all the block entries.
+ constColumns []Field
+}
+
+func (csh *columnsHeader) reset() {
+ chs := csh.columnHeaders
+ for i := range chs {
+ chs[i].reset()
+ }
+ csh.columnHeaders = chs[:0]
+
+ ccs := csh.constColumns
+ for i := range ccs {
+ ccs[i].Reset()
+ }
+ csh.constColumns = ccs[:0]
+}
+
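+// getConstColumnValue returns the value of the const column with the given name.
+//
+// The "_msg" name is an alias for the column with an empty name. An empty string is returned if the column is missing.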
+func (csh *columnsHeader) getConstColumnValue(name string) string {
+ if name == "_msg" {
+ name = ""
+ }
+ ccs := csh.constColumns
+ for i := range ccs {
+ cc := &ccs[i]
+ if cc.Name == name {
+ return cc.Value
+ }
+ }
+ return ""
+}
+
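+// getColumnHeader returns the columnHeader for the column with the given name.
+//
+// The "_msg" name is an alias for the column with an empty name. Nil is returned if the column is missing.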
+func (csh *columnsHeader) getColumnHeader(name string) *columnHeader {
+ if name == "_msg" {
+ name = ""
+ }
+ chs := csh.columnHeaders
+ for i := range chs {
+ ch := &chs[i]
+ if ch.name == name {
+ return ch
+ }
+ }
+ return nil
+}
+
+func (csh *columnsHeader) resizeConstColumns(columnsLen int) []Field {
+ ccs := csh.constColumns
+ if n := columnsLen - cap(ccs); n > 0 {
+ ccs = append(ccs[:cap(ccs)], make([]Field, n)...)
+ }
+ ccs = ccs[:columnsLen]
+ csh.constColumns = ccs
+ return ccs
+}
+
+func (csh *columnsHeader) resizeColumnHeaders(columnHeadersLen int) []columnHeader {
+ chs := csh.columnHeaders
+ if n := columnHeadersLen - cap(chs); n > 0 {
+ chs = append(chs[:cap(chs)], make([]columnHeader, n)...)
+ }
+ chs = chs[:columnHeadersLen]
+ csh.columnHeaders = chs
+ return chs
+}
+
+func (csh *columnsHeader) marshal(dst []byte) []byte {
+ chs := csh.columnHeaders
+ dst = encoding.MarshalVarUint64(dst, uint64(len(chs)))
+ for i := range chs {
+ dst = chs[i].marshal(dst)
+ }
+
+ ccs := csh.constColumns
+ dst = encoding.MarshalVarUint64(dst, uint64(len(ccs)))
+ for i := range ccs {
+ dst = ccs[i].marshal(dst)
+ }
+
+ return dst
+}
+
+func (csh *columnsHeader) unmarshal(src []byte) error {
+ csh.reset()
+
+ // unmarshal columnHeaders
+ tail, n, err := encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal columnHeaders len: %w", err)
+ }
+ if n > maxColumnsPerBlock {
+ return fmt.Errorf("too many column headers: %d; mustn't exceed %d", n, maxColumnsPerBlock)
+ }
+ src = tail
+ chs := csh.resizeColumnHeaders(int(n))
+ for i := range chs {
+ tail, err = chs[i].unmarshal(src)
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal columnHeader %d out of %d columnHeaders: %w", i, len(chs), err)
+ }
+ src = tail
+ }
+ csh.columnHeaders = chs
+
+ // unmarshal constColumns
+ tail, n, err = encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal constColumns len: %w", err)
+ }
+ if n+uint64(len(csh.columnHeaders)) > maxColumnsPerBlock {
+ return fmt.Errorf("too many columns: %d; mustn't exceed %d", n+uint64(len(csh.columnHeaders)), maxColumnsPerBlock)
+ }
+ src = tail
+ ccs := csh.resizeConstColumns(int(n))
+ for i := range ccs {
+ tail, err = ccs[i].unmarshal(src)
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal constColumn %d out of %d columns: %w", i, len(ccs), err)
+ }
+ src = tail
+ }
+
+ // Verify that the src is empty
+ if len(src) > 0 {
+ return fmt.Errorf("unexpected non-empty tail left after unmarshaling columnsHeader: len(tail)=%d", len(src))
+ }
+
+ return nil
+}
+
+// columnHeader contains information for values that belong to a single label in a single block.
+//
+// The main column with an empty name is stored in messageValuesFilename,
+// while the rest of the columns are stored in fieldValuesFilename.
+// This allows minimizing disk read IO when filtering by non-message columns.
+//
+// Every block column also contains a bloom filter for all the tokens stored in the column.
+// This bloom filter is used for quickly determining whether the given block may contain the given tokens.
+//
+// Tokens in the bloom filter depend on valueType:
+//
+// - valueTypeString stores lowercased tokens seen in all the values
+// - valueTypeDict doesn't store anything in the bloom filter, since all the encoded values
+// are available directly in the valuesDict field
+// - valueTypeUint8, valueTypeUint16, valueTypeUint32 and valueTypeUint64 store encoded uint values
+// - valueTypeFloat64 stores encoded float64 values
+// - valueTypeIPv4 stores IPs encoded as uint32
+// - valueTypeTimestampISO8601 stores timestamps encoded as uint64
+//
+// The bloom filter for the main column with an empty name is stored in messageBloomFilename,
+// while bloom filters for the rest of the columns are stored in fieldBloomFilename.
+type columnHeader struct {
+ // name contains column name aka label name
+ name string
+
+ // valueType is the type of values stored in the block
+ valueType valueType
+
+ // minValue is the minimum encoded uint*, float64, ipv4 or timestamp value in the columnHeader
+ //
+ // It is used for fast detection of whether the given columnHeader contains values in the given range
+ minValue uint64
+
+ // maxValue is the maximum encoded uint*, float64, ipv4 or timestamp value in the columnHeader
+ //
+ // It is used for fast detection of whether the given columnHeader contains values in the given range
+ maxValue uint64
+
+ // valuesDict contains unique values for valueType = valueTypeDict
+ valuesDict valuesDict
+
+ // valuesOffset contains the offset of the block in either messageValuesFilename or fieldValuesFilename
+ valuesOffset uint64
+
+ // valuesSize contains the size of the block in either messageValuesFilename or fieldValuesFilename
+ valuesSize uint64
+
+ // bloomFilterOffset contains the offset of the bloom filter in either messageBloomFilename or fieldBloomFilename
+ bloomFilterOffset uint64
+
+ // bloomFilterSize contains the size of the bloom filter in either messageBloomFilename or fieldBloomFilename
+ bloomFilterSize uint64
+}
+
+// reset resets ch
+func (ch *columnHeader) reset() {
+ ch.name = ""
+ ch.valueType = 0
+
+ ch.minValue = 0
+ ch.maxValue = 0
+ ch.valuesDict.reset()
+
+ ch.valuesOffset = 0
+ ch.valuesSize = 0
+
+ ch.bloomFilterOffset = 0
+ ch.bloomFilterSize = 0
+}
+
+// marshal appends marshaled ch to dst and returns the result.
+func (ch *columnHeader) marshal(dst []byte) []byte {
+ // check minValue/maxValue
+ if ch.valueType == valueTypeFloat64 {
+ minValue := math.Float64frombits(ch.minValue)
+ maxValue := math.Float64frombits(ch.maxValue)
+ if minValue > maxValue {
+ logger.Panicf("BUG: minValue=%g must be smaller than maxValue=%g", minValue, maxValue)
+ }
+ } else {
+ if ch.minValue > ch.maxValue {
+ logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d", ch.minValue, ch.maxValue)
+ }
+ }
+
+ // Encode common fields - ch.name and ch.valueType
+ dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(ch.name))
+ dst = append(dst, byte(ch.valueType))
+
+ // Encode other fields depending on ch.valueType
+ switch ch.valueType {
+ case valueTypeString:
+ dst = ch.marshalValuesAndBloomFilters(dst)
+ case valueTypeDict:
+ dst = ch.valuesDict.marshal(dst)
+ dst = ch.marshalValues(dst)
+ case valueTypeUint8:
+ dst = append(dst, byte(ch.minValue))
+ dst = append(dst, byte(ch.maxValue))
+ dst = ch.marshalValuesAndBloomFilters(dst)
+ case valueTypeUint16:
+ dst = encoding.MarshalUint16(dst, uint16(ch.minValue))
+ dst = encoding.MarshalUint16(dst, uint16(ch.maxValue))
+ dst = ch.marshalValuesAndBloomFilters(dst)
+ case valueTypeUint32:
+ dst = encoding.MarshalUint32(dst, uint32(ch.minValue))
+ dst = encoding.MarshalUint32(dst, uint32(ch.maxValue))
+ dst = ch.marshalValuesAndBloomFilters(dst)
+ case valueTypeUint64:
+ dst = encoding.MarshalUint64(dst, ch.minValue)
+ dst = encoding.MarshalUint64(dst, ch.maxValue)
+ dst = ch.marshalValuesAndBloomFilters(dst)
+ case valueTypeFloat64:
+ // float64 values are encoded as uint64 via math.Float64bits()
+ dst = encoding.MarshalUint64(dst, ch.minValue)
+ dst = encoding.MarshalUint64(dst, ch.maxValue)
+ dst = ch.marshalValuesAndBloomFilters(dst)
+ case valueTypeIPv4:
+ dst = encoding.MarshalUint32(dst, uint32(ch.minValue))
+ dst = encoding.MarshalUint32(dst, uint32(ch.maxValue))
+ dst = ch.marshalValuesAndBloomFilters(dst)
+ case valueTypeTimestampISO8601:
+ // timestamps are encoded in nanoseconds
+ dst = encoding.MarshalUint64(dst, ch.minValue)
+ dst = encoding.MarshalUint64(dst, ch.maxValue)
+ dst = ch.marshalValuesAndBloomFilters(dst)
+ default:
+ logger.Panicf("BUG: unknown valueType=%d", ch.valueType)
+ }
+
+ return dst
+}
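+
+// Illustrative round-trip sketch (not part of this change; the column name and the
+// min/max values below are hypothetical):
+//
+//	ch := &columnHeader{
+//		name:      "status",
+//		valueType: valueTypeUint16,
+//		minValue:  200,
+//		maxValue:  504,
+//	}
+//	data := ch.marshal(nil) // name, valueType byte, 2+2 bytes min/max, varint values/bloom offsets and sizes
+//	var ch2 columnHeader
+//	tail, err := ch2.unmarshal(data) // on success tail is empty and err is nil
+//	_, _ = tail, err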
+
+func (ch *columnHeader) marshalValuesAndBloomFilters(dst []byte) []byte {
+ dst = ch.marshalValues(dst)
+ dst = ch.marshalBloomFilters(dst)
+ return dst
+}
+
+func (ch *columnHeader) marshalValues(dst []byte) []byte {
+ dst = encoding.MarshalVarUint64(dst, ch.valuesOffset)
+ dst = encoding.MarshalVarUint64(dst, ch.valuesSize)
+ return dst
+}
+
+func (ch *columnHeader) marshalBloomFilters(dst []byte) []byte {
+ dst = encoding.MarshalVarUint64(dst, ch.bloomFilterOffset)
+ dst = encoding.MarshalVarUint64(dst, ch.bloomFilterSize)
+ return dst
+}
+
+// unmarshal unmarshals ch from src and returns the tail left after unmarshaling.
+func (ch *columnHeader) unmarshal(src []byte) ([]byte, error) {
+ ch.reset()
+
+ srcOrig := src
+
+ // Unmarshal column name
+ tail, data, err := encoding.UnmarshalBytes(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal column name: %w", err)
+ }
+ // Do not use bytesutil.InternBytes(data) here, since it is slower than string(data) in production
+ ch.name = string(data)
+ src = tail
+
+ // Unmarshal value type
+ if len(src) < 1 {
+ return srcOrig, fmt.Errorf("cannot unmarshal valueType from 0 bytes for column %q; need at least 1 byte", ch.name)
+ }
+ ch.valueType = valueType(src[0])
+ src = src[1:]
+
+ // Unmarshal the rest of data depending on valueType
+ switch ch.valueType {
+ case valueTypeString:
+ tail, err = ch.unmarshalValuesAndBloomFilters(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeString for column %q: %w", ch.name, err)
+ }
+ src = tail
+ case valueTypeDict:
+ tail, err = ch.valuesDict.unmarshal(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal dict at valueTypeDict for column %q: %w", ch.name, err)
+ }
+ src = tail
+
+ tail, err = ch.unmarshalValues(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values at valueTypeDict for column %q: %w", ch.name, err)
+ }
+ src = tail
+ case valueTypeUint8:
+ if len(src) < 2 {
+ return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint8 from %d bytes for column %q; need at least 2 bytes", len(src), ch.name)
+ }
+ ch.minValue = uint64(src[0])
+ ch.maxValue = uint64(src[1])
+ src = src[2:]
+
+ tail, err = ch.unmarshalValuesAndBloomFilters(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint8 for column %q: %w", ch.name, err)
+ }
+ src = tail
+ case valueTypeUint16:
+ if len(src) < 4 {
+ return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint16 from %d bytes for column %q; need at least 4 bytes", len(src), ch.name)
+ }
+ ch.minValue = uint64(encoding.UnmarshalUint16(src))
+ ch.maxValue = uint64(encoding.UnmarshalUint16(src[2:]))
+ src = src[4:]
+
+ tail, err = ch.unmarshalValuesAndBloomFilters(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint16 for column %q: %w", ch.name, err)
+ }
+ src = tail
+ case valueTypeUint32:
+ if len(src) < 8 {
+ return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint32 from %d bytes for column %q; need at least 8 bytes", len(src), ch.name)
+ }
+ ch.minValue = uint64(encoding.UnmarshalUint32(src))
+ ch.maxValue = uint64(encoding.UnmarshalUint32(src[4:]))
+ src = src[8:]
+
+ tail, err = ch.unmarshalValuesAndBloomFilters(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint32 for column %q: %w", ch.name, err)
+ }
+ src = tail
+ case valueTypeUint64:
+ if len(src) < 16 {
+ return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name)
+ }
+ ch.minValue = encoding.UnmarshalUint64(src)
+ ch.maxValue = encoding.UnmarshalUint64(src[8:])
+ src = src[16:]
+
+ tail, err = ch.unmarshalValuesAndBloomFilters(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint64 for column %q: %w", ch.name, err)
+ }
+ src = tail
+ case valueTypeFloat64:
+ if len(src) < 16 {
+ return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeFloat64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name)
+ }
+ // min and max values must be converted to real values with math.Float64frombits() during querying.
+ ch.minValue = encoding.UnmarshalUint64(src)
+ ch.maxValue = encoding.UnmarshalUint64(src[8:])
+ src = src[16:]
+
+ tail, err = ch.unmarshalValuesAndBloomFilters(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeFloat64 for column %q: %w", ch.name, err)
+ }
+ src = tail
+ case valueTypeIPv4:
+ if len(src) < 8 {
+ return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeIPv4 from %d bytes for column %q; need at least 8 bytes", len(src), ch.name)
+ }
+ ch.minValue = uint64(encoding.UnmarshalUint32(src))
+ ch.maxValue = uint64(encoding.UnmarshalUint32(src[4:]))
+ src = src[8:]
+
+ tail, err = ch.unmarshalValuesAndBloomFilters(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeIPv4 for column %q: %w", ch.name, err)
+ }
+ src = tail
+ case valueTypeTimestampISO8601:
+ if len(src) < 16 {
+ return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeTimestampISO8601 from %d bytes for column %q; need at least 16 bytes",
+ len(src), ch.name)
+ }
+ ch.minValue = encoding.UnmarshalUint64(src)
+ ch.maxValue = encoding.UnmarshalUint64(src[8:])
+ src = src[16:]
+
+ tail, err = ch.unmarshalValuesAndBloomFilters(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeTimestampISO8601 for column %q: %w", ch.name, err)
+ }
+ src = tail
+ default:
+ return srcOrig, fmt.Errorf("unexpected valueType=%d for column %q", ch.valueType, ch.name)
+ }
+
+ return src, nil
+}
+
+func (ch *columnHeader) unmarshalValuesAndBloomFilters(src []byte) ([]byte, error) {
+ srcOrig := src
+
+ tail, err := ch.unmarshalValues(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal values: %w", err)
+ }
+ src = tail
+
+ tail, err = ch.unmarshalBloomFilters(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal bloom filters: %w", err)
+ }
+ src = tail
+
+ return src, nil
+}
+
+func (ch *columnHeader) unmarshalValues(src []byte) ([]byte, error) {
+ srcOrig := src
+
+ tail, n, err := encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal valuesOffset: %w", err)
+ }
+ ch.valuesOffset = n
+ src = tail
+
+ tail, n, err = encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal valuesSize: %w", err)
+ }
+ if n > maxValuesBlockSize {
+ return srcOrig, fmt.Errorf("too big valuesSize: %d bytes; mustn't exceed %d bytes", n, maxValuesBlockSize)
+ }
+ ch.valuesSize = n
+ src = tail
+
+ return src, nil
+}
+
+func (ch *columnHeader) unmarshalBloomFilters(src []byte) ([]byte, error) {
+ srcOrig := src
+
+ tail, n, err := encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal bloomFilterOffset: %w", err)
+ }
+ ch.bloomFilterOffset = n
+ src = tail
+
+ tail, n, err = encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal bloomFilterSize: %w", err)
+ }
+ if n > maxBloomFilterBlockSize {
+ return srcOrig, fmt.Errorf("too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", n, maxBloomFilterBlockSize)
+ }
+ ch.bloomFilterSize = n
+ src = tail
+
+ return src, nil
+}
+
+// timestampsHeader contains information about a timestamps block.
+type timestampsHeader struct {
+ // blockOffset is an offset of timestamps block inside timestampsFilename file
+ blockOffset uint64
+
+ // blockSize is the size of the timestamps block inside timestampsFilename file
+ blockSize uint64
+
+ // minTimestamp is the minimum timestamp seen in the block
+ minTimestamp int64
+
+ // maxTimestamp is the maximum timestamp seen in the block
+ maxTimestamp int64
+
+ // marshalType is the type used for encoding the timestamps block
+ marshalType encoding.MarshalType
+}
+
+// reset resets th, so it can be reused
+func (th *timestampsHeader) reset() {
+ th.blockOffset = 0
+ th.blockSize = 0
+ th.minTimestamp = 0
+ th.maxTimestamp = 0
+ th.marshalType = 0
+}
+
+func (th *timestampsHeader) copyFrom(src *timestampsHeader) {
+ th.blockOffset = src.blockOffset
+ th.blockSize = src.blockSize
+ th.minTimestamp = src.minTimestamp
+ th.maxTimestamp = src.maxTimestamp
+ th.marshalType = src.marshalType
+}
+
+// marshal appends marshaled th to dst and returns the result.
+func (th *timestampsHeader) marshal(dst []byte) []byte {
+ dst = encoding.MarshalUint64(dst, th.blockOffset)
+ dst = encoding.MarshalUint64(dst, th.blockSize)
+ dst = encoding.MarshalUint64(dst, uint64(th.minTimestamp))
+ dst = encoding.MarshalUint64(dst, uint64(th.maxTimestamp))
+ dst = append(dst, byte(th.marshalType))
+ return dst
+}
+
+// unmarshal unmarshals th from src and returns the tail left after the unmarshaling.
+func (th *timestampsHeader) unmarshal(src []byte) ([]byte, error) {
+ th.reset()
+
+ if len(src) < 33 {
+ return src, fmt.Errorf("cannot unmarshal timestampsHeader from %d bytes; need at least 33 bytes", len(src))
+ }
+
+ th.blockOffset = encoding.UnmarshalUint64(src)
+ th.blockSize = encoding.UnmarshalUint64(src[8:])
+ th.minTimestamp = int64(encoding.UnmarshalUint64(src[16:]))
+ th.maxTimestamp = int64(encoding.UnmarshalUint64(src[24:]))
+ th.marshalType = encoding.MarshalType(src[32])
+
+ return src[33:], nil
+}
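+
+// Note: the marshaled timestampsHeader always occupies 33 bytes:
+// 8 bytes blockOffset + 8 bytes blockSize + 8 bytes minTimestamp + 8 bytes maxTimestamp + 1 byte marshalType.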
diff --git a/lib/logstorage/block_header_test.go b/lib/logstorage/block_header_test.go
new file mode 100644
index 000000000..d6df322f6
--- /dev/null
+++ b/lib/logstorage/block_header_test.go
@@ -0,0 +1,454 @@
+package logstorage
+
+import (
+ "reflect"
+ "testing"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+)
+
+func TestBlockHeaderMarshalUnmarshal(t *testing.T) {
+ f := func(bh *blockHeader, marshaledLen int) {
+ t.Helper()
+ data := bh.marshal(nil)
+ if len(data) != marshaledLen {
+ t.Fatalf("unexpected lengths of the marshaled blockHeader; got %d; want %d", len(data), marshaledLen)
+ }
+ bh2 := &blockHeader{}
+ tail, err := bh2.unmarshal(data)
+ if err != nil {
+ t.Fatalf("unexpected error in unmarshal: %s", err)
+ }
+ if len(tail) > 0 {
+ t.Fatalf("unexpected non-empty tail after unmarshal: %X", tail)
+ }
+ if !reflect.DeepEqual(bh, bh2) {
+ t.Fatalf("unexpected blockHeader unmarshaled\ngot\n%v\nwant\n%v", bh2, bh)
+ }
+ }
+ f(&blockHeader{}, 61)
+ f(&blockHeader{
+ streamID: streamID{
+ tenantID: TenantID{
+ AccountID: 123,
+ ProjectID: 456,
+ },
+ id: u128{
+ lo: 3443,
+ hi: 23434,
+ },
+ },
+ uncompressedSizeBytes: 4344,
+ rowsCount: 1234,
+ timestampsHeader: timestampsHeader{
+ blockOffset: 13234,
+ blockSize: 8843,
+ minTimestamp: -4334,
+ maxTimestamp: 23434,
+ marshalType: encoding.MarshalTypeNearestDelta2,
+ },
+ columnsHeaderOffset: 4384,
+ columnsHeaderSize: 894,
+ }, 65)
+}
+
+func TestColumnsHeaderMarshalUnmarshal(t *testing.T) {
+ f := func(csh *columnsHeader, marshaledLen int) {
+ t.Helper()
+ data := csh.marshal(nil)
+ if len(data) != marshaledLen {
+ t.Fatalf("unexpected lengths of the marshaled columnsHeader; got %d; want %d", len(data), marshaledLen)
+ }
+ csh2 := &columnsHeader{}
+ err := csh2.unmarshal(data)
+ if err != nil {
+ t.Fatalf("unexpected error in unmarshal: %s", err)
+ }
+ if !reflect.DeepEqual(csh, csh2) {
+ t.Fatalf("unexpected blockHeader unmarshaled\ngot\n%v\nwant\n%v", csh2, csh)
+ }
+ }
+ f(&columnsHeader{}, 2)
+ f(&columnsHeader{
+ columnHeaders: []columnHeader{
+ {
+ name: "foobar",
+ valueType: valueTypeString,
+ valuesOffset: 12345,
+ valuesSize: 23434,
+ bloomFilterOffset: 89843,
+ bloomFilterSize: 8934,
+ },
+ {
+ name: "message",
+ valueType: valueTypeUint16,
+ minValue: 123,
+ maxValue: 456,
+ valuesOffset: 3412345,
+ valuesSize: 234434,
+ bloomFilterOffset: 83,
+ bloomFilterSize: 34,
+ },
+ },
+ constColumns: []Field{
+ {
+ Name: "foo",
+ Value: "bar",
+ },
+ },
+ }, 50)
+}
+
+func TestBlockHeaderUnmarshalFailure(t *testing.T) {
+ f := func(data []byte) {
+ t.Helper()
+ dataOrig := append([]byte{}, data...)
+ bh := getBlockHeader()
+ defer putBlockHeader(bh)
+ tail, err := bh.unmarshal(data)
+ if err == nil {
+ t.Fatalf("expecting non-nil error")
+ }
+ if string(tail) != string(dataOrig) {
+ t.Fatalf("unexpected tail;\ngot\n%q\nwant\n%q", tail, dataOrig)
+ }
+ }
+ f(nil)
+ f([]byte("foo"))
+
+ bh := blockHeader{
+ streamID: streamID{
+ tenantID: TenantID{
+ AccountID: 123,
+ ProjectID: 456,
+ },
+ id: u128{
+ lo: 3443,
+ hi: 23434,
+ },
+ },
+ uncompressedSizeBytes: 4344,
+ rowsCount: 1234,
+ timestampsHeader: timestampsHeader{
+ blockOffset: 13234,
+ blockSize: 8843,
+ minTimestamp: -4334,
+ maxTimestamp: 23434,
+ marshalType: encoding.MarshalTypeNearestDelta2,
+ },
+ columnsHeaderOffset: 4384,
+ columnsHeaderSize: 894,
+ }
+ data := bh.marshal(nil)
+ for len(data) > 0 {
+ data = data[:len(data)-1]
+ f(data)
+ }
+}
+
+func TestColumnsHeaderUnmarshalFailure(t *testing.T) {
+ f := func(data []byte) {
+ t.Helper()
+ csh := getColumnsHeader()
+ defer putColumnsHeader(csh)
+ err := csh.unmarshal(data)
+ if err == nil {
+ t.Fatalf("expecting non-nil error")
+ }
+ }
+ f(nil)
+ f([]byte("foo"))
+
+ csh := columnsHeader{
+ columnHeaders: []columnHeader{
+ {
+ name: "foobar",
+ valueType: valueTypeString,
+ valuesOffset: 12345,
+ valuesSize: 23434,
+ bloomFilterOffset: 89843,
+ bloomFilterSize: 8934,
+ },
+ {
+ name: "message",
+ valueType: valueTypeUint16,
+ minValue: 123,
+ maxValue: 456,
+ valuesOffset: 3412345,
+ valuesSize: 234434,
+ bloomFilterOffset: 83,
+ bloomFilterSize: 34,
+ },
+ },
+ constColumns: []Field{
+ {
+ Name: "foo",
+ Value: "bar",
+ },
+ },
+ }
+ data := csh.marshal(nil)
+ for len(data) > 0 {
+ data = data[:len(data)-1]
+ f(data)
+ }
+}
+
+func TestBlockHeaderReset(t *testing.T) {
+ bh := &blockHeader{
+ streamID: streamID{
+ tenantID: TenantID{
+ AccountID: 123,
+ ProjectID: 456,
+ },
+ id: u128{
+ lo: 3443,
+ hi: 23434,
+ },
+ },
+ uncompressedSizeBytes: 8984,
+ rowsCount: 1234,
+ timestampsHeader: timestampsHeader{
+ blockOffset: 13234,
+ blockSize: 8843,
+ minTimestamp: -4334,
+ maxTimestamp: 23434,
+ marshalType: encoding.MarshalTypeNearestDelta2,
+ },
+ columnsHeaderOffset: 12332,
+ columnsHeaderSize: 234,
+ }
+ bh.reset()
+ bhZero := &blockHeader{}
+ if !reflect.DeepEqual(bh, bhZero) {
+ t.Fatalf("unexpected non-zero blockHeader after reset: %v", bh)
+ }
+}
+
+func TestColumnsHeaderReset(t *testing.T) {
+ csh := &columnsHeader{
+ columnHeaders: []columnHeader{
+ {
+ name: "foobar",
+ valueType: valueTypeString,
+ valuesOffset: 12345,
+ valuesSize: 23434,
+ bloomFilterOffset: 89843,
+ bloomFilterSize: 8934,
+ },
+ {
+ name: "message",
+ valueType: valueTypeUint16,
+ minValue: 123,
+ maxValue: 456,
+ valuesOffset: 3412345,
+ valuesSize: 234434,
+ bloomFilterOffset: 83,
+ bloomFilterSize: 34,
+ },
+ },
+ constColumns: []Field{
+ {
+ Name: "foo",
+ Value: "bar",
+ },
+ },
+ }
+ csh.reset()
+ cshZero := &columnsHeader{
+ columnHeaders: []columnHeader{},
+ constColumns: []Field{},
+ }
+ if !reflect.DeepEqual(csh, cshZero) {
+ t.Fatalf("unexpected non-zero columnsHeader after reset: %v", csh)
+ }
+}
+
+func TestMarshalUnmarshalBlockHeaders(t *testing.T) {
+ f := func(bhs []blockHeader, marshaledLen int) {
+ t.Helper()
+ var data []byte
+ for i := range bhs {
+ data = bhs[i].marshal(data)
+ }
+ if len(data) != marshaledLen {
+ t.Fatalf("unexpected length for marshaled blockHeader entries; got %d; want %d", len(data), marshaledLen)
+ }
+ bhs2, err := unmarshalBlockHeaders(nil, data)
+ if err != nil {
+ t.Fatalf("unexpected error when unmarshaling blockHeader entries: %s", err)
+ }
+ if !reflect.DeepEqual(bhs, bhs2) {
+ t.Fatalf("unexpected blockHeader entries unmarshaled\ngot\n%v\nwant\n%v", bhs2, bhs)
+ }
+ }
+ f(nil, 0)
+ f([]blockHeader{{}}, 61)
+ f([]blockHeader{
+ {},
+ {
+ streamID: streamID{
+ tenantID: TenantID{
+ AccountID: 123,
+ ProjectID: 456,
+ },
+ id: u128{
+ lo: 3443,
+ hi: 23434,
+ },
+ },
+ uncompressedSizeBytes: 89894,
+ rowsCount: 1234,
+ timestampsHeader: timestampsHeader{
+ blockOffset: 13234,
+ blockSize: 8843,
+ minTimestamp: -4334,
+ maxTimestamp: 23434,
+ marshalType: encoding.MarshalTypeNearestDelta2,
+ },
+ columnsHeaderOffset: 12332,
+ columnsHeaderSize: 234,
+ },
+ }, 127)
+}
+
+func TestColumnHeaderMarshalUnmarshal(t *testing.T) {
+ f := func(ch *columnHeader, marshaledLen int) {
+ t.Helper()
+ data := ch.marshal(nil)
+ if len(data) != marshaledLen {
+ t.Fatalf("unexpected marshaled length of columnHeader; got %d; want %d", len(data), marshaledLen)
+ }
+ var ch2 columnHeader
+ tail, err := ch2.unmarshal(data)
+ if err != nil {
+ t.Fatalf("unexpected error in umarshal(%v): %s", ch, err)
+ }
+ if len(tail) > 0 {
+ t.Fatalf("unexpected non-empty tail after unmarshal(%v): %X", ch, tail)
+ }
+ if !reflect.DeepEqual(ch, &ch2) {
+ t.Fatalf("unexpected columnHeader after unmarshal;\ngot\n%v\nwant\n%v", &ch2, ch)
+ }
+ }
+ f(&columnHeader{
+ name: "foo",
+ valueType: valueTypeUint8,
+ }, 11)
+ ch := &columnHeader{
+ name: "foobar",
+ valueType: valueTypeDict,
+
+ valuesOffset: 12345,
+ valuesSize: 254452,
+ }
+ ch.valuesDict.getOrAdd("abc")
+ f(ch, 18)
+}
+
+func TestColumnHeaderUnmarshalFailure(t *testing.T) {
+ f := func(data []byte) {
+ t.Helper()
+ dataOrig := append([]byte{}, data...)
+ var ch columnHeader
+ tail, err := ch.unmarshal(data)
+ if err == nil {
+ t.Fatalf("expecting non-nil error")
+ }
+ if string(tail) != string(dataOrig) {
+ t.Fatalf("unexpected tail left; got %q; want %q", tail, dataOrig)
+ }
+ }
+ f(nil)
+ f([]byte("foo"))
+
+ ch := &columnHeader{
+ name: "abc",
+ valueType: valueTypeUint16,
+ bloomFilterSize: 3244,
+ }
+ data := ch.marshal(nil)
+ f(data[:len(data)-1])
+}
+
+func TestColumnHeaderReset(t *testing.T) {
+ ch := &columnHeader{
+ name: "foobar",
+ valueType: valueTypeUint16,
+
+ valuesOffset: 12345,
+ valuesSize: 254452,
+
+ bloomFilterOffset: 34898234,
+ bloomFilterSize: 873434,
+ }
+ ch.valuesDict.getOrAdd("abc")
+ ch.reset()
+ chZero := &columnHeader{}
+ chZero.valuesDict.values = []string{}
+ if !reflect.DeepEqual(ch, chZero) {
+ t.Fatalf("unexpected non-zero columnHeader after reset: %v", ch)
+ }
+}
+
+func TestTimestampsHeaderMarshalUnmarshal(t *testing.T) {
+ f := func(th *timestampsHeader, marshaledLen int) {
+ t.Helper()
+ data := th.marshal(nil)
+ if len(data) != marshaledLen {
+ t.Fatalf("unexpected length of marshaled timestampsHeader; got %d; want %d", len(data), marshaledLen)
+ }
+ var th2 timestampsHeader
+ tail, err := th2.unmarshal(data)
+ if err != nil {
+ t.Fatalf("unexpected error in unmarshal(%v): %s", th, err)
+ }
+ if len(tail) > 0 {
+ t.Fatalf("unexpected non-nil tail after unmarshal(%v): %X", th, tail)
+ }
+ if !reflect.DeepEqual(th, &th2) {
+ t.Fatalf("unexpected timestampsHeader after unmarshal; got\n%v\nwant\n%v", &th2, th)
+ }
+ }
+ f(&timestampsHeader{}, 33)
+
+ f(&timestampsHeader{
+ blockOffset: 12345,
+ blockSize: 3424834,
+ minTimestamp: -123443,
+ maxTimestamp: 234343,
+ marshalType: encoding.MarshalTypeZSTDNearestDelta,
+ }, 33)
+}
+
+func TestTimestampsHeaderUnmarshalFailure(t *testing.T) {
+ f := func(data []byte) {
+ t.Helper()
+ dataOrig := append([]byte{}, data...)
+ var th timestampsHeader
+ tail, err := th.unmarshal(data)
+ if err == nil {
+ t.Fatalf("expecting non-nil error")
+ }
+ if string(tail) != string(dataOrig) {
+ t.Fatalf("unexpected tail left; got %q; want %q", tail, dataOrig)
+ }
+ }
+ f(nil)
+ f([]byte("foo"))
+}
+
+func TestTimestampsHeaderReset(t *testing.T) {
+ th := &timestampsHeader{
+ blockOffset: 12345,
+ blockSize: 3424834,
+ minTimestamp: -123443,
+ maxTimestamp: 234343,
+ marshalType: encoding.MarshalTypeZSTDNearestDelta,
+ }
+ th.reset()
+ thZero := &timestampsHeader{}
+ if !reflect.DeepEqual(th, thZero) {
+ t.Fatalf("unexpected non-zero timestampsHeader after reset: %v", th)
+ }
+}
diff --git a/lib/logstorage/block_search.go b/lib/logstorage/block_search.go
new file mode 100644
index 000000000..4157189d0
--- /dev/null
+++ b/lib/logstorage/block_search.go
@@ -0,0 +1,645 @@
+package logstorage
+
+import (
+ "strconv"
+ "sync"
+ "time"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+type blockSearchWork struct {
+ // p is the part where the block belongs to.
+ p *part
+
+ // so contains search options for the block search
+ so *searchOptions
+
+ // bh is the header of the block to search.
+ bh blockHeader
+}
+
+func newBlockSearchWork(p *part, so *searchOptions, bh *blockHeader) *blockSearchWork {
+ var bsw blockSearchWork
+ bsw.p = p
+ bsw.so = so
+ bsw.bh.copyFrom(bh)
+ return &bsw
+}
+
+func getBlockSearch() *blockSearch {
+ v := blockSearchPool.Get()
+ if v == nil {
+ return &blockSearch{}
+ }
+ return v.(*blockSearch)
+}
+
+func putBlockSearch(bs *blockSearch) {
+ bs.reset()
+ blockSearchPool.Put(bs)
+}
+
+var blockSearchPool sync.Pool
+
+type blockSearch struct {
+ // bsw is the actual work to perform on the block described by bsw.bh
+ bsw *blockSearchWork
+
+ // br contains result for the search in the block after search() call
+ br blockResult
+
+ // timestampsCache contains cached timestamps for the given block.
+ timestampsCache *encoding.Int64s
+
+ // bloomFilterCache contains cached bloom filters for requested columns in the given block
+ bloomFilterCache map[string]*bloomFilter
+
+ // valuesCache contains cached values for requested columns in the given block
+ valuesCache map[string]*stringBucket
+
+ // sbu is used for unmarshaling local columns
+ sbu stringsBlockUnmarshaler
+
+ // csh is the columnsHeader associated with the given block
+ csh columnsHeader
+}
+
+func (bs *blockSearch) reset() {
+ bs.bsw = nil
+ bs.br.reset()
+
+ if bs.timestampsCache != nil {
+ encoding.PutInt64s(bs.timestampsCache)
+ bs.timestampsCache = nil
+ }
+
+ bloomFilterCache := bs.bloomFilterCache
+ for k, bf := range bloomFilterCache {
+ putBloomFilter(bf)
+ delete(bloomFilterCache, k)
+ }
+
+ valuesCache := bs.valuesCache
+ for k, values := range valuesCache {
+ putStringBucket(values)
+ delete(valuesCache, k)
+ }
+
+ bs.sbu.reset()
+ bs.csh.reset()
+}
+
+func (bs *blockSearch) partPath() string {
+ return bs.bsw.p.path
+}
+
+func (bs *blockSearch) search(bsw *blockSearchWork) {
+ bs.reset()
+
+ bs.bsw = bsw
+
+ bs.csh.initFromBlockHeader(bsw.p, &bsw.bh)
+
+ // search rows matching the given filter
+ bm := getFilterBitmap(int(bsw.bh.rowsCount))
+ bm.setBits()
+ bs.bsw.so.filter.apply(bs, bm)
+
+ bs.br.mustInit(bs, bm)
+ if bm.isZero() {
+ putFilterBitmap(bm)
+ return
+ }
+
+ // fetch the requested columns to bs.br.
+ for _, columnName := range bs.bsw.so.resultColumnNames {
+ switch columnName {
+ case "_stream":
+ bs.br.addStreamColumn(bs)
+ case "_time":
+ bs.br.addTimeColumn(bs)
+ default:
+ v := bs.csh.getConstColumnValue(columnName)
+ if v != "" {
+ bs.br.addConstColumn(v)
+ continue
+ }
+ ch := bs.csh.getColumnHeader(columnName)
+ if ch == nil {
+ bs.br.addConstColumn("")
+ } else {
+ bs.br.addColumn(bs, ch, bm)
+ }
+ }
+ }
+ putFilterBitmap(bm)
+}
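+
+// Illustrative usage sketch (not part of this change): the intended blockSearch lifecycle
+// for a blockSearchWork bsw built via newBlockSearchWork():
+//
+//	bs := getBlockSearch()
+//	bs.search(bsw)
+//	if bs.br.RowsCount() > 0 {
+//		// consume bs.br here; its data becomes invalid after putBlockSearch()
+//	}
+//	putBlockSearch(bs)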
+
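+// initFromBlockHeader reads and unmarshals the columnsHeader referenced by bh from the columnsHeaderFile of p.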
+func (csh *columnsHeader) initFromBlockHeader(p *part, bh *blockHeader) {
+ bb := longTermBufPool.Get()
+ columnsHeaderSize := bh.columnsHeaderSize
+ if columnsHeaderSize > maxColumnsHeaderSize {
+ logger.Panicf("FATAL: %s: columns header size cannot exceed %d bytes; got %d bytes", p.path, maxColumnsHeaderSize, columnsHeaderSize)
+ }
+ bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(columnsHeaderSize))
+ p.columnsHeaderFile.MustReadAt(bb.B, int64(bh.columnsHeaderOffset))
+
+ if err := csh.unmarshal(bb.B); err != nil {
+ logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", p.path, err)
+ }
+ longTermBufPool.Put(bb)
+}
+
+// getBloomFilterForColumn returns bloom filter for the given ch.
+//
+// The returned bloom filter belongs to bs, so it becomes invalid after bs reset.
+func (bs *blockSearch) getBloomFilterForColumn(ch *columnHeader) *bloomFilter {
+ bf := bs.bloomFilterCache[ch.name]
+ if bf != nil {
+ return bf
+ }
+
+ p := bs.bsw.p
+
+ bloomFilterFile := p.fieldBloomFilterFile
+ if ch.name == "" {
+ bloomFilterFile = p.messageBloomFilterFile
+ }
+
+ bb := longTermBufPool.Get()
+ bloomFilterSize := ch.bloomFilterSize
+ if bloomFilterSize > maxBloomFilterBlockSize {
+ logger.Panicf("FATAL: %s: bloom filter block size cannot exceed %d bytes; got %d bytes", bs.partPath(), maxBloomFilterBlockSize, bloomFilterSize)
+ }
+ bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(bloomFilterSize))
+ bloomFilterFile.MustReadAt(bb.B, int64(ch.bloomFilterOffset))
+ bf = getBloomFilter()
+ if err := bf.unmarshal(bb.B); err != nil {
+ logger.Panicf("FATAL: %s: cannot unmarshal bloom filter: %s", bs.partPath(), err)
+ }
+ longTermBufPool.Put(bb)
+
+ if bs.bloomFilterCache == nil {
+ bs.bloomFilterCache = make(map[string]*bloomFilter)
+ }
+ bs.bloomFilterCache[ch.name] = bf
+ return bf
+}
+
+// getValuesForColumn returns block values for the given ch.
+//
+// The returned values belong to bs, so they become invalid after bs reset.
+func (bs *blockSearch) getValuesForColumn(ch *columnHeader) []string {
+ values := bs.valuesCache[ch.name]
+ if values != nil {
+ return values.a
+ }
+
+ p := bs.bsw.p
+
+ valuesFile := p.fieldValuesFile
+ if ch.name == "" {
+ valuesFile = p.messageValuesFile
+ }
+
+ bb := longTermBufPool.Get()
+ valuesSize := ch.valuesSize
+ if valuesSize > maxValuesBlockSize {
+ logger.Panicf("FATAL: %s: values block size cannot exceed %d bytes; got %d bytes", bs.partPath(), maxValuesBlockSize, valuesSize)
+ }
+ bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(valuesSize))
+ valuesFile.MustReadAt(bb.B, int64(ch.valuesOffset))
+
+ values = getStringBucket()
+ var err error
+ values.a, err = bs.sbu.unmarshal(values.a[:0], bb.B, bs.bsw.bh.rowsCount)
+ longTermBufPool.Put(bb)
+ if err != nil {
+ logger.Panicf("FATAL: %s: cannot unmarshal column %q: %s", bs.partPath(), ch.name, err)
+ }
+
+ if bs.valuesCache == nil {
+ bs.valuesCache = make(map[string]*stringBucket)
+ }
+ bs.valuesCache[ch.name] = values
+ return values.a
+}
+
+// getTimestamps returns timestamps for the given bs.
+//
+// The returned timestamps belong to bs, so they become invalid after bs reset.
+func (bs *blockSearch) getTimestamps() []int64 {
+ timestamps := bs.timestampsCache
+ if timestamps != nil {
+ return timestamps.A
+ }
+
+ p := bs.bsw.p
+
+ bb := longTermBufPool.Get()
+ th := &bs.bsw.bh.timestampsHeader
+ blockSize := th.blockSize
+ if blockSize > maxTimestampsBlockSize {
+ logger.Panicf("FATAL: %s: timestamps block size cannot exceed %d bytes; got %d bytes", bs.partPath(), maxTimestampsBlockSize, blockSize)
+ }
+ bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(blockSize))
+ p.timestampsFile.MustReadAt(bb.B, int64(th.blockOffset))
+
+ rowsCount := int(bs.bsw.bh.rowsCount)
+ timestamps = encoding.GetInt64s(rowsCount)
+ var err error
+ timestamps.A, err = encoding.UnmarshalTimestamps(timestamps.A[:0], bb.B, th.marshalType, th.minTimestamp, rowsCount)
+ longTermBufPool.Put(bb)
+ if err != nil {
+ logger.Panicf("FATAL: %s: cannot unmarshal timestamps: %s", bs.partPath(), err)
+ }
+ bs.timestampsCache = timestamps
+ return timestamps.A
+}
+
+// mustReadBlockHeaders reads the block headers for ih from p, appends them to dst and returns the result.
+func (ih *indexBlockHeader) mustReadBlockHeaders(dst []blockHeader, p *part) []blockHeader {
+ bbCompressed := longTermBufPool.Get()
+ indexBlockSize := ih.indexBlockSize
+ if indexBlockSize > maxIndexBlockSize {
+ logger.Panicf("FATAL: %s: index block size cannot exceed %d bytes; got %d bytes", p.indexFile.Path(), maxIndexBlockSize, indexBlockSize)
+ }
+ bbCompressed.B = bytesutil.ResizeNoCopyMayOverallocate(bbCompressed.B, int(indexBlockSize))
+ p.indexFile.MustReadAt(bbCompressed.B, int64(ih.indexBlockOffset))
+
+ bb := longTermBufPool.Get()
+ var err error
+ bb.B, err = encoding.DecompressZSTD(bb.B, bbCompressed.B)
+ longTermBufPool.Put(bbCompressed)
+ if err != nil {
+ logger.Panicf("FATAL: %s: cannot decompress indexBlock read at offset %d with size %d: %s", p.indexFile.Path(), ih.indexBlockOffset, ih.indexBlockSize, err)
+ }
+
+ dst, err = unmarshalBlockHeaders(dst, bb.B)
+ longTermBufPool.Put(bb)
+ if err != nil {
+ logger.Panicf("FATAL: %s: cannot unmarshal block headers read at offset %d with size %d: %s", p.indexFile.Path(), ih.indexBlockOffset, ih.indexBlockSize, err)
+ }
+
+ return dst
+}
+
+type blockResult struct {
+ buf []byte
+ valuesBuf []string
+
+ // streamID is the streamID for the given blockResult
+ streamID streamID
+
+ // cs contain values for result columns
+ cs []blockResultColumn
+
+ // timestamps contain timestamps for the selected log entries
+ timestamps []int64
+}
+
+func (br *blockResult) reset() {
+ br.buf = br.buf[:0]
+
+ vb := br.valuesBuf
+ for i := range vb {
+ vb[i] = ""
+ }
+ br.valuesBuf = vb[:0]
+
+ br.streamID.reset()
+
+ cs := br.cs
+ for i := range cs {
+ cs[i].reset()
+ }
+ br.cs = cs[:0]
+
+ br.timestamps = br.timestamps[:0]
+}
+
+func (br *blockResult) RowsCount() int {
+ return len(br.timestamps)
+}
+
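+// mustInit initializes br for the given block search bs and the rows selected in bm.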
+func (br *blockResult) mustInit(bs *blockSearch, bm *filterBitmap) {
+ br.reset()
+
+ br.streamID = bs.bsw.bh.streamID
+
+ if !bm.isZero() {
+ // Initialize timestamps, since they are used for determining the number of rows in br.RowsCount()
+ srcTimestamps := bs.getTimestamps()
+ dstTimestamps := br.timestamps[:0]
+ bm.forEachSetBit(func(idx int) bool {
+ ts := srcTimestamps[idx]
+ dstTimestamps = append(dstTimestamps, ts)
+ return true
+ })
+ br.timestamps = dstTimestamps
+ }
+}
+
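+// addColumn appends the column described by ch, restricted to the rows selected in bm, to br.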
+func (br *blockResult) addColumn(bs *blockSearch, ch *columnHeader, bm *filterBitmap) {
+ buf := br.buf
+ valuesBuf := br.valuesBuf
+ valuesBufLen := len(valuesBuf)
+ var dictValues []string
+
+ appendValue := func(v string) {
+ bufLen := len(buf)
+ buf = append(buf, v...)
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ valuesBuf = append(valuesBuf, s)
+ }
+
+ switch ch.valueType {
+ case valueTypeString:
+ visitValues(bs, ch, bm, func(v string) bool {
+ appendValue(v)
+ return true
+ })
+ case valueTypeDict:
+ dictValues = ch.valuesDict.values
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 1 {
+ logger.Panicf("FATAL: %s: unexpected dict value size for column %q; got %d bytes; want 1 byte", bs.partPath(), ch.name, len(v))
+ }
+ dictIdx := v[0]
+ if int(dictIdx) >= len(dictValues) {
+ logger.Panicf("FATAL: %s: too big dict index for column %q: %d; should be smaller than %d", bs.partPath(), ch.name, dictIdx, len(dictValues))
+ }
+ appendValue(v)
+ return true
+ })
+ case valueTypeUint8:
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 1 {
+ logger.Panicf("FATAL: %s: unexpected size for uint8 column %q; got %d bytes; want 1 byte", bs.partPath(), ch.name, len(v))
+ }
+ appendValue(v)
+ return true
+ })
+ case valueTypeUint16:
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 2 {
+ logger.Panicf("FATAL: %s: unexpected size for uint16 column %q; got %d bytes; want 2 bytes", bs.partPath(), ch.name, len(v))
+ }
+ appendValue(v)
+ return true
+ })
+ case valueTypeUint32:
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 4 {
+ logger.Panicf("FATAL: %s: unexpected size for uint32 column %q; got %d bytes; want 4 bytes", bs.partPath(), ch.name, len(v))
+ }
+ appendValue(v)
+ return true
+ })
+ case valueTypeUint64:
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 8 {
+ logger.Panicf("FATAL: %s: unexpected size for uint64 column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v))
+ }
+ appendValue(v)
+ return true
+ })
+ case valueTypeFloat64:
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 8 {
+ logger.Panicf("FATAL: %s: unexpected size for float64 column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v))
+ }
+ appendValue(v)
+ return true
+ })
+ case valueTypeIPv4:
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 4 {
+ logger.Panicf("FATAL: %s: unexpected size for ipv4 column %q; got %d bytes; want 4 bytes", bs.partPath(), ch.name, len(v))
+ }
+ appendValue(v)
+ return true
+ })
+ case valueTypeTimestampISO8601:
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 8 {
+ logger.Panicf("FATAL: %s: unexpected size for timestmap column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v))
+ }
+ appendValue(v)
+ return true
+ })
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d for column %q", bs.partPath(), ch.valueType, ch.name)
+ }
+
+ encodedValues := valuesBuf[valuesBufLen:]
+
+ valuesBufLen = len(valuesBuf)
+ for _, v := range dictValues {
+ appendValue(v)
+ }
+ dictValues = valuesBuf[valuesBufLen:]
+
+ br.cs = append(br.cs, blockResultColumn{
+ valueType: ch.valueType,
+ dictValues: dictValues,
+ encodedValues: encodedValues,
+ })
+ br.buf = buf
+ br.valuesBuf = valuesBuf
+}
+
+func (br *blockResult) addTimeColumn(bs *blockSearch) {
+ br.cs = append(br.cs, blockResultColumn{
+ isTime: true,
+ })
+}
+
+func (br *blockResult) addStreamColumn(bs *blockSearch) {
+ bb := bbPool.Get()
+ bb.B = bs.bsw.p.pt.appendStreamTagsByStreamID(bb.B[:0], &br.streamID)
+ if len(bb.B) > 0 {
+ st := GetStreamTags()
+ mustUnmarshalStreamTags(st, bb.B)
+ bb.B = st.marshalString(bb.B[:0])
+ PutStreamTags(st)
+ }
+ s := bytesutil.ToUnsafeString(bb.B)
+ br.addConstColumn(s)
+ bbPool.Put(bb)
+}
+
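+// addConstColumn appends a column to br where every selected row has the given value.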
+func (br *blockResult) addConstColumn(value string) {
+ buf := br.buf
+ bufLen := len(buf)
+ buf = append(buf, value...)
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ br.buf = buf
+
+ valuesBuf := br.valuesBuf
+ valuesBufLen := len(valuesBuf)
+ valuesBuf = append(valuesBuf, s)
+ br.valuesBuf = valuesBuf
+
+ br.cs = append(br.cs, blockResultColumn{
+ isConst: true,
+ valueType: valueTypeUnknown,
+ encodedValues: valuesBuf[valuesBufLen:],
+ })
+}
+
+// getColumnValues returns values for the column with the given idx.
+//
+// The returned values are valid until br.reset() is called.
+func (br *blockResult) getColumnValues(idx int) []string {
+ c := &br.cs[idx]
+ if c.values != nil {
+ return c.values
+ }
+
+ buf := br.buf
+ valuesBuf := br.valuesBuf
+ valuesBufLen := len(valuesBuf)
+
+ if c.isConst {
+ v := c.encodedValues[0]
+ for range br.timestamps {
+ valuesBuf = append(valuesBuf, v)
+ }
+ c.values = valuesBuf[valuesBufLen:]
+ br.valuesBuf = valuesBuf
+ return c.values
+ }
+ if c.isTime {
+ for _, timestamp := range br.timestamps {
+ t := time.Unix(0, timestamp).UTC()
+ bufLen := len(buf)
+ buf = t.AppendFormat(buf, time.RFC3339Nano)
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ valuesBuf = append(valuesBuf, s)
+ }
+ c.values = valuesBuf[valuesBufLen:]
+ br.buf = buf
+ br.valuesBuf = valuesBuf
+ return c.values
+ }
+
+ appendValue := func(v string) {
+ bufLen := len(buf)
+ buf = append(buf, v...)
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ valuesBuf = append(valuesBuf, s)
+ }
+
+ switch c.valueType {
+ case valueTypeString:
+ c.values = c.encodedValues
+ return c.values
+ case valueTypeDict:
+ dictValues := c.dictValues
+ for _, v := range c.encodedValues {
+ dictIdx := v[0]
+ appendValue(dictValues[dictIdx])
+ }
+ case valueTypeUint8:
+ bb := bbPool.Get()
+ for _, v := range c.encodedValues {
+ n := uint64(v[0])
+ bb.B = strconv.AppendUint(bb.B[:0], n, 10)
+ appendValue(bytesutil.ToUnsafeString(bb.B))
+ }
+ bbPool.Put(bb)
+ case valueTypeUint16:
+ bb := bbPool.Get()
+ for _, v := range c.encodedValues {
+ b := bytesutil.ToUnsafeBytes(v)
+ n := uint64(encoding.UnmarshalUint16(b))
+ bb.B = strconv.AppendUint(bb.B[:0], n, 10)
+ appendValue(bytesutil.ToUnsafeString(bb.B))
+ }
+ bbPool.Put(bb)
+ case valueTypeUint32:
+ bb := bbPool.Get()
+ for _, v := range c.encodedValues {
+ b := bytesutil.ToUnsafeBytes(v)
+ n := uint64(encoding.UnmarshalUint32(b))
+ bb.B = strconv.AppendUint(bb.B[:0], n, 10)
+ appendValue(bytesutil.ToUnsafeString(bb.B))
+ }
+ bbPool.Put(bb)
+ case valueTypeUint64:
+ bb := bbPool.Get()
+ for _, v := range c.encodedValues {
+ b := bytesutil.ToUnsafeBytes(v)
+ n := encoding.UnmarshalUint64(b)
+ bb.B = strconv.AppendUint(bb.B[:0], n, 10)
+ appendValue(bytesutil.ToUnsafeString(bb.B))
+ }
+ bbPool.Put(bb)
+ case valueTypeFloat64:
+ bb := bbPool.Get()
+ for _, v := range c.encodedValues {
+ bb.B = toFloat64String(bb.B[:0], v)
+ appendValue(bytesutil.ToUnsafeString(bb.B))
+ }
+ bbPool.Put(bb)
+ case valueTypeIPv4:
+ bb := bbPool.Get()
+ for _, v := range c.encodedValues {
+ bb.B = toIPv4String(bb.B[:0], v)
+ appendValue(bytesutil.ToUnsafeString(bb.B))
+ }
+ bbPool.Put(bb)
+ case valueTypeTimestampISO8601:
+ bb := bbPool.Get()
+ for _, v := range c.encodedValues {
+ bb.B = toTimestampISO8601String(bb.B[:0], v)
+ appendValue(bytesutil.ToUnsafeString(bb.B))
+ }
+ bbPool.Put(bb)
+ default:
+ logger.Panicf("BUG: unknown valueType=%d", c.valueType)
+ }
+
+ c.values = valuesBuf[valuesBufLen:]
+ br.buf = buf
+ br.valuesBuf = valuesBuf
+
+ return c.values
+}
+
+type blockResultColumn struct {
+ // isConst is set to true if the column is const.
+ //
+ // The column value is stored in encodedValues[0]
+ isConst bool
+
+ // isTime is set to true if the column contains _time values.
+ //
+ // The column values are stored in blockResult.timestamps
+ isTime bool
+
+ // valueType is the type of values for a non-const column
+ valueType valueType
+
+ // dictValues contains dictionary values for a valueTypeDict column
+ dictValues []string
+
+ // encodedValues contains encoded values for a non-const column
+ encodedValues []string
+
+ // values contains decoded values for the column after a getColumnValues() call
+ values []string
+}
+
+func (c *blockResultColumn) reset() {
+ c.isConst = false
+ c.isTime = false
+ c.valueType = valueTypeUnknown
+ c.dictValues = nil
+ c.encodedValues = nil
+ c.values = nil
+}
diff --git a/lib/logstorage/block_stream_merger.go b/lib/logstorage/block_stream_merger.go
new file mode 100644
index 000000000..6137c2406
--- /dev/null
+++ b/lib/logstorage/block_stream_merger.go
@@ -0,0 +1,288 @@
+package logstorage
+
+import (
+ "container/heap"
+ "fmt"
+ "strings"
+ "sync"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// mustMergeBlockStreams merges bsrs to bsw and updates ph accordingly.
+//
+// It is guaranteed that bsw is finalized and all the bsrs are closed before returning from the func.
+func mustMergeBlockStreams(ph *partHeader, bsw *blockStreamWriter, bsrs []*blockStreamReader, stopCh <-chan struct{}) {
+ bsm := getBlockStreamMerger()
+ bsm.mustInit(bsw, bsrs)
+ for len(bsm.readersHeap) > 0 {
+ if needStop(stopCh) {
+ break
+ }
+ bsr := bsm.readersHeap[0]
+ bsm.mustWriteBlock(&bsr.blockData, bsw)
+ if bsr.NextBlock() {
+ heap.Fix(&bsm.readersHeap, 0)
+ } else {
+ heap.Pop(&bsm.readersHeap)
+ }
+ }
+ bsm.mustFlushRows()
+ putBlockStreamMerger(bsm)
+
+ bsw.Finalize(ph)
+ mustCloseBlockStreamReaders(bsrs)
+}
+
+// blockStreamMerger merges block streams
+type blockStreamMerger struct {
+ // bsw is the block stream writer to write the merged blocks.
+ bsw *blockStreamWriter
+
+ // bsrs contains the original readers passed to mustInit().
+ // They are used by ReadersPaths()
+ bsrs []*blockStreamReader
+
+ // readersHeap contains a heap of readers to read blocks to merge.
+ readersHeap blockStreamReadersHeap
+
+ // streamID is the stream ID for the pending data.
+ streamID streamID
+
+ // sbu is the unmarshaler for strings in rows and rowsTmp.
+ sbu *stringsBlockUnmarshaler
+
+ // vd is the decoder for unmarshaled strings.
+ vd *valuesDecoder
+
+ // bd is the pending blockData.
+ // bd is unpacked into rows when needed.
+ bd blockData
+
+ // rows is pending log entries.
+ rows rows
+
+ // rowsTmp is temporary storage for log entries during merge.
+ rowsTmp rows
+
+ // uncompressedRowsSizeBytes is the current size of uncompressed rows.
+ //
+ // It is used for flushing rows to blocks when their size reaches maxUncompressedBlockSize
+ uncompressedRowsSizeBytes uint64
+}
+
+func (bsm *blockStreamMerger) reset() {
+ bsm.bsw = nil
+
+ rhs := bsm.readersHeap
+ for i := range rhs {
+ rhs[i] = nil
+ }
+ bsm.readersHeap = rhs[:0]
+
+ bsm.streamID.reset()
+ bsm.resetRows()
+}
+
+func (bsm *blockStreamMerger) resetRows() {
+ if bsm.sbu != nil {
+ putStringsBlockUnmarshaler(bsm.sbu)
+ bsm.sbu = nil
+ }
+ if bsm.vd != nil {
+ putValuesDecoder(bsm.vd)
+ bsm.vd = nil
+ }
+ bsm.bd.reset()
+
+ bsm.rows.reset()
+ bsm.rowsTmp.reset()
+
+ bsm.uncompressedRowsSizeBytes = 0
+}
+
+func (bsm *blockStreamMerger) mustInit(bsw *blockStreamWriter, bsrs []*blockStreamReader) {
+ bsm.reset()
+
+ bsm.bsw = bsw
+ bsm.bsrs = bsrs
+
+ rsh := bsm.readersHeap[:0]
+ for _, bsr := range bsrs {
+ if bsr.NextBlock() {
+ rsh = append(rsh, bsr)
+ }
+ }
+ bsm.readersHeap = rsh
+ heap.Init(&bsm.readersHeap)
+}
+
+// mustWriteBlock writes bd to bsm
+func (bsm *blockStreamMerger) mustWriteBlock(bd *blockData, bsw *blockStreamWriter) {
+ bsm.checkNextBlock(bd)
+ switch {
+ case !bd.streamID.equal(&bsm.streamID):
+ // The bd contains another streamID.
+ // Write the current log entries under the current streamID, then process the bd.
+ bsm.mustFlushRows()
+ bsm.streamID = bd.streamID
+ if bd.uncompressedSizeBytes >= maxUncompressedBlockSize {
+ // Fast path - write full bd to the output without extracting log entries from it.
+ bsw.MustWriteBlockData(bd)
+ } else {
+ // Slow path - copy the bd to the curr bd.
+ bsm.bd.copyFrom(bd)
+ }
+ case bd.uncompressedSizeBytes >= maxUncompressedBlockSize:
+ // The bd contains the same streamID and it is full,
+ // so it can be written next after the current log entries
+ // without the need to merge the bd with the current log entries.
+ // Write the current log entries and then the bd.
+ bsm.mustFlushRows()
+ bsw.MustWriteBlockData(bd)
+ default:
+ // The bd contains the same streamID and it isn't full,
+ // so it must be merged with the current log entries.
+ bsm.mustMergeRows(bd)
+ }
+}
+
+// checkNextBlock checks whether the bd can be written next after the current data.
+func (bsm *blockStreamMerger) checkNextBlock(bd *blockData) {
+ if len(bsm.rows.timestamps) > 0 && bsm.bd.rowsCount > 0 {
+ logger.Panicf("BUG: bsm.bd must be empty when bsm.rows isn't empty! got %d log entries in bsm.bd", bsm.bd.rowsCount)
+ }
+ if bd.streamID.less(&bsm.streamID) {
+ logger.Panicf("FATAL: cannot merge %s: the streamID=%s for the next block is smaller than the streamID=%s for the current block",
+ bsm.ReadersPaths(), &bd.streamID, &bsm.streamID)
+ }
+ if !bd.streamID.equal(&bsm.streamID) {
+ return
+ }
+ // streamID at bd equals streamID at bsm. Check that minTimestamp in bd is bigger or equal to the minTimestamp at bsm.
+ if bd.rowsCount == 0 {
+ return
+ }
+ nextMinTimestamp := bd.timestampsData.minTimestamp
+ if len(bsm.rows.timestamps) == 0 {
+ if bsm.bd.rowsCount == 0 {
+ return
+ }
+ minTimestamp := bsm.bd.timestampsData.minTimestamp
+ if nextMinTimestamp < minTimestamp {
+ logger.Panicf("FATAL: cannot merge %s: the next block's minTimestamp=%d is smaller than the minTimestamp=%d for the current block",
+ bsm.ReadersPaths(), nextMinTimestamp, minTimestamp)
+ }
+ return
+ }
+ minTimestamp := bsm.rows.timestamps[0]
+ if nextMinTimestamp < minTimestamp {
+ logger.Panicf("FATAL: cannot merge %s: the next block's minTimestamp=%d is smaller than the minTimestamp=%d for log entries for the current block",
+ bsm.ReadersPaths(), nextMinTimestamp, minTimestamp)
+ }
+}
+
+// ReadersPaths returns paths for input blockStreamReaders
+func (bsm *blockStreamMerger) ReadersPaths() string {
+ paths := make([]string, len(bsm.bsrs))
+ for i, bsr := range bsm.bsrs {
+ paths[i] = bsr.Path()
+ }
+ return fmt.Sprintf("[%s]", strings.Join(paths, ","))
+}
+
+// mustMergeRows merges the current log entries inside bsm with bd log entries.
+func (bsm *blockStreamMerger) mustMergeRows(bd *blockData) {
+ if bsm.bd.rowsCount > 0 {
+ // Unmarshal log entries from bsm.bd
+ bsm.mustUnmarshalRows(&bsm.bd)
+ bsm.bd.reset()
+ }
+
+ // Unmarshal log entries from bd
+ rowsLen := len(bsm.rows.timestamps)
+ bsm.mustUnmarshalRows(bd)
+
+ // Merge unmarshaled log entries
+ timestamps := bsm.rows.timestamps
+ rows := bsm.rows.rows
+ bsm.rowsTmp.mergeRows(timestamps[:rowsLen], timestamps[rowsLen:], rows[:rowsLen], rows[rowsLen:])
+ bsm.rows, bsm.rowsTmp = bsm.rowsTmp, bsm.rows
+ bsm.rowsTmp.reset()
+
+ if bsm.uncompressedRowsSizeBytes >= maxUncompressedBlockSize {
+ bsm.mustFlushRows()
+ }
+}
+
+func (bsm *blockStreamMerger) mustUnmarshalRows(bd *blockData) {
+ rowsLen := len(bsm.rows.timestamps)
+ if bsm.sbu == nil {
+ bsm.sbu = getStringsBlockUnmarshaler()
+ }
+ if bsm.vd == nil {
+ bsm.vd = getValuesDecoder()
+ }
+ if err := bd.unmarshalRows(&bsm.rows, bsm.sbu, bsm.vd); err != nil {
+ logger.Panicf("FATAL: cannot merge %s: cannot unmarshal log entries from blockData: %s", bsm.ReadersPaths(), err)
+ }
+ bsm.uncompressedRowsSizeBytes += uncompressedRowsSizeBytes(bsm.rows.rows[rowsLen:])
+}
+
+func (bsm *blockStreamMerger) mustFlushRows() {
+ if len(bsm.rows.timestamps) == 0 {
+ bsm.bsw.MustWriteBlockData(&bsm.bd)
+ } else {
+ bsm.bsw.MustWriteRows(&bsm.streamID, bsm.rows.timestamps, bsm.rows.rows)
+ }
+ bsm.resetRows()
+}
+
+func getBlockStreamMerger() *blockStreamMerger {
+ v := blockStreamMergerPool.Get()
+ if v == nil {
+ return &blockStreamMerger{}
+ }
+ return v.(*blockStreamMerger)
+}
+
+func putBlockStreamMerger(bsm *blockStreamMerger) {
+ bsm.reset()
+ blockStreamMergerPool.Put(bsm)
+}
+
+var blockStreamMergerPool sync.Pool
+
+type blockStreamReadersHeap []*blockStreamReader
+
+func (h *blockStreamReadersHeap) Len() int {
+ return len(*h)
+}
+
+func (h *blockStreamReadersHeap) Less(i, j int) bool {
+ x := *h
+ a := &x[i].blockData
+ b := &x[j].blockData
+ if !a.streamID.equal(&b.streamID) {
+ return a.streamID.less(&b.streamID)
+ }
+ return a.timestampsData.minTimestamp < b.timestampsData.minTimestamp
+}
+
+func (h *blockStreamReadersHeap) Swap(i, j int) {
+ x := *h
+ x[i], x[j] = x[j], x[i]
+}
+
+func (h *blockStreamReadersHeap) Push(v interface{}) {
+ bsr := v.(*blockStreamReader)
+ *h = append(*h, bsr)
+}
+
+func (h *blockStreamReadersHeap) Pop() interface{} {
+ x := *h
+ bsr := x[len(x)-1]
+ x[len(x)-1] = nil
+ *h = x[:len(x)-1]
+ return bsr
+}
diff --git a/lib/logstorage/block_stream_reader.go b/lib/logstorage/block_stream_reader.go
new file mode 100644
index 000000000..00bcbc4e5
--- /dev/null
+++ b/lib/logstorage/block_stream_reader.go
@@ -0,0 +1,383 @@
+package logstorage
+
+import (
+ "path/filepath"
+ "sync"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+type readerWithStats struct {
+ r filestream.ReadCloser
+ bytesRead uint64
+}
+
+func (r *readerWithStats) reset() {
+ r.r = nil
+ r.bytesRead = 0
+}
+
+func (r *readerWithStats) init(rc filestream.ReadCloser) {
+ r.reset()
+
+ r.r = rc
+}
+
+// Path returns the path to the underlying file for r.
+func (r *readerWithStats) Path() string {
+ return r.r.Path()
+}
+
+// MustReadFull reads len(data) bytes from r into data.
+func (r *readerWithStats) MustReadFull(data []byte) {
+ fs.MustReadData(r.r, data)
+ r.bytesRead += uint64(len(data))
+}
+
+func (r *readerWithStats) Read(p []byte) (int, error) {
+ n, err := r.r.Read(p)
+ r.bytesRead += uint64(n)
+ return n, err
+}
+
+func (r *readerWithStats) MustClose() {
+ r.r.MustClose()
+ r.r = nil
+}
+
+// streamReaders contains readers for blockStreamReader
+type streamReaders struct {
+ metaindexReader readerWithStats
+ indexReader readerWithStats
+ columnsHeaderReader readerWithStats
+ timestampsReader readerWithStats
+ fieldValuesReader readerWithStats
+ fieldBloomFilterReader readerWithStats
+ messageValuesReader readerWithStats
+ messageBloomFilterReader readerWithStats
+}
+
+func (sr *streamReaders) reset() {
+ sr.metaindexReader.reset()
+ sr.indexReader.reset()
+ sr.columnsHeaderReader.reset()
+ sr.timestampsReader.reset()
+ sr.fieldValuesReader.reset()
+ sr.fieldBloomFilterReader.reset()
+ sr.messageValuesReader.reset()
+ sr.messageBloomFilterReader.reset()
+}
+
+func (sr *streamReaders) init(metaindexReader, indexReader, columnsHeaderReader, timestampsReader, fieldValuesReader, fieldBloomFilterReader,
+ messageValuesReader, messageBloomFilterReader filestream.ReadCloser,
+) {
+ sr.metaindexReader.init(metaindexReader)
+ sr.indexReader.init(indexReader)
+ sr.columnsHeaderReader.init(columnsHeaderReader)
+ sr.timestampsReader.init(timestampsReader)
+ sr.fieldValuesReader.init(fieldValuesReader)
+ sr.fieldBloomFilterReader.init(fieldBloomFilterReader)
+ sr.messageValuesReader.init(messageValuesReader)
+ sr.messageBloomFilterReader.init(messageBloomFilterReader)
+}
+
+func (sr *streamReaders) totalBytesRead() uint64 {
+ n := uint64(0)
+ n += sr.metaindexReader.bytesRead
+ n += sr.indexReader.bytesRead
+ n += sr.columnsHeaderReader.bytesRead
+ n += sr.timestampsReader.bytesRead
+ n += sr.fieldValuesReader.bytesRead
+ n += sr.fieldBloomFilterReader.bytesRead
+ n += sr.messageValuesReader.bytesRead
+ n += sr.messageBloomFilterReader.bytesRead
+ return n
+}
+
+func (sr *streamReaders) MustClose() {
+ sr.metaindexReader.MustClose()
+ sr.indexReader.MustClose()
+ sr.columnsHeaderReader.MustClose()
+ sr.timestampsReader.MustClose()
+ sr.fieldValuesReader.MustClose()
+ sr.fieldBloomFilterReader.MustClose()
+ sr.messageValuesReader.MustClose()
+ sr.messageBloomFilterReader.MustClose()
+}
+
+// blockStreamReader is used for reading blocks in a streaming manner from a part.
+type blockStreamReader struct {
+ // blockData contains the data for the last read block
+ blockData blockData
+
+ // ph is the header for the part
+ ph partHeader
+
+ // streamReaders contains data readers in stream mode
+ streamReaders streamReaders
+
+ // indexBlockHeaders contains the list of all the indexBlockHeader entries for the part
+ indexBlockHeaders []indexBlockHeader
+
+ // blockHeaders contains the list of blockHeader entries for the current indexBlockHeader pointed by nextIndexBlockIdx
+ blockHeaders []blockHeader
+
+ // nextIndexBlockIdx is the index of the next item to read from indexBlockHeaders
+ nextIndexBlockIdx int
+
+ // nextBlockIdx is the index of the next item to read from blockHeaders
+ nextBlockIdx int
+
+ // globalUncompressedSizeBytes is the total size of log entries seen in the part
+ globalUncompressedSizeBytes uint64
+
+ // globalRowsCount is the number of log entries seen in the part
+ globalRowsCount uint64
+
+ // globalBlocksCount is the number of blocks seen in the part
+ globalBlocksCount uint64
+
+ // sidLast is the stream id for the previously read block
+ sidLast streamID
+
+ // minTimestampLast is the minimum timestamp for the previously read block
+ minTimestampLast int64
+}
+
+// reset resets bsr, so it can be re-used
+func (bsr *blockStreamReader) reset() {
+ bsr.blockData.reset()
+ bsr.ph.reset()
+ bsr.streamReaders.reset()
+
+ ihs := bsr.indexBlockHeaders
+ if len(ihs) > 10e3 {
+ // The ihs length is unbounded, so it is better to drop overly long indexBlockHeaders in order to reduce memory usage
+ ihs = nil
+ }
+ for i := range ihs {
+ ihs[i].reset()
+ }
+ bsr.indexBlockHeaders = ihs[:0]
+
+ bhs := bsr.blockHeaders
+ for i := range bhs {
+ bhs[i].reset()
+ }
+ bsr.blockHeaders = bhs[:0]
+
+ bsr.nextIndexBlockIdx = 0
+ bsr.nextBlockIdx = 0
+ bsr.globalUncompressedSizeBytes = 0
+ bsr.globalRowsCount = 0
+ bsr.globalBlocksCount = 0
+
+ bsr.sidLast.reset()
+ bsr.minTimestampLast = 0
+}
+
+// Path returns part path for bsr (e.g. file path, url or in-memory reference)
+func (bsr *blockStreamReader) Path() string {
+ path := bsr.streamReaders.metaindexReader.Path()
+ return filepath.Dir(path)
+}
+
+// MustInitFromInmemoryPart initializes bsr from mp.
+func (bsr *blockStreamReader) MustInitFromInmemoryPart(mp *inmemoryPart) {
+ bsr.reset()
+
+ bsr.ph = mp.ph
+
+ // Initialize streamReaders
+ metaindexReader := mp.metaindex.NewReader()
+ indexReader := mp.index.NewReader()
+ columnsHeaderReader := mp.columnsHeader.NewReader()
+ timestampsReader := mp.timestamps.NewReader()
+ fieldValuesReader := mp.fieldValues.NewReader()
+ fieldBloomFilterReader := mp.fieldBloomFilter.NewReader()
+ messageValuesReader := mp.messageValues.NewReader()
+ messageBloomFilterReader := mp.messageBloomFilter.NewReader()
+
+ bsr.streamReaders.init(metaindexReader, indexReader, columnsHeaderReader, timestampsReader,
+ fieldValuesReader, fieldBloomFilterReader, messageValuesReader, messageBloomFilterReader)
+
+ // Read metaindex data
+ bsr.indexBlockHeaders = mustReadIndexBlockHeaders(bsr.indexBlockHeaders[:0], &bsr.streamReaders.metaindexReader)
+}
+
+// MustInitFromFilePart initializes bsr from file part at the given path.
+func (bsr *blockStreamReader) MustInitFromFilePart(path string) {
+ bsr.reset()
+
+ // Files in the part are always read without OS cache pollution,
+ // since they are usually deleted after the merge.
+ const nocache = true
+
+ metaindexPath := filepath.Join(path, metaindexFilename)
+ indexPath := filepath.Join(path, indexFilename)
+ columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
+ timestampsPath := filepath.Join(path, timestampsFilename)
+ fieldValuesPath := filepath.Join(path, fieldValuesFilename)
+ fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename)
+ messageValuesPath := filepath.Join(path, messageValuesFilename)
+ messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
+
+ bsr.ph.mustReadMetadata(path)
+
+ // Open data readers
+ metaindexReader := filestream.MustOpen(metaindexPath, nocache)
+ indexReader := filestream.MustOpen(indexPath, nocache)
+ columnsHeaderReader := filestream.MustOpen(columnsHeaderPath, nocache)
+ timestampsReader := filestream.MustOpen(timestampsPath, nocache)
+ fieldValuesReader := filestream.MustOpen(fieldValuesPath, nocache)
+ fieldBloomFilterReader := filestream.MustOpen(fieldBloomFilterPath, nocache)
+ messageValuesReader := filestream.MustOpen(messageValuesPath, nocache)
+ messageBloomFilterReader := filestream.MustOpen(messageBloomFilterPath, nocache)
+
+ // Initialize streamReaders
+ bsr.streamReaders.init(metaindexReader, indexReader, columnsHeaderReader, timestampsReader,
+ fieldValuesReader, fieldBloomFilterReader, messageValuesReader, messageBloomFilterReader)
+
+ // Read metaindex data
+ bsr.indexBlockHeaders = mustReadIndexBlockHeaders(bsr.indexBlockHeaders[:0], &bsr.streamReaders.metaindexReader)
+}
+
+// NextBlock reads the next block from bsr and puts it into bsr.blockData.
+//
+// false is returned when there are no more blocks.
+func (bsr *blockStreamReader) NextBlock() bool {
+ for bsr.nextBlockIdx >= len(bsr.blockHeaders) {
+ if !bsr.nextIndexBlock() {
+ return false
+ }
+ }
+ ih := &bsr.indexBlockHeaders[bsr.nextIndexBlockIdx-1]
+ bh := &bsr.blockHeaders[bsr.nextBlockIdx]
+ th := &bh.timestampsHeader
+
+ // Validate bh
+ if bh.streamID.less(&bsr.sidLast) {
+ logger.Panicf("FATAL: %s: blockHeader.streamID=%s cannot be smaller than the streamID from the previously read block: %s", bsr.Path(), &bh.streamID, &bsr.sidLast)
+ }
+ if bh.streamID.equal(&bsr.sidLast) && th.minTimestamp < bsr.minTimestampLast {
+ logger.Panicf("FATAL: %s: timestamps.minTimestamp=%d cannot be smaller than the minTimestamp for the previously read block for the same streamID: %d",
+ bsr.Path(), th.minTimestamp, bsr.minTimestampLast)
+ }
+ bsr.minTimestampLast = th.minTimestamp
+ bsr.sidLast = bh.streamID
+ if th.minTimestamp < ih.minTimestamp {
+ logger.Panicf("FATAL: %s: timestampsHeader.minTimestamp=%d cannot be smaller than indexBlockHeader.minTimestamp=%d", bsr.Path(), th.minTimestamp, ih.minTimestamp)
+ }
+ if th.maxTimestamp > ih.maxTimestamp {
+ logger.Panicf("FATAL: %s: timestampsHeader.maxTimestamp=%d cannot be bigger than indexBlockHeader.maxTimestamp=%d", bsr.Path(), th.maxTimestamp, ih.minTimestamp)
+ }
+
+ // Read bsr.blockData
+ bsr.blockData.mustReadFrom(bh, &bsr.streamReaders)
+
+ bsr.globalUncompressedSizeBytes += bh.uncompressedSizeBytes
+ bsr.globalRowsCount += bh.rowsCount
+ bsr.globalBlocksCount++
+ if bsr.globalUncompressedSizeBytes > bsr.ph.UncompressedSizeBytes {
+ logger.Panicf("FATAL: %s: too big size of entries read: %d; mustn't exceed partHeader.UncompressedSizeBytes=%d",
+ bsr.Path(), bsr.globalUncompressedSizeBytes, bsr.ph.UncompressedSizeBytes)
+ }
+ if bsr.globalRowsCount > bsr.ph.RowsCount {
+ logger.Panicf("FATAL: %s: too many log entries read so far: %d; mustn't exceed partHeader.RowsCount=%d", bsr.Path(), bsr.globalRowsCount, bsr.ph.RowsCount)
+ }
+ if bsr.globalBlocksCount > bsr.ph.BlocksCount {
+ logger.Panicf("FATAL: %s: too many blocks read so far: %d; mustn't exceed partHeader.BlocksCount=%d", bsr.Path(), bsr.globalBlocksCount, bsr.ph.BlocksCount)
+ }
+
+ // The block has been successfully read
+ bsr.nextBlockIdx++
+ return true
+}
+
+func (bsr *blockStreamReader) nextIndexBlock() bool {
+ // Advance to the next indexBlockHeader
+ if bsr.nextIndexBlockIdx >= len(bsr.indexBlockHeaders) {
+ // No more blocks left
+ // Validate bsr.ph
+ totalBytesRead := bsr.streamReaders.totalBytesRead()
+ if bsr.ph.CompressedSizeBytes != totalBytesRead {
+ logger.Panicf("FATAL: %s: partHeader.CompressedSizeBytes=%d must match the size of data read: %d", bsr.Path(), bsr.ph.CompressedSizeBytes, totalBytesRead)
+ }
+ if bsr.ph.UncompressedSizeBytes != bsr.globalUncompressedSizeBytes {
+ logger.Panicf("FATAL: %s: partHeader.UncompressedSizeBytes=%d must match the size of entries read: %d",
+ bsr.Path(), bsr.ph.UncompressedSizeBytes, bsr.globalUncompressedSizeBytes)
+ }
+ if bsr.ph.RowsCount != bsr.globalRowsCount {
+ logger.Panicf("FATAL: %s: partHeader.RowsCount=%d must match the number of log entries read: %d", bsr.Path(), bsr.ph.RowsCount, bsr.globalRowsCount)
+ }
+ if bsr.ph.BlocksCount != bsr.globalBlocksCount {
+ logger.Panicf("FATAL: %s: partHeader.BlocksCount=%d must match the number of blocks read: %d", bsr.Path(), bsr.ph.BlocksCount, bsr.globalBlocksCount)
+ }
+ return false
+ }
+ ih := &bsr.indexBlockHeaders[bsr.nextIndexBlockIdx]
+
+ // Validate ih
+ metaindexReader := &bsr.streamReaders.metaindexReader
+ if ih.minTimestamp < bsr.ph.MinTimestamp {
+ logger.Panicf("FATAL: %s: indexBlockHeader.minTimestamp=%d cannot be smaller than partHeader.MinTimestamp=%d",
+ metaindexReader.Path(), ih.minTimestamp, bsr.ph.MinTimestamp)
+ }
+ if ih.maxTimestamp > bsr.ph.MaxTimestamp {
+ logger.Panicf("FATAL: %s: indexBlockHeader.maxTimestamp=%d cannot be bigger than partHeader.MaxTimestamp=%d",
+ metaindexReader.Path(), ih.maxTimestamp, bsr.ph.MaxTimestamp)
+ }
+
+ // Read indexBlock for the given ih
+ bb := longTermBufPool.Get()
+ bb.B = ih.mustReadNextIndexBlock(bb.B[:0], &bsr.streamReaders)
+ bsr.blockHeaders = resetBlockHeaders(bsr.blockHeaders)
+ var err error
+ bsr.blockHeaders, err = unmarshalBlockHeaders(bsr.blockHeaders[:0], bb.B)
+ longTermBufPool.Put(bb)
+ if err != nil {
+ logger.Panicf("FATAL: %s: cannot unmarshal blockHeader entries: %s", bsr.streamReaders.indexReader.Path(), err)
+ }
+
+ bsr.nextIndexBlockIdx++
+ bsr.nextBlockIdx = 0
+ return true
+}
+
+// MustClose closes bsr.
+func (bsr *blockStreamReader) MustClose() {
+ bsr.streamReaders.MustClose()
+ bsr.reset()
+}
+
+// getBlockStreamReader returns a blockStreamReader from the pool.
+//
+// The returned blockStreamReader must be initialized with MustInitFromInmemoryPart() or MustInitFromFilePart().
+// Call putBlockStreamReader() when the returned blockStreamReader is no longer needed.
+func getBlockStreamReader() *blockStreamReader {
+ v := blockStreamReaderPool.Get()
+ if v == nil {
+ v = &blockStreamReader{}
+ }
+ bsr := v.(*blockStreamReader)
+ return bsr
+}
+
+// putBlockStreamReader returns bsr to the pool.
+//
+// bsr cannot be used after returning to the pool.
+func putBlockStreamReader(bsr *blockStreamReader) {
+ bsr.reset()
+ blockStreamReaderPool.Put(bsr)
+}
+
+var blockStreamReaderPool sync.Pool
+
+// mustCloseBlockStreamReaders calls MustClose() on the given bsrs.
+func mustCloseBlockStreamReaders(bsrs []*blockStreamReader) {
+ for _, bsr := range bsrs {
+ bsr.MustClose()
+ }
+}
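+
+// Illustrative read loop (a sketch, not part of this change): reading all the blocks
+// from a file part via the pool above. The part path and the processBlock callback
+// are hypothetical placeholders.
+//
+//	bsr := getBlockStreamReader()
+//	bsr.MustInitFromFilePart("/path/to/part")
+//	for bsr.NextBlock() {
+//		processBlock(&bsr.blockData)
+//	}
+//	bsr.MustClose()
+//	putBlockStreamReader(bsr)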
diff --git a/lib/logstorage/block_stream_writer.go b/lib/logstorage/block_stream_writer.go
new file mode 100644
index 000000000..c16740b81
--- /dev/null
+++ b/lib/logstorage/block_stream_writer.go
@@ -0,0 +1,362 @@
+package logstorage
+
+import (
+ "path/filepath"
+ "sync"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// writerWithStats writes data to w and tracks the total amount of data written in bytesWritten.
+type writerWithStats struct {
+ w filestream.WriteCloser
+ bytesWritten uint64
+}
+
+func (w *writerWithStats) reset() {
+ w.w = nil
+ w.bytesWritten = 0
+}
+
+func (w *writerWithStats) init(wc filestream.WriteCloser) {
+ w.reset()
+
+ w.w = wc
+}
+
+func (w *writerWithStats) Path() string {
+ return w.w.Path()
+}
+
+func (w *writerWithStats) MustWrite(data []byte) {
+ fs.MustWriteData(w.w, data)
+ w.bytesWritten += uint64(len(data))
+}
+
+// MustClose closes the underlying w.
+func (w *writerWithStats) MustClose() {
+ w.w.MustClose()
+}
+
+// streamWriters contains writers for blockStreamWriter
+type streamWriters struct {
+ metaindexWriter writerWithStats
+ indexWriter writerWithStats
+ columnsHeaderWriter writerWithStats
+ timestampsWriter writerWithStats
+ fieldValuesWriter writerWithStats
+ fieldBloomFilterWriter writerWithStats
+ messageValuesWriter writerWithStats
+ messageBloomFilterWriter writerWithStats
+}
+
+func (sw *streamWriters) reset() {
+ sw.metaindexWriter.reset()
+ sw.indexWriter.reset()
+ sw.columnsHeaderWriter.reset()
+ sw.timestampsWriter.reset()
+ sw.fieldValuesWriter.reset()
+ sw.fieldBloomFilterWriter.reset()
+ sw.messageValuesWriter.reset()
+ sw.messageBloomFilterWriter.reset()
+}
+
+func (sw *streamWriters) init(metaindexWriter, indexWriter, columnsHeaderWriter, timestampsWriter, fieldValuesWriter, fieldBloomFilterWriter,
+ messageValuesWriter, messageBloomFilterWriter filestream.WriteCloser,
+) {
+ sw.metaindexWriter.init(metaindexWriter)
+ sw.indexWriter.init(indexWriter)
+ sw.columnsHeaderWriter.init(columnsHeaderWriter)
+ sw.timestampsWriter.init(timestampsWriter)
+ sw.fieldValuesWriter.init(fieldValuesWriter)
+ sw.fieldBloomFilterWriter.init(fieldBloomFilterWriter)
+ sw.messageValuesWriter.init(messageValuesWriter)
+ sw.messageBloomFilterWriter.init(messageBloomFilterWriter)
+}
+
+func (sw *streamWriters) totalBytesWritten() uint64 {
+ n := uint64(0)
+ n += sw.metaindexWriter.bytesWritten
+ n += sw.indexWriter.bytesWritten
+ n += sw.columnsHeaderWriter.bytesWritten
+ n += sw.timestampsWriter.bytesWritten
+ n += sw.fieldValuesWriter.bytesWritten
+ n += sw.fieldBloomFilterWriter.bytesWritten
+ n += sw.messageValuesWriter.bytesWritten
+ n += sw.messageBloomFilterWriter.bytesWritten
+ return n
+}
+
+func (sw *streamWriters) MustClose() {
+ sw.metaindexWriter.MustClose()
+ sw.indexWriter.MustClose()
+ sw.columnsHeaderWriter.MustClose()
+ sw.timestampsWriter.MustClose()
+ sw.fieldValuesWriter.MustClose()
+ sw.fieldBloomFilterWriter.MustClose()
+ sw.messageValuesWriter.MustClose()
+ sw.messageBloomFilterWriter.MustClose()
+}
+
+// blockStreamWriter is used for writing blocks into the underlying storage in a streaming manner.
+type blockStreamWriter struct {
+ // streamWriters contains writer for block data
+ streamWriters streamWriters
+
+ // sidLast is the streamID for the last written block
+ sidLast streamID
+
+ // sidFirst is the streamID for the first block in the current indexBlock
+ sidFirst streamID
+
+ // minTimestampLast is the minimum timestamp seen for the last written block
+ minTimestampLast int64
+
+ // minTimestamp is the minimum timestamp seen across written blocks for the current indexBlock
+ minTimestamp int64
+
+ // maxTimestamp is the maximum timestamp seen across written blocks for the current indexBlock
+ maxTimestamp int64
+
+ // hasWrittenBlocks is set to true if at least a single block is written to the current indexBlock
+ hasWrittenBlocks bool
+
+ // globalUncompressedSizeBytes is the total size of all the log entries written via bsw
+ globalUncompressedSizeBytes uint64
+
+ // globalRowsCount is the total number of log entries written via bsw
+ globalRowsCount uint64
+
+ // globalBlocksCount is the total number of blocks written to bsw
+ globalBlocksCount uint64
+
+ // globalMinTimestamp is the minimum timestamp seen across all the blocks written to bsw
+ globalMinTimestamp int64
+
+ // globalMaxTimestamp is the maximum timestamp seen across all the blocks written to bsw
+ globalMaxTimestamp int64
+
+ // indexBlockData contains marshaled blockHeader data, which isn't written yet to indexFilename
+ indexBlockData []byte
+
+ // metaindexData contains marshaled indexBlockHeader data, which isn't written yet to metaindexFilename
+ metaindexData []byte
+
+ // indexBlockHeader is used for marshaling the data to metaindexData
+ indexBlockHeader indexBlockHeader
+}
+
+// reset resets bsw for subsequent re-use.
+func (bsw *blockStreamWriter) reset() {
+ bsw.streamWriters.reset()
+ bsw.sidLast.reset()
+ bsw.sidFirst.reset()
+ bsw.minTimestampLast = 0
+ bsw.minTimestamp = 0
+ bsw.maxTimestamp = 0
+ bsw.hasWrittenBlocks = false
+ bsw.globalUncompressedSizeBytes = 0
+ bsw.globalRowsCount = 0
+ bsw.globalBlocksCount = 0
+ bsw.globalMinTimestamp = 0
+ bsw.globalMaxTimestamp = 0
+ bsw.indexBlockData = bsw.indexBlockData[:0]
+
+ if len(bsw.metaindexData) > 1024*1024 {
+ // The length of bsw.metaindexData is unbounded, so drop an overly long buffer
+ // in order to conserve memory.
+ bsw.metaindexData = nil
+ } else {
+ bsw.metaindexData = bsw.metaindexData[:0]
+ }
+
+ bsw.indexBlockHeader.reset()
+}
+
+// MustInitForInmemoryPart initializes bsw from mp.
+func (bsw *blockStreamWriter) MustInitForInmemoryPart(mp *inmemoryPart) {
+ bsw.reset()
+ bsw.streamWriters.init(&mp.metaindex, &mp.index, &mp.columnsHeader, &mp.timestamps, &mp.fieldValues, &mp.fieldBloomFilter, &mp.messageValues, &mp.messageBloomFilter)
+}
+
+// MustInitForFilePart initializes bsw for writing data to file part located at path.
+//
+// if nocache is true, then the written data doesn't go to OS page cache.
+func (bsw *blockStreamWriter) MustInitForFilePart(path string, nocache bool) {
+ bsw.reset()
+
+ fs.MustMkdirFailIfExist(path)
+
+ metaindexPath := filepath.Join(path, metaindexFilename)
+ indexPath := filepath.Join(path, indexFilename)
+ columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
+ timestampsPath := filepath.Join(path, timestampsFilename)
+ fieldValuesPath := filepath.Join(path, fieldValuesFilename)
+ fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename)
+ messageValuesPath := filepath.Join(path, messageValuesFilename)
+ messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
+
+ // Always cache the metaindex file, since it is re-read immediately after part creation
+ metaindexWriter := filestream.MustCreate(metaindexPath, false)
+
+ indexWriter := filestream.MustCreate(indexPath, nocache)
+ columnsHeaderWriter := filestream.MustCreate(columnsHeaderPath, nocache)
+ timestampsWriter := filestream.MustCreate(timestampsPath, nocache)
+ fieldValuesWriter := filestream.MustCreate(fieldValuesPath, nocache)
+ fieldBloomFilterWriter := filestream.MustCreate(fieldBloomFilterPath, nocache)
+ messageValuesWriter := filestream.MustCreate(messageValuesPath, nocache)
+ messageBloomFilterWriter := filestream.MustCreate(messageBloomFilterPath, nocache)
+
+ bsw.streamWriters.init(metaindexWriter, indexWriter, columnsHeaderWriter, timestampsWriter,
+ fieldValuesWriter, fieldBloomFilterWriter, messageValuesWriter, messageBloomFilterWriter)
+}
+
+// MustWriteRows writes timestamps with rows under the given sid to bsw.
+//
+// timestamps must be sorted.
+// sid must be bigger or equal to the sid for the previously written rows.
+func (bsw *blockStreamWriter) MustWriteRows(sid *streamID, timestamps []int64, rows [][]Field) {
+ if len(timestamps) == 0 {
+ return
+ }
+
+ b := getBlock()
+ b.MustInitFromRows(timestamps, rows)
+ bsw.MustWriteBlock(sid, b)
+ putBlock(b)
+}
+
+// MustWriteBlockData writes bd to bsw.
+//
+// The bd.streamID must be bigger or equal to the streamID for the previously written blocks.
+func (bsw *blockStreamWriter) MustWriteBlockData(bd *blockData) {
+ if bd.rowsCount == 0 {
+ return
+ }
+ bsw.mustWriteBlockInternal(&bd.streamID, nil, bd)
+}
+
+// MustWriteBlock writes b under the given sid to bsw.
+//
+// The sid must be bigger or equal to the sid for the previously written blocks.
+// The minimum timestamp in b must be bigger or equal to the minimum timestamp written to the same sid.
+func (bsw *blockStreamWriter) MustWriteBlock(sid *streamID, b *block) {
+ rowsCount := b.Len()
+ if rowsCount == 0 {
+ return
+ }
+ bsw.mustWriteBlockInternal(sid, b, nil)
+}
+
+func (bsw *blockStreamWriter) mustWriteBlockInternal(sid *streamID, b *block, bd *blockData) {
+ if sid.less(&bsw.sidLast) {
+ logger.Panicf("BUG: the sid=%s cannot be smaller than the previously written sid=%s", sid, &bsw.sidLast)
+ }
+ hasWrittenBlocks := bsw.hasWrittenBlocks
+ if !hasWrittenBlocks {
+ bsw.sidFirst = *sid
+ bsw.hasWrittenBlocks = true
+ }
+ isSeenSid := sid.equal(&bsw.sidLast)
+ bsw.sidLast = *sid
+
+ bh := getBlockHeader()
+ if b != nil {
+ b.mustWriteTo(sid, bh, &bsw.streamWriters)
+ } else {
+ bd.mustWriteTo(bh, &bsw.streamWriters)
+ }
+ th := &bh.timestampsHeader
+ if bsw.globalRowsCount == 0 || th.minTimestamp < bsw.globalMinTimestamp {
+ bsw.globalMinTimestamp = th.minTimestamp
+ }
+ if bsw.globalRowsCount == 0 || th.maxTimestamp > bsw.globalMaxTimestamp {
+ bsw.globalMaxTimestamp = th.maxTimestamp
+ }
+ if !hasWrittenBlocks || th.minTimestamp < bsw.minTimestamp {
+ bsw.minTimestamp = th.minTimestamp
+ }
+ if !hasWrittenBlocks || th.maxTimestamp > bsw.maxTimestamp {
+ bsw.maxTimestamp = th.maxTimestamp
+ }
+ if isSeenSid && th.minTimestamp < bsw.minTimestampLast {
+ logger.Panicf("BUG: the block for sid=%s cannot contain timestamp smaller than %d, but it contains timestamp %d", sid, bsw.minTimestampLast, th.minTimestamp)
+ }
+ bsw.minTimestampLast = th.minTimestamp
+
+ bsw.globalUncompressedSizeBytes += bh.uncompressedSizeBytes
+ bsw.globalRowsCount += bh.rowsCount
+ bsw.globalBlocksCount++
+
+ // Marshal bh
+ bsw.indexBlockData = bh.marshal(bsw.indexBlockData)
+ putBlockHeader(bh)
+ if len(bsw.indexBlockData) > maxUncompressedIndexBlockSize {
+ bsw.mustFlushIndexBlock(bsw.indexBlockData)
+ bsw.indexBlockData = bsw.indexBlockData[:0]
+ }
+}
+
+func (bsw *blockStreamWriter) mustFlushIndexBlock(data []byte) {
+ if len(data) > 0 {
+ bsw.indexBlockHeader.mustWriteIndexBlock(data, bsw.sidFirst, bsw.minTimestamp, bsw.maxTimestamp, &bsw.streamWriters)
+ bsw.metaindexData = bsw.indexBlockHeader.marshal(bsw.metaindexData)
+ }
+ bsw.hasWrittenBlocks = false
+ bsw.minTimestamp = 0
+ bsw.maxTimestamp = 0
+ bsw.sidFirst.reset()
+}
+
+// Finalize() finalizes the data write process and updates ph with the finalized stats
+//
+// It closes the writers initialized via MustInitForInmemoryPart() or MustInitForFilePart().
+//
+// bsw can be re-used after calling Finalize().
+func (bsw *blockStreamWriter) Finalize(ph *partHeader) {
+ ph.UncompressedSizeBytes = bsw.globalUncompressedSizeBytes
+ ph.RowsCount = bsw.globalRowsCount
+ ph.BlocksCount = bsw.globalBlocksCount
+ ph.MinTimestamp = bsw.globalMinTimestamp
+ ph.MaxTimestamp = bsw.globalMaxTimestamp
+
+ bsw.mustFlushIndexBlock(bsw.indexBlockData)
+
+ // Write metaindex data
+ bb := longTermBufPool.Get()
+ bb.B = encoding.CompressZSTDLevel(bb.B[:0], bsw.metaindexData, 1)
+ bsw.streamWriters.metaindexWriter.MustWrite(bb.B)
+ if len(bb.B) < 1024*1024 {
+ longTermBufPool.Put(bb)
+ }
+
+ ph.CompressedSizeBytes = bsw.streamWriters.totalBytesWritten()
+
+ bsw.streamWriters.MustClose()
+ bsw.reset()
+}
+
+var longTermBufPool bytesutil.ByteBufferPool
+
+// getBlockStreamWriter returns a new blockStreamWriter from the pool.
+//
+// Return the blockStreamWriter to the pool by calling putBlockStreamWriter() when it is no longer needed.
+func getBlockStreamWriter() *blockStreamWriter {
+ v := blockStreamWriterPool.Get()
+ if v == nil {
+ return &blockStreamWriter{}
+ }
+ return v.(*blockStreamWriter)
+}
+
+// putBlockStreamWriter returns bsw to the pool.
+func putBlockStreamWriter(bsw *blockStreamWriter) {
+ bsw.reset()
+ blockStreamWriterPool.Put(bsw)
+}
+
+var blockStreamWriterPool sync.Pool
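+
+// Illustrative write path (a sketch, not part of this change): writing sorted rows
+// for a single stream into a file part. The path, sid, timestamps and rows values
+// are hypothetical placeholders; timestamps must be sorted and streamIDs must be
+// written in non-decreasing order.
+//
+//	bsw := getBlockStreamWriter()
+//	bsw.MustInitForFilePart("/path/to/part", true)
+//	bsw.MustWriteRows(&sid, timestamps, rows)
+//	var ph partHeader
+//	bsw.Finalize(&ph)
+//	putBlockStreamWriter(bsw)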
diff --git a/lib/logstorage/block_test.go b/lib/logstorage/block_test.go
new file mode 100644
index 000000000..b68b7ea41
--- /dev/null
+++ b/lib/logstorage/block_test.go
@@ -0,0 +1,179 @@
+package logstorage
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+)
+
+func TestBlockMustInitFromRows(t *testing.T) {
+ f := func(timestamps []int64, rows [][]Field, bExpected *block) {
+ t.Helper()
+ b := getBlock()
+ defer putBlock(b)
+
+ b.MustInitFromRows(timestamps, rows)
+ if b.uncompressedSizeBytes() >= maxUncompressedBlockSize {
+ t.Fatalf("expecting non-full block")
+ }
+ if !reflect.DeepEqual(b, bExpected) {
+ t.Fatalf("unexpected block;\ngot\n%v\nwant\n%v", b, bExpected)
+ }
+ if n := b.Len(); n != len(timestamps) {
+ t.Fatalf("unexpected block len; got %d; want %d", n, len(timestamps))
+ }
+ b.assertValid()
+ }
+
+ // Empty log entries
+ f(nil, nil, &block{})
+ f([]int64{}, [][]Field{}, &block{})
+
+ // A single row
+ timestamps := []int64{1234}
+ rows := [][]Field{
+ {
+ {
+ Name: "msg",
+ Value: "foo",
+ },
+ {
+ Name: "level",
+ Value: "error",
+ },
+ },
+ }
+ bExpected := &block{
+ timestamps: []int64{1234},
+ constColumns: []Field{
+ {
+ Name: "level",
+ Value: "error",
+ },
+ {
+ Name: "msg",
+ Value: "foo",
+ },
+ },
+ }
+ f(timestamps, rows, bExpected)
+
+ // Multiple log entries with the same set of fields
+ timestamps = []int64{3, 5}
+ rows = [][]Field{
+ {
+ {
+ Name: "job",
+ Value: "foo",
+ },
+ {
+ Name: "instance",
+ Value: "host1",
+ },
+ },
+ {
+ {
+ Name: "job",
+ Value: "foo",
+ },
+ {
+ Name: "instance",
+ Value: "host2",
+ },
+ },
+ }
+ bExpected = &block{
+ timestamps: []int64{3, 5},
+ columns: []column{
+ {
+ name: "instance",
+ values: []string{"host1", "host2"},
+ },
+ },
+ constColumns: []Field{
+ {
+ Name: "job",
+ Value: "foo",
+ },
+ },
+ }
+ f(timestamps, rows, bExpected)
+
+ // Multiple log entries with distinct set of fields
+ timestamps = []int64{3, 5, 10}
+ rows = [][]Field{
+ {
+ {
+ Name: "msg",
+ Value: "foo",
+ },
+ {
+ Name: "b",
+ Value: "xyz",
+ },
+ },
+ {
+ {
+ Name: "b",
+ Value: "xyz",
+ },
+ {
+ Name: "a",
+ Value: "aaa",
+ },
+ },
+ {
+ {
+ Name: "b",
+ Value: "xyz",
+ },
+ },
+ }
+ bExpected = &block{
+ timestamps: []int64{3, 5, 10},
+ columns: []column{
+ {
+ name: "a",
+ values: []string{"", "aaa", ""},
+ },
+ {
+ name: "msg",
+ values: []string{"foo", "", ""},
+ },
+ },
+ constColumns: []Field{
+ {
+ Name: "b",
+ Value: "xyz",
+ },
+ },
+ }
+ f(timestamps, rows, bExpected)
+}
+
+func TestBlockMustInitFromRowsFullBlock(t *testing.T) {
+ const rowsCount = 2000
+ timestamps := make([]int64, rowsCount)
+ rows := make([][]Field, rowsCount)
+ for i := range timestamps {
+ fields := make([]Field, 10)
+ for j := range fields {
+ fields[j] = Field{
+ Name: fmt.Sprintf("field_%d", j),
+ Value: "very very looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong value",
+ }
+ }
+ rows[i] = fields
+ }
+
+ b := getBlock()
+ defer putBlock(b)
+ b.MustInitFromRows(timestamps, rows)
+ if n := b.Len(); n != len(rows) {
+ t.Fatalf("unexpected total log entries; got %d; want %d", n, len(rows))
+ }
+ if b.uncompressedSizeBytes() < maxUncompressedBlockSize {
+ t.Fatalf("expecting full block")
+ }
+ b.assertValid()
+}
diff --git a/lib/logstorage/block_timing_test.go b/lib/logstorage/block_timing_test.go
new file mode 100644
index 000000000..9d2a5e15a
--- /dev/null
+++ b/lib/logstorage/block_timing_test.go
@@ -0,0 +1,46 @@
+package logstorage
+
+import (
+ "fmt"
+ "testing"
+)
+
+func BenchmarkBlock_MustInitFromRows(b *testing.B) {
+ for _, rowsPerBlock := range []int{1, 10, 100, 1000, 10000} {
+ b.Run(fmt.Sprintf("rowsPerBlock_%d", rowsPerBlock), func(b *testing.B) {
+ benchmarkBlockMustInitFromRows(b, rowsPerBlock)
+ })
+ }
+}
+
+func benchmarkBlockMustInitFromRows(b *testing.B, rowsPerBlock int) {
+ timestamps, rows := newTestRows(rowsPerBlock, 10)
+ b.ReportAllocs()
+ b.SetBytes(int64(len(timestamps)))
+ b.RunParallel(func(pb *testing.PB) {
+ block := getBlock()
+ defer putBlock(block)
+ for pb.Next() {
+ block.MustInitFromRows(timestamps, rows)
+ if n := block.Len(); n != len(timestamps) {
+ panic(fmt.Errorf("unexpected block length; got %d; want %d", n, len(timestamps)))
+ }
+ }
+ })
+}
+
+func newTestRows(rowsCount, fieldsPerRow int) ([]int64, [][]Field) {
+ timestamps := make([]int64, rowsCount)
+ rows := make([][]Field, rowsCount)
+ for i := range timestamps {
+ timestamps[i] = int64(i) * 1e9
+ fields := make([]Field, fieldsPerRow)
+ for j := range fields {
+ f := &fields[j]
+ f.Name = fmt.Sprintf("field_%d", j)
+ f.Value = fmt.Sprintf("value_%d_%d", i, j)
+ }
+ rows[i] = fields
+ }
+ return timestamps, rows
+}
diff --git a/lib/logstorage/bloomfilter.go b/lib/logstorage/bloomfilter.go
new file mode 100644
index 000000000..723949cf3
--- /dev/null
+++ b/lib/logstorage/bloomfilter.go
@@ -0,0 +1,176 @@
+package logstorage
+
+import (
+ "fmt"
+ "sync"
+ "unsafe"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/cespare/xxhash/v2"
+)
+
+// bloomFilterHashesCount is the number of different hashes to use for the bloom filter.
+const bloomFilterHashesCount = 6
+
+// bloomFilterBitsPerItem is the number of bits to use per token.
+const bloomFilterBitsPerItem = 16
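+
+// With these settings the theoretical false positive rate is roughly
+// (1 - e^(-bloomFilterHashesCount/bloomFilterBitsPerItem))^bloomFilterHashesCount = (1 - e^(-6/16))^6 ≈ 0.1%
+// according to the standard bloom filter estimate, which is consistent with
+// the 0.0011 upper bound asserted in TestBloomFilterFalsePositive.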
+
+// bloomFilterMarshal appends marshaled bloom filter for tokens to dst and returns the result.
+func bloomFilterMarshal(dst []byte, tokens []string) []byte {
+ bf := getBloomFilter()
+ bf.mustInit(tokens)
+ dst = bf.marshal(dst)
+ putBloomFilter(bf)
+ return dst
+}
+
+type bloomFilter struct {
+ bits []uint64
+}
+
+func (bf *bloomFilter) reset() {
+ bits := bf.bits
+ for i := range bits {
+ bits[i] = 0
+ }
+ bf.bits = bits[:0]
+}
+
+// marshal appends marshaled bf to dst and returns the result.
+func (bf *bloomFilter) marshal(dst []byte) []byte {
+ bits := bf.bits
+ for _, word := range bits {
+ dst = encoding.MarshalUint64(dst, word)
+ }
+ return dst
+}
+
+// unmarshal unmarshals bf from src.
+func (bf *bloomFilter) unmarshal(src []byte) error {
+ if len(src)%8 != 0 {
+ return fmt.Errorf("cannot unmarshal bloomFilter from src with size not multiple by 8; len(src)=%d", len(src))
+ }
+ bf.reset()
+ wordsCount := len(src) / 8
+ bits := bf.bits
+ if n := wordsCount - cap(bits); n > 0 {
+ bits = append(bits[:cap(bits)], make([]uint64, n)...)
+ }
+ bits = bits[:wordsCount]
+ for i := range bits {
+ bits[i] = encoding.UnmarshalUint64(src)
+ src = src[8:]
+ }
+ bf.bits = bits
+ return nil
+}
+
+// mustInit initializes bf with the given tokens
+func (bf *bloomFilter) mustInit(tokens []string) {
+ bitsCount := len(tokens) * bloomFilterBitsPerItem
+ wordsCount := (bitsCount + 63) / 64
+ bits := bf.bits
+ if n := wordsCount - cap(bits); n > 0 {
+ bits = append(bits[:cap(bits)], make([]uint64, n)...)
+ }
+ bits = bits[:wordsCount]
+ bloomFilterAdd(bits, tokens)
+ bf.bits = bits
+}
+
+// bloomFilterAdd adds the given tokens to the bloom filter bits
+func bloomFilterAdd(bits []uint64, tokens []string) {
+ maxBits := uint64(len(bits)) * 64
+ var buf [8]byte
+ hp := (*uint64)(unsafe.Pointer(&buf[0]))
+ for _, token := range tokens {
+ *hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
+ for i := 0; i < bloomFilterHashesCount; i++ {
+ hi := xxhash.Sum64(buf[:])
+ (*hp)++
+ idx := hi % maxBits
+ i := idx / 64
+ j := idx % 64
+ mask := uint64(1) << j
+ w := bits[i]
+ if (w & mask) == 0 {
+ bits[i] = w | mask
+ }
+ }
+ }
+}
+
+// containsAll returns true if bf contains all the given tokens.
+func (bf *bloomFilter) containsAll(tokens []string) bool {
+ bits := bf.bits
+ if len(bits) == 0 {
+ return true
+ }
+ maxBits := uint64(len(bits)) * 64
+ var buf [8]byte
+ hp := (*uint64)(unsafe.Pointer(&buf[0]))
+ for _, token := range tokens {
+ *hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
+ for i := 0; i < bloomFilterHashesCount; i++ {
+ hi := xxhash.Sum64(buf[:])
+ (*hp)++
+ idx := hi % maxBits
+ i := idx / 64
+ j := idx % 64
+ mask := uint64(1) << j
+ w := bits[i]
+ if (w & mask) == 0 {
+ // The token is missing
+ return false
+ }
+ }
+ }
+ return true
+}
+
+// containsAny returns true if bf contains at least a single token from the given tokens.
+func (bf *bloomFilter) containsAny(tokens []string) bool {
+ bits := bf.bits
+ if len(bits) == 0 {
+ return true
+ }
+ maxBits := uint64(len(bits)) * 64
+ var buf [8]byte
+ hp := (*uint64)(unsafe.Pointer(&buf[0]))
+nextToken:
+ for _, token := range tokens {
+ *hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
+ for i := 0; i < bloomFilterHashesCount; i++ {
+ hi := xxhash.Sum64(buf[:])
+ (*hp)++
+ idx := hi % maxBits
+ i := idx / 64
+ j := idx % 64
+ mask := uint64(1) << j
+ w := bits[i]
+ if (w & mask) == 0 {
+ // The token is missing. Check the next token
+ continue nextToken
+ }
+ }
+ // It is likely the token exists in the bloom filter
+ return true
+ }
+ return false
+}
+
+func getBloomFilter() *bloomFilter {
+ v := bloomFilterPool.Get()
+ if v == nil {
+ return &bloomFilter{}
+ }
+ return v.(*bloomFilter)
+}
+
+func putBloomFilter(bf *bloomFilter) {
+ bf.reset()
+ bloomFilterPool.Put(bf)
+}
+
+var bloomFilterPool sync.Pool
diff --git a/lib/logstorage/bloomfilter_test.go b/lib/logstorage/bloomfilter_test.go
new file mode 100644
index 000000000..061e1483f
--- /dev/null
+++ b/lib/logstorage/bloomfilter_test.go
@@ -0,0 +1,84 @@
+package logstorage
+
+import (
+ "fmt"
+ "testing"
+)
+
+func TestBloomFilter(t *testing.T) {
+ f := func(tokens []string) {
+ t.Helper()
+ data := bloomFilterMarshal(nil, tokens)
+ bf := getBloomFilter()
+ defer putBloomFilter(bf)
+ if err := bf.unmarshal(data); err != nil {
+ t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err)
+ }
+ for _, token := range tokens {
+ if !bf.containsAny([]string{token}) {
+ t.Fatalf("bloomFilterContains must return true for the added token %q", token)
+ }
+ }
+ if !bf.containsAll(tokens) {
+ t.Fatalf("bloomFilterContains must return true for the added tokens")
+ }
+ }
+ f(nil)
+ f([]string{"foo"})
+ f([]string{"foo", "bar", "baz"})
+
+ // 10k tokens
+ tokens := make([]string, 10000)
+ for i := range tokens {
+ tokens[i] = fmt.Sprintf("token_%d", i)
+ }
+ f(tokens)
+}
+
+func TestBloomFilterUnmarshalFailure(t *testing.T) {
+ f := func(data []byte) {
+ t.Helper()
+ bf := getBloomFilter()
+ defer putBloomFilter(bf)
+ if err := bf.unmarshal(data); err == nil {
+ t.Fatalf("expecting non-nil error")
+ }
+ }
+ f([]byte("a"))
+ f([]byte("foo"))
+}
+
+func TestBloomFilterUnmarshalGarbage(t *testing.T) {
+ data := []byte("01234567")
+ var bf bloomFilter
+ if err := bf.unmarshal(data); err != nil {
+ t.Fatalf("unexpected error: %s", err)
+ }
+}
+
+func TestBloomFilterFalsePositive(t *testing.T) {
+ tokens := make([]string, 20000)
+ for i := range tokens {
+ tokens[i] = fmt.Sprintf("token_%d", i)
+ }
+ data := bloomFilterMarshal(nil, tokens)
+ bf := getBloomFilter()
+ defer putBloomFilter(bf)
+ if err := bf.unmarshal(data); err != nil {
+ t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err)
+ }
+
+ // count the number of false positives on 20K non-existing tokens
+ falsePositives := 0
+ for i := range tokens {
+ token := fmt.Sprintf("non-existing-token_%d", i)
+ if bf.containsAny([]string{token}) {
+ falsePositives++
+ }
+ }
+ p := float64(falsePositives) / float64(len(tokens))
+ maxFalsePositive := 0.0011
+ if p > maxFalsePositive {
+ t.Fatalf("too high false positive rate; got %.4f; want %.4f max", p, maxFalsePositive)
+ }
+}
diff --git a/lib/logstorage/consts.go b/lib/logstorage/consts.go
new file mode 100644
index 000000000..20e3590e2
--- /dev/null
+++ b/lib/logstorage/consts.go
@@ -0,0 +1,32 @@
+package logstorage
+
+// maxUncompressedIndexBlockSize contains the maximum length of uncompressed block with blockHeader entries aka index block.
+//
+// The real block length can exceed this value by a small percentage because of the block write details.
+const maxUncompressedIndexBlockSize = 128 * 1024
+
+// maxUncompressedBlockSize is the maximum size of uncompressed block in bytes.
+//
+// The real uncompressed block can exceed this value by up to 2 times because of block merge details.
+const maxUncompressedBlockSize = 2 * 1024 * 1024
+
+// maxRowsPerBlock is the maximum number of log entries a single block can contain.
+const maxRowsPerBlock = 8 * 1024 * 1024
+
+// maxColumnsPerBlock is the maximum number of columns per block.
+const maxColumnsPerBlock = 10000
+
+// maxIndexBlockSize is the maximum size of the block with blockHeader entries (aka indexBlock)
+const maxIndexBlockSize = 8 * 1024 * 1024
+
+// maxTimestampsBlockSize is the maximum size of timestamps block
+const maxTimestampsBlockSize = 8 * 1024 * 1024
+
+// maxValuesBlockSize is the maximum size of values block
+const maxValuesBlockSize = 8 * 1024 * 1024
+
+// maxBloomFilterBlockSize is the maximum size of bloom filter block
+const maxBloomFilterBlockSize = 8 * 1024 * 1024
+
+// maxColumnsHeaderSize is the maximum size of columnsHeader block
+const maxColumnsHeaderSize = 8 * 1024 * 1024
diff --git a/lib/logstorage/datadb.go b/lib/logstorage/datadb.go
new file mode 100644
index 000000000..e605e2f24
--- /dev/null
+++ b/lib/logstorage/datadb.go
@@ -0,0 +1,990 @@
+package logstorage
+
+import (
+ "encoding/json"
+ "fmt"
+ "os"
+ "path/filepath"
+ "sort"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
+)
+
+// Default number of parts to merge at once.
+//
+// This number has been obtained empirically - it gives the lowest possible overhead.
+// See appendPartsToMerge tests for details.
+const defaultPartsToMerge = 15
+
+// minMergeMultiplier is the minimum multiplier for the size of the output part
+// compared to the size of the maximum input part for the merge.
+//
+// A higher value reduces write amplification (disk write IO induced by the merge),
+// while increasing the number of unmerged parts.
+// The 1.7 is good enough for production workloads.
+const minMergeMultiplier = 1.7
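+
+// For example, with minMergeMultiplier=1.7 and a largest input part of 10MiB,
+// the merged output part is expected to be at least 17MiB, i.e. the remaining
+// input parts should contribute at least 7MiB in total; candidate merges below
+// this ratio are normally skipped by appendPartsToMerge (not shown in this file).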
+
+// The maximum number of inmemory parts in the partition.
+//
+// If the number of inmemory parts reaches this value, then assisted merge runs during data ingestion.
+const maxInmemoryPartsPerPartition = 20
+
+// datadb represents a database with log data
+type datadb struct {
+ // pt is the partition the datadb belongs to
+ pt *partition
+
+ // mergeIdx is used for generating unique directory names for parts
+ mergeIdx uint64
+
+ // path is the path to the directory with log data
+ path string
+
+ // flushInterval is the interval for flushing the in-memory parts to disk
+ flushInterval time.Duration
+
+ // inmemoryParts contains a list of inmemory parts
+ inmemoryParts []*partWrapper
+
+ // fileParts contains a list of file-based parts
+ fileParts []*partWrapper
+
+ // partsLock protects parts from concurrent access
+ partsLock sync.Mutex
+
+ // wg is used for determining when background workers stop
+ wg sync.WaitGroup
+
+ // stopCh is used for notifying background workers to stop
+ stopCh chan struct{}
+
+ // mergeDoneCond is used for pace-limiting the data ingestion rate
+ mergeDoneCond *sync.Cond
+
+ // inmemoryPartsFlushersCount is the number of currently running in-memory parts flushers
+ //
+ // This variable must be accessed under partsLock.
+ inmemoryPartsFlushersCount int
+
+ // mergeWorkersCount is the number of currently running merge workers
+ //
+ // This variable must be accessed under partsLock.
+ mergeWorkersCount int
+}
+
+// partWrapper is a wrapper for opened part.
+type partWrapper struct {
+ // refCount is the number of references to p.
+ //
+ // When the number of references reaches zero, then p is closed.
+ refCount int32
+
+ // mustBeDeleted is set to a non-zero value when the part must be deleted after refCount reaches zero.
+ mustBeDeleted uint32
+
+ // p is an opened part
+ p *part
+
+ // mp references inmemory part used for initializing p.
+ mp *inmemoryPart
+
+ // isInMerge is set to true if the part takes part in merge.
+ isInMerge bool
+
+ // The deadline when in-memory part must be flushed to disk.
+ flushDeadline time.Time
+}
+
+func (pw *partWrapper) incRef() {
+ atomic.AddInt32(&pw.refCount, 1)
+}
+
+func (pw *partWrapper) decRef() {
+ n := atomic.AddInt32(&pw.refCount, -1)
+ if n > 0 {
+ return
+ }
+
+ deletePath := ""
+ if pw.mp == nil {
+ if atomic.LoadUint32(&pw.mustBeDeleted) != 0 {
+ deletePath = pw.p.path
+ }
+ } else {
+ putInmemoryPart(pw.mp)
+ pw.mp = nil
+ }
+
+ mustClosePart(pw.p)
+ pw.p = nil
+
+ if deletePath != "" {
+ fs.MustRemoveAll(deletePath)
+ }
+}
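+
+// Typical partWrapper lifecycle (a sketch; the callers live outside this file):
+// the wrapper starts with an initial reference (see newPartWrapper), incRef() is called
+// for every additional user of the part (e.g. a search or a merge holding a snapshot
+// of the parts list), and decRef() is called when that user is done. Once refCount
+// reaches zero the part is closed and, if mustBeDeleted is set for a file part,
+// its directory is removed from disk.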
+
+func mustCreateDatadb(path string) {
+ fs.MustMkdirFailIfExist(path)
+ mustWritePartNames(path, []string{})
+}
+
+// mustOpenDatadb opens datadb at the given path with the given flushInterval for in-memory data.
+func mustOpenDatadb(pt *partition, path string, flushInterval time.Duration) *datadb {
+ // Remove temporary directories, which may be left after unclean shutdown.
+ fs.MustRemoveTemporaryDirs(path)
+
+ partNames := mustReadPartNames(path)
+ mustRemoveUnusedDirs(path, partNames)
+
+ pws := make([]*partWrapper, len(partNames))
+ for i, partName := range partNames {
+ partPath := filepath.Join(path, partName)
+ p := mustOpenFilePart(pt, partPath)
+ pws[i] = newPartWrapper(p, nil, time.Time{})
+ }
+
+ ddb := &datadb{
+ pt: pt,
+ mergeIdx: uint64(time.Now().UnixNano()),
+ flushInterval: flushInterval,
+ path: path,
+ fileParts: pws,
+ stopCh: make(chan struct{}),
+ }
+ ddb.mergeDoneCond = sync.NewCond(&ddb.partsLock)
+
+ // Start merge workers in the hope they'll merge the remaining parts
+ ddb.partsLock.Lock()
+ n := getMergeWorkersCount()
+ for i := 0; i < n; i++ {
+ ddb.startMergeWorkerLocked()
+ }
+ ddb.partsLock.Unlock()
+
+ return ddb
+}
+
+// startInmemoryPartsFlusherLocked starts a flusher goroutine, which flushes in-memory parts to disk.
+//
+// This function must be called under partsLock.
+func (ddb *datadb) startInmemoryPartsFlusherLocked() {
+ if ddb.inmemoryPartsFlushersCount >= 1 {
+ return
+ }
+ ddb.inmemoryPartsFlushersCount++
+ ddb.wg.Add(1)
+ go func() {
+ ddb.flushInmemoryParts()
+ ddb.wg.Done()
+ }()
+}
+
+func (ddb *datadb) flushInmemoryParts() {
+ ticker := time.NewTicker(time.Second)
+ defer ticker.Stop()
+ for {
+ ddb.partsLock.Lock()
+ pws := make([]*partWrapper, 0, len(ddb.inmemoryParts))
+ pws = appendNotInMergePartsLocked(pws, ddb.inmemoryParts)
+ currentTime := time.Now()
+ partsToFlush := pws[:0]
+ for _, pw := range pws {
+ if pw.flushDeadline.Before(currentTime) {
+ partsToFlush = append(partsToFlush, pw)
+ }
+ }
+ setInMergeLocked(partsToFlush)
+ if len(pws) == 0 {
+ ddb.inmemoryPartsFlushersCount--
+ }
+ ddb.partsLock.Unlock()
+
+ if len(pws) == 0 {
+ // There are no in-memory parts, so stop the flusher.
+ return
+ }
+ ddb.mustMergePartsFinal(partsToFlush)
+
+ select {
+ case <-ddb.stopCh:
+ return
+ case <-ticker.C:
+ }
+ }
+}
+
+// startMergeWorkerLocked starts a merge worker.
+//
+// This function must be called under locked partsLock.
+func (ddb *datadb) startMergeWorkerLocked() {
+ if ddb.mergeWorkersCount >= getMergeWorkersCount() {
+ return
+ }
+ ddb.mergeWorkersCount++
+ ddb.wg.Add(1)
+ go func() {
+ globalMergeLimitCh <- struct{}{}
+ ddb.mustMergeExistingParts()
+ <-globalMergeLimitCh
+ ddb.wg.Done()
+ }()
+}
+
+// globalMergeLimitCh limits the number of concurrent merges across all the partitions
+var globalMergeLimitCh = make(chan struct{}, getMergeWorkersCount())
+
+func getMergeWorkersCount() int {
+ n := cgroup.AvailableCPUs()
+ if n < 4 {
+ // Use a bigger number of workers on systems with a small number of CPU cores,
+ // since a single worker may become busy for a long time when merging big parts.
+ // Then the remaining workers may continue performing merges
+ // for newly added small parts.
+ return 4
+ }
+ return n
+}
+
+func (ddb *datadb) mustMergeExistingParts() {
+ for !needStop(ddb.stopCh) {
+ maxOutBytes := ddb.availableDiskSpace()
+
+ ddb.partsLock.Lock()
+ parts := make([]*partWrapper, 0, len(ddb.inmemoryParts)+len(ddb.fileParts))
+ parts = appendNotInMergePartsLocked(parts, ddb.inmemoryParts)
+ parts = appendNotInMergePartsLocked(parts, ddb.fileParts)
+ pws := appendPartsToMerge(nil, parts, maxOutBytes)
+ setInMergeLocked(pws)
+ if len(pws) == 0 {
+ ddb.mergeWorkersCount--
+ }
+ ddb.partsLock.Unlock()
+
+ if len(pws) == 0 {
+ // Nothing to merge at the moment.
+ return
+ }
+
+ partsSize := getCompressedSize(pws)
+ if !ddb.reserveDiskSpace(partsSize) {
+ // There is no free disk space for the merge,
+ // because concurrent merge workers already reserved the disk space.
+ // Try again with smaller maxOutBytes.
+ ddb.releasePartsToMerge(pws)
+ continue
+ }
+ ddb.mustMergeParts(pws, false)
+ ddb.releaseDiskSpace(partsSize)
+ }
+}
+
+// appendNotInMergePartsLocked appends src parts with isInMerge=false to dst and returns the result.
+//
+// This function must be called under partsLock.
+func appendNotInMergePartsLocked(dst, src []*partWrapper) []*partWrapper {
+ for _, pw := range src {
+ if !pw.isInMerge {
+ dst = append(dst, pw)
+ }
+ }
+ return dst
+}
+
+// setInMergeLocked sets isInMerge flag for pws.
+//
+// This function must be called under partsLock.
+func setInMergeLocked(pws []*partWrapper) {
+ for _, pw := range pws {
+ if pw.isInMerge {
+ logger.Panicf("BUG: partWrapper.isInMerge unexpectedly set to true")
+ }
+ pw.isInMerge = true
+ }
+}
+
+func assertIsInMerge(pws []*partWrapper) {
+ for _, pw := range pws {
+ if !pw.isInMerge {
+ logger.Panicf("BUG: partWrapper.isInMerge unexpectedly set to false")
+ }
+ }
+}
+
+// mustMergeParts merges pws to a single resulting part.
+//
+// If isFinal is set, then the resulting part is saved to disk.
+//
+// All the parts inside pws must have isInMerge field set to true.
+func (ddb *datadb) mustMergeParts(pws []*partWrapper, isFinal bool) {
+ if len(pws) == 0 {
+ // Nothing to merge.
+ return
+ }
+ assertIsInMerge(pws)
+
+ startTime := time.Now()
+
+ // Initialize destination paths.
+ dstPartType := ddb.getDstPartType(pws, isFinal)
+ mergeIdx := ddb.nextMergeIdx()
+ dstPartPath := ddb.getDstPartPath(dstPartType, mergeIdx)
+
+ if isFinal && len(pws) == 1 && pws[0].mp != nil {
+ // Fast path: flush a single in-memory part to disk.
+ mp := pws[0].mp
+ mp.MustStoreToDisk(dstPartPath)
+ pwNew := ddb.openCreatedPart(&mp.ph, pws, nil, dstPartPath)
+ ddb.swapSrcWithDstParts(pws, pwNew, dstPartType)
+ return
+ }
+
+ // Prepare blockStreamReaders for source parts.
+ bsrs := mustOpenBlockStreamReaders(pws)
+
+ // Prepare BlockStreamWriter for destination part.
+ srcSize := uint64(0)
+ srcRowsCount := uint64(0)
+ srcBlocksCount := uint64(0)
+ for _, pw := range pws {
+ srcSize += pw.p.ph.CompressedSizeBytes
+ srcRowsCount += pw.p.ph.RowsCount
+ srcBlocksCount += pw.p.ph.BlocksCount
+ }
+ bsw := getBlockStreamWriter()
+ var mpNew *inmemoryPart
+ if dstPartType == partInmemory {
+ mpNew = getInmemoryPart()
+ bsw.MustInitForInmemoryPart(mpNew)
+ } else {
+ nocache := !shouldUsePageCacheForPartSize(srcSize)
+ bsw.MustInitForFilePart(dstPartPath, nocache)
+ }
+
+ // Merge source parts to destination part.
+ var ph partHeader
+ stopCh := ddb.stopCh
+ if isFinal {
+ // The final merge shouldn't be stopped even if ddb.stopCh is closed.
+ stopCh = nil
+ }
+ mustMergeBlockStreams(&ph, bsw, bsrs, stopCh)
+ putBlockStreamWriter(bsw)
+ for _, bsr := range bsrs {
+ putBlockStreamReader(bsr)
+ }
+
+ // Persist partHeader for destination part after the merge.
+ if mpNew != nil {
+ mpNew.ph = ph
+ } else {
+ ph.mustWriteMetadata(dstPartPath)
+ // Make sure the created part directory listing is synced.
+ fs.MustSyncPath(dstPartPath)
+ }
+ if needStop(stopCh) {
+ ddb.releasePartsToMerge(pws)
+ ddb.mergeDoneCond.Broadcast()
+ // Remove incomplete destination part
+ if dstPartType == partFile {
+ fs.MustRemoveAll(dstPartPath)
+ }
+ return
+ }
+
+ // Atomically swap the source parts with the newly created part.
+ pwNew := ddb.openCreatedPart(&ph, pws, mpNew, dstPartPath)
+
+ dstSize := uint64(0)
+ dstRowsCount := uint64(0)
+ dstBlocksCount := uint64(0)
+ if pwNew != nil {
+ pDst := pwNew.p
+ dstSize = pDst.ph.CompressedSizeBytes
+ dstRowsCount = pDst.ph.RowsCount
+ dstBlocksCount = pDst.ph.BlocksCount
+ }
+
+ ddb.swapSrcWithDstParts(pws, pwNew, dstPartType)
+
+ d := time.Since(startTime)
+ if d <= 30*time.Second {
+ return
+ }
+
+ // Log stats for long merges.
+ durationSecs := d.Seconds()
+ rowsPerSec := int(float64(srcRowsCount) / durationSecs)
+ logger.Infof("merged (%d parts, %d rows, %d blocks, %d bytes) into (1 part, %d rows, %d blocks, %d bytes) in %.3f seconds at %d rows/sec to %q",
+ len(pws), srcRowsCount, srcBlocksCount, srcSize, dstRowsCount, dstBlocksCount, dstSize, durationSecs, rowsPerSec, dstPartPath)
+}
+
+func (ddb *datadb) nextMergeIdx() uint64 {
+ return atomic.AddUint64(&ddb.mergeIdx, 1)
+}
+
+type partType int
+
+var (
+ partInmemory = partType(0)
+ partFile = partType(1)
+)
+
+func (ddb *datadb) getDstPartType(pws []*partWrapper, isFinal bool) partType {
+ if isFinal {
+ return partFile
+ }
+ dstPartSize := getCompressedSize(pws)
+ if dstPartSize > getMaxInmemoryPartSize() {
+ return partFile
+ }
+ if !areAllInmemoryParts(pws) {
+ // If at least a single source part is located in file,
+ // then the destination part must be in file for durability reasons.
+ return partFile
+ }
+ return partInmemory
+}
+
+func (ddb *datadb) getDstPartPath(dstPartType partType, mergeIdx uint64) string {
+ ptPath := ddb.path
+ dstPartPath := ""
+ if dstPartType != partInmemory {
+ dstPartPath = filepath.Join(ptPath, fmt.Sprintf("%016X", mergeIdx))
+ }
+ return dstPartPath
+}
+
+func (ddb *datadb) openCreatedPart(ph *partHeader, pws []*partWrapper, mpNew *inmemoryPart, dstPartPath string) *partWrapper {
+ // Open the created part.
+ if ph.RowsCount == 0 {
+ // The created part is empty. Remove it.
+ if mpNew == nil {
+ fs.MustRemoveAll(dstPartPath)
+ }
+ return nil
+ }
+ var p *part
+ var flushDeadline time.Time
+ if mpNew != nil {
+ // Open the created part from memory.
+ p = mustOpenInmemoryPart(ddb.pt, mpNew)
+ flushDeadline = ddb.getFlushToDiskDeadline(pws)
+ } else {
+ // Open the created part from disk.
+ p = mustOpenFilePart(ddb.pt, dstPartPath)
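+ // File-based parts don't need a flush deadline, so flushDeadline is left zero.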
+ }
+ return newPartWrapper(p, mpNew, flushDeadline)
+}
+
+func (ddb *datadb) mustAddRows(lr *LogRows) {
+ if len(lr.streamIDs) == 0 {
+ return
+ }
+
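+ // Convert lr to a new in-memory part and register it at ddb, so it becomes visible to searches and to background merges.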
+ mp := getInmemoryPart()
+ mp.mustInitFromRows(lr)
+ p := mustOpenInmemoryPart(ddb.pt, mp)
+
+ flushDeadline := time.Now().Add(ddb.flushInterval)
+ pw := newPartWrapper(p, mp, flushDeadline)
+
+ ddb.partsLock.Lock()
+ ddb.inmemoryParts = append(ddb.inmemoryParts, pw)
+ ddb.startInmemoryPartsFlusherLocked()
+ if len(ddb.inmemoryParts) > defaultPartsToMerge {
+ ddb.startMergeWorkerLocked()
+ }
+ for len(ddb.inmemoryParts) > maxInmemoryPartsPerPartition {
+ // limit the pace for data ingestion if too many inmemory parts are created
+ ddb.mergeDoneCond.Wait()
+ }
+ ddb.partsLock.Unlock()
+}
+
+// DatadbStats contains various stats for datadb.
+type DatadbStats struct {
+ // InmemoryRowsCount is the number of rows, which weren't flushed to disk yet.
+ InmemoryRowsCount uint64
+
+ // FileRowsCount is the number of rows stored on disk.
+ FileRowsCount uint64
+
+ // InmemoryParts is the number of in-memory parts, which weren't flushed to disk yet.
+ InmemoryParts uint64
+
+ // FileParts is the number of file-based parts stored on disk.
+ FileParts uint64
+
+ // InmemoryBlocks is the number of in-memory blocks, which weren't flushed to disk yet.
+ InmemoryBlocks uint64
+
+ // FileBlocks is the number of file-based blocks stored on disk.
+ FileBlocks uint64
+
+ // CompressedInmemorySize is the size of compressed data stored in memory.
+ CompressedInmemorySize uint64
+
+ // CompressedFileSize is the size of compressed data stored on disk.
+ CompressedFileSize uint64
+
+ // UncompressedInmemorySize is the size of uncompressed data stored in memory.
+ UncompressedInmemorySize uint64
+
+ // UncompressedFileSize is the size of uncompressed data stored on disk.
+ UncompressedFileSize uint64
+}
+
+func (s *DatadbStats) reset() {
+ *s = DatadbStats{}
+}
+
+// RowsCount returns the number of rows stored in datadb.
+func (s *DatadbStats) RowsCount() uint64 {
+ return s.InmemoryRowsCount + s.FileRowsCount
+}
+
+// updateStats updates s with ddb stats
+func (ddb *datadb) updateStats(s *DatadbStats) {
+ ddb.partsLock.Lock()
+
+ s.InmemoryRowsCount += getRowsCount(ddb.inmemoryParts)
+ s.FileRowsCount += getRowsCount(ddb.fileParts)
+
+ s.InmemoryParts += uint64(len(ddb.inmemoryParts))
+ s.FileParts += uint64(len(ddb.fileParts))
+
+ s.InmemoryBlocks += getBlocksCount(ddb.inmemoryParts)
+ s.FileBlocks += getBlocksCount(ddb.fileParts)
+
+ s.CompressedInmemorySize += getCompressedSize(ddb.inmemoryParts)
+ s.CompressedFileSize += getCompressedSize(ddb.fileParts)
+
+ s.UncompressedInmemorySize += getUncompressedSize(ddb.inmemoryParts)
+ s.UncompressedFileSize += getUncompressedSize(ddb.fileParts)
+
+ ddb.partsLock.Unlock()
+}
+
+// debugFlush makes sure that the recently ingested data is available for search.
+func (ddb *datadb) debugFlush() {
+ // Nothing to do, since all the ingested data is available for search via ddb.inmemoryParts.
+}
+
+func (ddb *datadb) mustMergePartsFinal(pws []*partWrapper) {
+ assertIsInMerge(pws)
+
+ var pwsChunk []*partWrapper
+ for len(pws) > 0 {
+ pwsChunk = appendPartsToMerge(pwsChunk[:0], pws, (1<<64)-1)
+ if len(pwsChunk) == 0 {
+ pwsChunk = append(pwsChunk[:0], pws...)
+ }
+ ddb.mustMergeParts(pwsChunk, true)
+
+ partsToRemove := partsToMap(pwsChunk)
+ removedParts := 0
+ pws, removedParts = removeParts(pws, partsToRemove)
+ if removedParts != len(pwsChunk) {
+ logger.Panicf("BUG: unexpected number of parts removed; got %d; want %d", removedParts, len(pwsChunk))
+ }
+ }
+}
+
+func partsToMap(pws []*partWrapper) map[*partWrapper]struct{} {
+ m := make(map[*partWrapper]struct{}, len(pws))
+ for _, pw := range pws {
+ m[pw] = struct{}{}
+ }
+ if len(m) != len(pws) {
+ logger.Panicf("BUG: %d duplicate parts found out of %d parts", len(pws)-len(m), len(pws))
+ }
+ return m
+}
+
+func (ddb *datadb) swapSrcWithDstParts(pws []*partWrapper, pwNew *partWrapper, dstPartType partType) {
+ // Atomically unregister old parts and add new part to pt.
+ partsToRemove := partsToMap(pws)
+ removedInmemoryParts := 0
+ removedFileParts := 0
+
+ ddb.partsLock.Lock()
+
+ ddb.inmemoryParts, removedInmemoryParts = removeParts(ddb.inmemoryParts, partsToRemove)
+ ddb.fileParts, removedFileParts = removeParts(ddb.fileParts, partsToRemove)
+ if pwNew != nil {
+ switch dstPartType {
+ case partInmemory:
+ ddb.inmemoryParts = append(ddb.inmemoryParts, pwNew)
+ ddb.startInmemoryPartsFlusherLocked()
+ case partFile:
+ ddb.fileParts = append(ddb.fileParts, pwNew)
+ default:
+ logger.Panicf("BUG: unknown partType=%d", dstPartType)
+ }
+ if len(ddb.inmemoryParts)+len(ddb.fileParts) > defaultPartsToMerge {
+ ddb.startMergeWorkerLocked()
+ }
+ }
+
+ // Atomically store the updated list of file-based parts on disk.
+ // This must be performed under partsLock in order to prevent races
+ // when multiple concurrently running goroutines update the list.
+ if removedFileParts > 0 || pwNew != nil && dstPartType == partFile {
+ partNames := getPartNames(ddb.fileParts)
+ mustWritePartNames(ddb.path, partNames)
+ }
+
+ ddb.partsLock.Unlock()
+
+ removedParts := removedInmemoryParts + removedFileParts
+ if removedParts != len(partsToRemove) {
+ logger.Panicf("BUG: unexpected number of parts removed; got %d, want %d", removedParts, len(partsToRemove))
+ }
+
+ // Mark the old parts as must-be-deleted and decrement their reference counts,
+ // so they are eventually closed and deleted.
+ for _, pw := range pws {
+ atomic.StoreUint32(&pw.mustBeDeleted, 1)
+ pw.decRef()
+ }
+
+ ddb.mergeDoneCond.Broadcast()
+}
+
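+// removeParts removes parts listed in partsToRemove from pws and returns the updated pws together with the number of removed parts.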
+func removeParts(pws []*partWrapper, partsToRemove map[*partWrapper]struct{}) ([]*partWrapper, int) {
+ dst := pws[:0]
+ for _, pw := range pws {
+ if _, ok := partsToRemove[pw]; !ok {
+ dst = append(dst, pw)
+ }
+ }
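+ // Zero the freed tail entries, so the removed parts can be garbage collected.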
+ for i := len(dst); i < len(pws); i++ {
+ pws[i] = nil
+ }
+ return dst, len(pws) - len(dst)
+}
+
+func mustOpenBlockStreamReaders(pws []*partWrapper) []*blockStreamReader {
+ bsrs := make([]*blockStreamReader, 0, len(pws))
+ for _, pw := range pws {
+ bsr := getBlockStreamReader()
+ if pw.mp != nil {
+ bsr.MustInitFromInmemoryPart(pw.mp)
+ } else {
+ bsr.MustInitFromFilePart(pw.p.path)
+ }
+ bsrs = append(bsrs, bsr)
+ }
+ return bsrs
+}
+
+func newPartWrapper(p *part, mp *inmemoryPart, flushDeadline time.Time) *partWrapper {
+ pw := &partWrapper{
+ p: p,
+ mp: mp,
+
+ flushDeadline: flushDeadline,
+ }
+
+ // Increase reference counter for newly created part - it is decreased when the part
+ // is removed from the list of open parts.
+ pw.incRef()
+
+ return pw
+}
+
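+// getFlushToDiskDeadline returns the earliest flush deadline among the in-memory parts in pws, but no later than flushInterval from now.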
+func (ddb *datadb) getFlushToDiskDeadline(pws []*partWrapper) time.Time {
+ d := time.Now().Add(ddb.flushInterval)
+ for _, pw := range pws {
+ if pw.mp != nil && pw.flushDeadline.Before(d) {
+ d = pw.flushDeadline
+ }
+ }
+ return d
+}
+
+func getMaxInmemoryPartSize() uint64 {
+ // Allocate 10% of allowed memory for in-memory parts.
+ n := uint64(0.1 * float64(memory.Allowed()) / maxInmemoryPartsPerPartition)
+ if n < 1e6 {
+ n = 1e6
+ }
+ return n
+}
+
+func areAllInmemoryParts(pws []*partWrapper) bool {
+ for _, pw := range pws {
+ if pw.mp == nil {
+ return false
+ }
+ }
+ return true
+}
+
+func (ddb *datadb) releasePartsToMerge(pws []*partWrapper) {
+ ddb.partsLock.Lock()
+ for _, pw := range pws {
+ if !pw.isInMerge {
+ logger.Panicf("BUG: missing isInMerge flag on the part %q", pw.p.path)
+ }
+ pw.isInMerge = false
+ }
+ ddb.partsLock.Unlock()
+}
+
+func (ddb *datadb) availableDiskSpace() uint64 {
+ available := fs.MustGetFreeSpace(ddb.path)
+ reserved := atomic.LoadUint64(&reservedDiskSpace)
+ if available < reserved {
+ return 0
+ }
+ return available - reserved
+}
+
+func (ddb *datadb) reserveDiskSpace(n uint64) bool {
+ available := fs.MustGetFreeSpace(ddb.path)
+ reserved := atomic.AddUint64(&reservedDiskSpace, n)
+ if available > reserved {
+ return true
+ }
+ ddb.releaseDiskSpace(n)
+ return false
+}
+
+func (ddb *datadb) releaseDiskSpace(n uint64) {
+ atomic.AddUint64(&reservedDiskSpace, -n)
+}
+
+// reservedDiskSpace tracks the disk space reserved globally for currently executing
+// background merges across all the partitions.
+//
+// This allows skipping background merges when there is not enough free disk space.
+var reservedDiskSpace uint64
+
+func needStop(stopCh <-chan struct{}) bool {
+ select {
+ case <-stopCh:
+ return true
+ default:
+ return false
+ }
+}
+
+// mustCloseDatadb can be called only when nobody accesses ddb.
+func mustCloseDatadb(ddb *datadb) {
+ // Stop background workers
+ close(ddb.stopCh)
+ ddb.wg.Wait()
+
+ // flush in-memory data to disk
+ pws := append([]*partWrapper{}, ddb.inmemoryParts...)
+ setInMergeLocked(pws)
+ ddb.mustMergePartsFinal(pws)
+
+ // There is no need to use ddb.partsLock here, since nobody should access ddb at this point.
+ for _, pw := range ddb.inmemoryParts {
+ pw.decRef()
+ if pw.refCount != 0 {
+ logger.Panicf("BUG: there are %d references to inmemoryPart", pw.refCount)
+ }
+ }
+ ddb.inmemoryParts = nil
+
+ for _, pw := range ddb.fileParts {
+ pw.decRef()
+ if pw.refCount != 0 {
+ logger.Panicf("BUG: ther are %d references to filePart", pw.refCount)
+ }
+ }
+ ddb.fileParts = nil
+
+ ddb.path = ""
+ ddb.pt = nil
+}
+
+func getPartNames(pws []*partWrapper) []string {
+ partNames := make([]string, 0, len(pws))
+ for _, pw := range pws {
+ if pw.mp != nil {
+ // Skip in-memory parts
+ continue
+ }
+ partName := filepath.Base(pw.p.path)
+ partNames = append(partNames, partName)
+ }
+ sort.Strings(partNames)
+ return partNames
+}
+
+func mustWritePartNames(path string, partNames []string) {
+ data, err := json.Marshal(partNames)
+ if err != nil {
+ logger.Panicf("BUG: cannot marshal partNames to JSON: %s", err)
+ }
+ partNamesPath := filepath.Join(path, partsFilename)
+ fs.MustWriteAtomic(partNamesPath, data, true)
+}
+
+func mustReadPartNames(path string) []string {
+ partNamesPath := filepath.Join(path, partsFilename)
+ data, err := os.ReadFile(partNamesPath)
+ if err != nil {
+ logger.Panicf("FATAL: cannot read %s: %s", partNamesPath, err)
+ }
+ var partNames []string
+ if err := json.Unmarshal(data, &partNames); err != nil {
+ logger.Panicf("FATAL: cannot parse %s: %s", partNamesPath, err)
+ }
+ return partNames
+}
+
+// mustRemoveUnusedDirs removes dirs at path, which are missing in partNames.
+//
+// Such dirs may be left after an unclean shutdown.
+func mustRemoveUnusedDirs(path string, partNames []string) {
+ des := fs.MustReadDir(path)
+ m := make(map[string]struct{}, len(partNames))
+ for _, partName := range partNames {
+ m[partName] = struct{}{}
+ }
+ removedDirs := 0
+ for _, de := range des {
+ if !fs.IsDirOrSymlink(de) {
+ // Skip non-directories.
+ continue
+ }
+ fn := de.Name()
+ if _, ok := m[fn]; !ok {
+ deletePath := filepath.Join(path, fn)
+ fs.MustRemoveAll(deletePath)
+ removedDirs++
+ }
+ }
+ if removedDirs > 0 {
+ fs.MustSyncPath(path)
+ }
+}
+
+// appendPartsToMerge finds optimal parts to merge from src,
+// appends them to dst and returns the result.
+func appendPartsToMerge(dst, src []*partWrapper, maxOutBytes uint64) []*partWrapper {
+ if len(src) < 2 {
+ // There is no need to merge zero or one part :)
+ return dst
+ }
+
+ // Filter out too big parts.
+ // This should reduce N for O(N^2) algorithm below.
+ maxInPartBytes := uint64(float64(maxOutBytes) / minMergeMultiplier)
+ tmp := make([]*partWrapper, 0, len(src))
+ for _, pw := range src {
+ if pw.p.ph.CompressedSizeBytes > maxInPartBytes {
+ continue
+ }
+ tmp = append(tmp, pw)
+ }
+ src = tmp
+
+ sortPartsForOptimalMerge(src)
+
+ maxSrcParts := defaultPartsToMerge
+ if maxSrcParts > len(src) {
+ maxSrcParts = len(src)
+ }
+ minSrcParts := (maxSrcParts + 1) / 2
+ if minSrcParts < 2 {
+ minSrcParts = 2
+ }
+
+ // Exhaustive search for parts giving the lowest write amplification when merged.
+ var pws []*partWrapper
+ maxM := float64(0)
+ for i := minSrcParts; i <= maxSrcParts; i++ {
+ for j := 0; j <= len(src)-i; j++ {
+ a := src[j : j+i]
+ if a[0].p.ph.CompressedSizeBytes*uint64(len(a)) < a[len(a)-1].p.ph.CompressedSizeBytes {
+ // Do not merge parts with too big difference in size,
+ // since this results in unbalanced merges.
+ continue
+ }
+ outSize := getCompressedSize(a)
+ if outSize > maxOutBytes {
+ // There is no need to verify the remaining parts with bigger sizes.
+ break
+ }
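+ // m is the ratio of the output part size to the biggest source part size. Bigger m means lower write amplification.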
+ m := float64(outSize) / float64(a[len(a)-1].p.ph.CompressedSizeBytes)
+ if m < maxM {
+ continue
+ }
+ maxM = m
+ pws = a
+ }
+ }
+
+ minM := float64(defaultPartsToMerge) / 2
+ if minM < minMergeMultiplier {
+ minM = minMergeMultiplier
+ }
+ if maxM < minM {
+ // There is no sense in merging parts with too small m,
+ // since this leads to high disk write IO.
+ return dst
+ }
+ return append(dst, pws...)
+}
+
+func sortPartsForOptimalMerge(pws []*partWrapper) {
+ // Sort src parts by size and in reverse timestamp order.
+ // This should improve the locality of adjacent rows in the merged parts.
+ sort.Slice(pws, func(i, j int) bool {
+ a := &pws[i].p.ph
+ b := &pws[j].p.ph
+ if a.CompressedSizeBytes == b.CompressedSizeBytes {
+ return a.MinTimestamp > b.MinTimestamp
+ }
+ return a.CompressedSizeBytes < b.CompressedSizeBytes
+ })
+}
+
+func getCompressedSize(pws []*partWrapper) uint64 {
+ n := uint64(0)
+ for _, pw := range pws {
+ n += pw.p.ph.CompressedSizeBytes
+ }
+ return n
+}
+
+func getUncompressedSize(pws []*partWrapper) uint64 {
+ n := uint64(0)
+ for _, pw := range pws {
+ n += pw.p.ph.UncompressedSizeBytes
+ }
+ return n
+}
+
+func getRowsCount(pws []*partWrapper) uint64 {
+ n := uint64(0)
+ for _, pw := range pws {
+ n += pw.p.ph.RowsCount
+ }
+ return n
+}
+
+func getBlocksCount(pws []*partWrapper) uint64 {
+ n := uint64(0)
+ for _, pw := range pws {
+ n += pw.p.ph.BlocksCount
+ }
+ return n
+}
+
+func shouldUsePageCacheForPartSize(size uint64) bool {
+ mem := memory.Remaining() / defaultPartsToMerge
+ return size <= uint64(mem)
+}
diff --git a/lib/logstorage/datadb_test.go b/lib/logstorage/datadb_test.go
new file mode 100644
index 000000000..5f97a9bd8
--- /dev/null
+++ b/lib/logstorage/datadb_test.go
@@ -0,0 +1,91 @@
+package logstorage
+
+import (
+ "math/rand"
+ "testing"
+)
+
+func TestAppendPartsToMergeManyParts(t *testing.T) {
+ // Verify that a big number of parts is merged into a minimal number of parts
+ // using the minimum number of merges.
+ var sizes []uint64
+ maxOutSize := uint64(0)
+ r := rand.New(rand.NewSource(1))
+ for i := 0; i < 1024; i++ {
+ n := uint64(uint32(r.NormFloat64() * 1e9))
+ n++
+ maxOutSize += n
+ sizes = append(sizes, n)
+ }
+ pws := newTestPartWrappersForSizes(sizes)
+
+ iterationsCount := 0
+ sizeMergedTotal := uint64(0)
+ for {
+ pms := appendPartsToMerge(nil, pws, maxOutSize)
+ if len(pms) == 0 {
+ break
+ }
+ m := make(map[*partWrapper]bool)
+ for _, pw := range pms {
+ m[pw] = true
+ }
+ var pwsNew []*partWrapper
+ size := uint64(0)
+ for _, pw := range pws {
+ if m[pw] {
+ size += pw.p.ph.CompressedSizeBytes
+ } else {
+ pwsNew = append(pwsNew, pw)
+ }
+ }
+ pw := &partWrapper{
+ p: &part{
+ ph: partHeader{
+ CompressedSizeBytes: size,
+ },
+ },
+ }
+ sizeMergedTotal += size
+ pwsNew = append(pwsNew, pw)
+ pws = pwsNew
+ iterationsCount++
+ }
+ sizes = newTestSizesFromPartWrappers(pws)
+ sizeTotal := uint64(0)
+ for _, size := range sizes {
+ sizeTotal += uint64(size)
+ }
+ overhead := float64(sizeMergedTotal) / float64(sizeTotal)
+ if overhead > 2.1 {
+ t.Fatalf("too big overhead; sizes=%d, iterationsCount=%d, sizeTotal=%d, sizeMergedTotal=%d, overhead=%f",
+ sizes, iterationsCount, sizeTotal, sizeMergedTotal, overhead)
+ }
+ if len(sizes) > 18 {
+ t.Fatalf("too many sizes %d; sizes=%d, iterationsCount=%d, sizeTotal=%d, sizeMergedTotal=%d, overhead=%f",
+ len(sizes), sizes, iterationsCount, sizeTotal, sizeMergedTotal, overhead)
+ }
+}
+
+func newTestSizesFromPartWrappers(pws []*partWrapper) []uint64 {
+ var sizes []uint64
+ for _, pw := range pws {
+ sizes = append(sizes, pw.p.ph.CompressedSizeBytes)
+ }
+ return sizes
+}
+
+func newTestPartWrappersForSizes(sizes []uint64) []*partWrapper {
+ var pws []*partWrapper
+ for _, size := range sizes {
+ pw := &partWrapper{
+ p: &part{
+ ph: partHeader{
+ CompressedSizeBytes: size,
+ },
+ },
+ }
+ pws = append(pws, pw)
+ }
+ return pws
+}
diff --git a/lib/logstorage/encoding.go b/lib/logstorage/encoding.go
new file mode 100644
index 000000000..48f05154d
--- /dev/null
+++ b/lib/logstorage/encoding.go
@@ -0,0 +1,314 @@
+package logstorage
+
+import (
+ "fmt"
+ "sync"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+)
+
+// marshalStringsBlock marshals a and appends the result to dst.
+//
+// The marshaled strings block can be unmarshaled with stringsBlockUnmarshaler.
+func marshalStringsBlock(dst []byte, a []string) []byte {
+ // Encode string lengths
+ u64s := encoding.GetUint64s(len(a))
+ aLens := u64s.A[:0]
+ for _, s := range a {
+ aLens = append(aLens, uint64(len(s)))
+ }
+ u64s.A = aLens
+ dst = marshalUint64Block(dst, u64s.A)
+ encoding.PutUint64s(u64s)
+
+ // Encode strings
+ bb := bbPool.Get()
+ b := bb.B
+ for _, s := range a {
+ b = append(b, s...)
+ }
+ bb.B = b
+ dst = marshalBytesBlock(dst, bb.B)
+ bbPool.Put(bb)
+
+ return dst
+}
+
+// stringsBlockUnmarshaler is used for unmarshaling the block returned from marshalStringsBlock().
+//
+// Use getStringsBlockUnmarshaler() for obtaining the unmarshaler from the pool in order to save memory allocations.
+type stringsBlockUnmarshaler struct {
+ // data contains the data for the unmarshaled values
+ data []byte
+}
+
+func (sbu *stringsBlockUnmarshaler) reset() {
+ sbu.data = sbu.data[:0]
+}
+
+// unmarshal unmarshals itemsCount strings from src, appends them to dst and returns the result.
+//
+// The returned strings are valid until sbu.reset() is called.
+func (sbu *stringsBlockUnmarshaler) unmarshal(dst []string, src []byte, itemsCount uint64) ([]string, error) {
+ u64s := encoding.GetUint64s(0)
+ defer encoding.PutUint64s(u64s)
+
+ // Decode string lengths
+ var tail []byte
+ var err error
+ u64s.A, tail, err = unmarshalUint64Block(u64s.A[:0], src, itemsCount)
+ if err != nil {
+ return dst, fmt.Errorf("cannot unmarshal string lengths: %w", err)
+ }
+ aLens := u64s.A
+ src = tail
+
+ // Read bytes block into sbu.data
+ dataLen := len(sbu.data)
+ sbu.data, tail, err = unmarshalBytesBlock(sbu.data, src)
+ if err != nil {
+ return dst, fmt.Errorf("cannot unmarshal bytes block with strings: %w", err)
+ }
+ if len(tail) > 0 {
+ return dst, fmt.Errorf("unexpected non-empty tail after reading bytes block with strings; len(tail)=%d", len(tail))
+ }
+
+ // Decode strings from sbu.data into dst
+ data := sbu.data[dataLen:]
+ for _, sLen := range aLens {
+ if uint64(len(data)) < sLen {
+ return dst, fmt.Errorf("cannot unmarshal a string with the length %d bytes from %d bytes", sLen, len(data))
+ }
+ s := bytesutil.ToUnsafeString(data[:sLen])
+ data = data[sLen:]
+ dst = append(dst, s)
+ }
+
+ return dst, nil
+}
+
+// marshalUint64Block appends marshaled a to dst and returns the result.
+func marshalUint64Block(dst []byte, a []uint64) []byte {
+ bb := bbPool.Get()
+ bb.B = marshalUint64Items(bb.B[:0], a)
+ dst = marshalBytesBlock(dst, bb.B)
+ bbPool.Put(bb)
+ return dst
+}
+
+// unmarshalUint64Block unmarshals itemsCount uint64 items from src, appends them to dst and returns the result together with the remaining tail of src.
+func unmarshalUint64Block(dst []uint64, src []byte, itemsCount uint64) ([]uint64, []byte, error) {
+ bb := bbPool.Get()
+ defer bbPool.Put(bb)
+
+ // Unmarshal the underlying bytes block
+ var err error
+ bb.B, src, err = unmarshalBytesBlock(bb.B[:0], src)
+ if err != nil {
+ return dst, src, fmt.Errorf("cannot unmarshal bytes block: %w", err)
+ }
+
+ // Unmarshal the items from bb.
+ dst, err = unmarshalUint64Items(dst, bb.B, itemsCount)
+ if err != nil {
+ return dst, src, fmt.Errorf("cannot unmarshal %d uint64 items from bytes block of length %d bytes: %w", itemsCount, len(bb.B), err)
+ }
+ return dst, src, nil
+}
+
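+// Block types for marshalUint64Items: every item in the block is stored with 1, 2, 4 or 8 bytes depending on the maximum item value.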
+const (
+ uintBlockType8 = 0
+ uintBlockType16 = 1
+ uintBlockType32 = 2
+ uintBlockType64 = 3
+)
+
+// marshalUint64Items appends the marshaled a items to dst and returns the result.
+func marshalUint64Items(dst []byte, a []uint64) []byte {
+ // Do not marshal len(a), since the unmarshaler is expected to know it.
+ nMax := uint64(0)
+ for _, n := range a {
+ if n > nMax {
+ nMax = n
+ }
+ }
+ switch {
+ case nMax < (1 << 8):
+ dst = append(dst, uintBlockType8)
+ for _, n := range a {
+ dst = append(dst, byte(n))
+ }
+ case nMax < (1 << 16):
+ dst = append(dst, uintBlockType16)
+ for _, n := range a {
+ dst = encoding.MarshalUint16(dst, uint16(n))
+ }
+ case nMax < (1 << 32):
+ dst = append(dst, uintBlockType32)
+ for _, n := range a {
+ dst = encoding.MarshalUint32(dst, uint32(n))
+ }
+ default:
+ dst = append(dst, uintBlockType64)
+ for _, n := range a {
+ dst = encoding.MarshalUint64(dst, uint64(n))
+ }
+ }
+ return dst
+}
+
+// unmarshalUint64Items unmarshals itemsCount uint64 items from src, appends them to dst and returns the result.
+func unmarshalUint64Items(dst []uint64, src []byte, itemsCount uint64) ([]uint64, error) {
+ // Unmarshal block type
+ if len(src) < 1 {
+ return dst, fmt.Errorf("cannot unmarshal uint64 block type from empty src")
+ }
+ blockType := src[0]
+ src = src[1:]
+
+ switch blockType {
+ case uintBlockType8:
+ // A block with items smaller than 1<<8, each stored in a single byte
+ if uint64(len(src)) != itemsCount {
+ return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), itemsCount)
+ }
+ for _, v := range src {
+ dst = append(dst, uint64(v))
+ }
+ case uintBlockType16:
+ // A block with items smaller than 1<<16, each stored in 2 bytes
+ if uint64(len(src)) != 2*itemsCount {
+ return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 2*itemsCount)
+ }
+ for len(src) > 0 {
+ v := encoding.UnmarshalUint16(src)
+ src = src[2:]
+ dst = append(dst, uint64(v))
+ }
+ case uintBlockType32:
+ // A block with items smaller than 1<<32, each stored in 4 bytes
+ if uint64(len(src)) != 4*itemsCount {
+ return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 4*itemsCount)
+ }
+ for len(src) > 0 {
+ v := encoding.UnmarshalUint32(src)
+ src = src[4:]
+ dst = append(dst, uint64(v))
+ }
+ case uintBlockType64:
+ // A block with arbitrary uint64 items, each stored in 8 bytes
+ if uint64(len(src)) != 8*itemsCount {
+ return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 8*itemsCount)
+ }
+ for len(src) > 0 {
+ v := encoding.UnmarshalUint64(src)
+ src = src[8:]
+ dst = append(dst, v)
+ }
+ default:
+ return dst, fmt.Errorf("unexpected uint64 block type: %d; want 0, 1, 2 or 3", blockType)
+ }
+ return dst, nil
+}
+
+const (
+ marshalBytesTypePlain = 0
+ marshalBytesTypeZSTD = 1
+)
+
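+// marshalBytesBlock appends the marshaled src to dst and returns the result.
+//
+// Blocks shorter than 128 bytes are stored as is with a single-byte length prefix;
+// longer blocks are compressed with zstd and prefixed with the varint-encoded compressed length.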
+func marshalBytesBlock(dst, src []byte) []byte {
+ if len(src) < 128 {
+ // Marshal the block as is, without compression
+ dst = append(dst, marshalBytesTypePlain)
+ dst = append(dst, byte(len(src)))
+ return append(dst, src...)
+ }
+
+ // Compress the block
+ dst = append(dst, marshalBytesTypeZSTD)
+ bb := bbPool.Get()
+ bb.B = encoding.CompressZSTDLevel(bb.B[:0], src, 1)
+ dst = encoding.MarshalVarUint64(dst, uint64(len(bb.B)))
+ dst = append(dst, bb.B...)
+ bbPool.Put(bb)
+ return dst
+}
+
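+// unmarshalBytesBlock unmarshals a bytes block from src, appends it to dst and returns the updated dst together with the remaining tail of src.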
+func unmarshalBytesBlock(dst, src []byte) ([]byte, []byte, error) {
+ if len(src) < 1 {
+ return dst, src, fmt.Errorf("cannot unmarshal block type from empty src")
+ }
+ blockType := src[0]
+ src = src[1:]
+ switch blockType {
+ case marshalBytesTypePlain:
+ // Plain block
+
+ // Read block length
+ if len(src) < 1 {
+ return dst, src, fmt.Errorf("cannot unmarshal plain block size from empty src")
+ }
+ blockLen := int(src[0])
+ src = src[1:]
+ if len(src) < blockLen {
+ return dst, src, fmt.Errorf("cannot read plain block with the size %d bytes from %b bytes", blockLen, len(src))
+ }
+
+ // Copy the block to dst
+ dst = append(dst, src[:blockLen]...)
+ src = src[blockLen:]
+ return dst, src, nil
+ case marshalBytesTypeZSTD:
+ // Compressed block
+
+ // Read block length
+ tail, blockLen, err := encoding.UnmarshalVarUint64(src)
+ if err != nil {
+ return dst, src, fmt.Errorf("cannot unmarshal compressed block size: %w", err)
+ }
+ src = tail
+ if uint64(len(src)) < blockLen {
+ return dst, src, fmt.Errorf("cannot read compressed block with the size %d bytes from %d bytes", blockLen, len(src))
+ }
+ compressedBlock := src[:blockLen]
+ src = src[blockLen:]
+
+ // Decompress the block
+ bb := bbPool.Get()
+ bb.B, err = encoding.DecompressZSTD(bb.B[:0], compressedBlock)
+ if err != nil {
+ return dst, src, fmt.Errorf("cannot decompress block: %w", err)
+ }
+
+ // Copy the decompressed block to dst.
+ dst = append(dst, bb.B...)
+ bbPool.Put(bb)
+ return dst, src, nil
+ default:
+ return dst, src, fmt.Errorf("unexpected block type: %d; supported types: 0, 1", blockType)
+ }
+}
+
+var bbPool bytesutil.ByteBufferPool
+
+// getStringsBlockUnmarshaler returns stringsBlockUnmarshaler from the pool.
+//
+// Return back the stringsBlockUnmarshaler to the pool by calling putStringsBlockUnmarshaler().
+func getStringsBlockUnmarshaler() *stringsBlockUnmarshaler {
+ v := sbuPool.Get()
+ if v == nil {
+ return &stringsBlockUnmarshaler{}
+ }
+ return v.(*stringsBlockUnmarshaler)
+}
+
+// putStringsBlockUnmarshaler returns back sbu to the pool.
+//
+// sbu mustn't be used after returning to the pool.
+func putStringsBlockUnmarshaler(sbu *stringsBlockUnmarshaler) {
+ sbu.reset()
+ sbuPool.Put(sbu)
+}
+
+var sbuPool sync.Pool
diff --git a/lib/logstorage/encoding_test.go b/lib/logstorage/encoding_test.go
new file mode 100644
index 000000000..3050e531b
--- /dev/null
+++ b/lib/logstorage/encoding_test.go
@@ -0,0 +1,86 @@
+package logstorage
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+ "testing"
+)
+
+func TestMarshalUnmarshalStringsBlock(t *testing.T) {
+ f := func(logs string, blockLenExpected int) {
+ t.Helper()
+ var a []string
+ if logs != "" {
+ a = strings.Split(logs, "\n")
+ }
+ data := marshalStringsBlock(nil, a)
+ if len(data) != blockLenExpected {
+ t.Fatalf("unexpected block length; got %d; want %d; block=%q", len(data), blockLenExpected, data)
+ }
+ sbu := getStringsBlockUnmarshaler()
+ values, err := sbu.unmarshal(nil, data, uint64(len(a)))
+ if err != nil {
+ t.Fatalf("cannot unmarshal strings block: %s", err)
+ }
+ if !reflect.DeepEqual(values, a) {
+ t.Fatalf("unexpected strings after unmarshaling;\ngot\n%q\nwant\n%q", values, a)
+ }
+ putStringsBlockUnmarshaler(sbu)
+ }
+ f("", 5)
+ f("foo", 9)
+ f(`foo
+bar
+baz
+`, 18)
+ f(`
+Apr 28 13:39:06 localhost systemd[1]: Started Network Manager Script Dispatcher Service.
+Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts)
+Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts...
+Apr 28 13:40:05 localhost kernel: [35544.823503] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 1 (2437/0 MHz)
+Apr 28 13:40:15 localhost kernel: [35554.295612] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 2 (2447/0 MHz)
+Apr 28 13:43:37 localhost NetworkManager[1516]: [1651142617.3668] manager: NetworkManager state is now CONNECTED_GLOBAL
+Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Activating via systemd: service name='org.freedesktop.nm_dispatcher' unit='dbus-org.freedesktop.nm-dispatcher.service' requested by ':1.13' (uid=0 pid=1516 comm="/usr/sbin/NetworkManager --no-daemon " label="unconfined")
+Apr 28 13:43:37 localhost systemd[1]: Starting Network Manager Script Dispatcher Service...
+Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] The default IPv4 route is: /org/freedesktop/NetworkManager/ActiveConnection/10
+Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Not a paid data plan: /org/freedesktop/NetworkManager/ActiveConnection/10
+Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Found usable connection: /org/freedesktop/NetworkManager/ActiveConnection/10
+Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Successfully activated service 'org.freedesktop.nm_dispatcher'
+Apr 28 13:43:37 localhost systemd[1]: Started Network Manager Script Dispatcher Service.
+Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts)
+Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts...
+Apr 28 13:43:38 localhost whoopsie[2812]: [13:43:38] online
+Apr 28 13:45:01 localhost CRON[12181]: (root) CMD (command -v debian-sa1 > /dev/null && debian-sa1 1 1)
+Apr 28 13:48:01 localhost kernel: [36020.497806] CPU0: Core temperature above threshold, cpu clock throttled (total events = 22034)
+Apr 28 13:48:01 localhost kernel: [36020.497807] CPU2: Core temperature above threshold, cpu clock throttled (total events = 22034)
+Apr 28 13:48:01 localhost kernel: [36020.497809] CPU1: Package temperature above threshold, cpu clock throttled (total events = 27400)
+Apr 28 13:48:01 localhost kernel: [36020.497810] CPU3: Package temperature above threshold, cpu clock throttled (total events = 27400)
+Apr 28 13:48:01 localhost kernel: [36020.497810] CPU2: Package temperature above threshold, cpu clock throttled (total events = 27400)
+Apr 28 13:48:01 localhost kernel: [36020.497812] CPU0: Package temperature above threshold, cpu clock throttled (total events = 27400)
+Apr 28 13:48:01 localhost kernel: [36020.499855] CPU2: Core temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499855] CPU0: Core temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499856] CPU1: Package temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499857] CPU3: Package temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499858] CPU0: Package temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499859] CPU2: Package temperature/speed normal
+`, 951)
+
+ // Generate a string longer than 1<<16 bytes
+ s := "foo"
+ for len(s) < (1 << 16) {
+ s += s
+ }
+ s += "\n"
+ lines := s
+ f(lines, 36)
+ lines += s
+ f(lines, 52)
+
+ // Generate more than 256 strings
+ lines = ""
+ for i := 0; i < 1000; i++ {
+ lines += fmt.Sprintf("line %d\n", i)
+ }
+ f(lines, 766)
+}
diff --git a/lib/logstorage/encoding_timing_test.go b/lib/logstorage/encoding_timing_test.go
new file mode 100644
index 000000000..6bb0f21ff
--- /dev/null
+++ b/lib/logstorage/encoding_timing_test.go
@@ -0,0 +1,73 @@
+package logstorage
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+)
+
+func BenchmarkMarshalStringsBlock(b *testing.B) {
+ block := strings.Split(benchLogs, "\n")
+
+ b.SetBytes(int64(len(benchLogs)))
+ b.ReportAllocs()
+ b.RunParallel(func(pb *testing.PB) {
+ var buf []byte
+ for pb.Next() {
+ buf = marshalStringsBlock(buf[:0], block)
+ }
+ })
+}
+
+func BenchmarkStringsBlockUnmarshaler_Unmarshal(b *testing.B) {
+ block := strings.Split(benchLogs, "\n")
+ data := marshalStringsBlock(nil, block)
+
+ b.SetBytes(int64(len(benchLogs)))
+ b.ReportAllocs()
+ b.RunParallel(func(pb *testing.PB) {
+ sbu := getStringsBlockUnmarshaler()
+ var values []string
+ for pb.Next() {
+ var err error
+ values, err = sbu.unmarshal(values[:0], data, uint64(len(block)))
+ if err != nil {
+ panic(fmt.Errorf("unexpected error: %w", err))
+ }
+ sbu.reset()
+ }
+ putStringsBlockUnmarshaler(sbu)
+ })
+}
+
+const benchLogs = `
+Apr 28 13:39:06 localhost systemd[1]: Started Network Manager Script Dispatcher Service.
+Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts)
+Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts...
+Apr 28 13:40:05 localhost kernel: [35544.823503] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 1 (2437/0 MHz)
+Apr 28 13:40:15 localhost kernel: [35554.295612] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 2 (2447/0 MHz)
+Apr 28 13:43:37 localhost NetworkManager[1516]: [1651142617.3668] manager: NetworkManager state is now CONNECTED_GLOBAL
+Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Activating via systemd: service name='org.freedesktop.nm_dispatcher' unit='dbus-org.freedesktop.nm-dispatcher.service' requested by ':1.13' (uid=0 pid=1516 comm="/usr/sbin/NetworkManager --no-daemon " label="unconfined")
+Apr 28 13:43:37 localhost systemd[1]: Starting Network Manager Script Dispatcher Service...
+Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] The default IPv4 route is: /org/freedesktop/NetworkManager/ActiveConnection/10
+Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Not a paid data plan: /org/freedesktop/NetworkManager/ActiveConnection/10
+Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Found usable connection: /org/freedesktop/NetworkManager/ActiveConnection/10
+Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Successfully activated service 'org.freedesktop.nm_dispatcher'
+Apr 28 13:43:37 localhost systemd[1]: Started Network Manager Script Dispatcher Service.
+Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts)
+Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts...
+Apr 28 13:43:38 localhost whoopsie[2812]: [13:43:38] online
+Apr 28 13:45:01 localhost CRON[12181]: (root) CMD (command -v debian-sa1 > /dev/null && debian-sa1 1 1)
+Apr 28 13:48:01 localhost kernel: [36020.497806] CPU0: Core temperature above threshold, cpu clock throttled (total events = 22034)
+Apr 28 13:48:01 localhost kernel: [36020.497807] CPU2: Core temperature above threshold, cpu clock throttled (total events = 22034)
+Apr 28 13:48:01 localhost kernel: [36020.497809] CPU1: Package temperature above threshold, cpu clock throttled (total events = 27400)
+Apr 28 13:48:01 localhost kernel: [36020.497810] CPU3: Package temperature above threshold, cpu clock throttled (total events = 27400)
+Apr 28 13:48:01 localhost kernel: [36020.497810] CPU2: Package temperature above threshold, cpu clock throttled (total events = 27400)
+Apr 28 13:48:01 localhost kernel: [36020.497812] CPU0: Package temperature above threshold, cpu clock throttled (total events = 27400)
+Apr 28 13:48:01 localhost kernel: [36020.499855] CPU2: Core temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499855] CPU0: Core temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499856] CPU1: Package temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499857] CPU3: Package temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499858] CPU0: Package temperature/speed normal
+Apr 28 13:48:01 localhost kernel: [36020.499859] CPU2: Package temperature/speed normal
+`
diff --git a/lib/logstorage/filenames.go b/lib/logstorage/filenames.go
new file mode 100644
index 000000000..cd4ac99c5
--- /dev/null
+++ b/lib/logstorage/filenames.go
@@ -0,0 +1,22 @@
+package logstorage
+
+const (
+ metaindexFilename = "metaindex.bin"
+ indexFilename = "index.bin"
+ columnsHeaderFilename = "columns_header.bin"
+ timestampsFilename = "timestamps.bin"
+ fieldValuesFilename = "field_values.bin"
+ fieldBloomFilename = "field_bloom.bin"
+ messageValuesFilename = "message_values.bin"
+ messageBloomFilename = "message_bloom.bin"
+
+ metadataFilename = "metadata.json"
+ partsFilename = "parts.json"
+
+ streamIDCacheFilename = "stream_id.bin"
+
+ indexdbDirname = "indexdb"
+ datadbDirname = "datadb"
+ cacheDirname = "cache"
+ partitionsDirname = "partitions"
+)
diff --git a/lib/logstorage/filters.go b/lib/logstorage/filters.go
new file mode 100644
index 000000000..55a8a9905
--- /dev/null
+++ b/lib/logstorage/filters.go
@@ -0,0 +1,3053 @@
+package logstorage
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+ "regexp"
+ "strconv"
+ "strings"
+ "sync"
+ "unicode/utf8"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+func getFilterBitmap(bitsLen int) *filterBitmap {
+ v := filterBitmapPool.Get()
+ if v == nil {
+ v = &filterBitmap{}
+ }
+ bm := v.(*filterBitmap)
+ bm.init(bitsLen)
+ return bm
+}
+
+func putFilterBitmap(bm *filterBitmap) {
+ bm.reset()
+ filterBitmapPool.Put(bm)
+}
+
+var filterBitmapPool sync.Pool
+
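+// filterBitmap is a bitmap with bitsLen bits, where set bits mark the rows of a block, which match the filter.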
+type filterBitmap struct {
+ a []uint64
+ bitsLen int
+}
+
+func (bm *filterBitmap) reset() {
+ bm.resetBits()
+ bm.a = bm.a[:0]
+
+ bm.bitsLen = 0
+}
+
+func (bm *filterBitmap) copyFrom(src *filterBitmap) {
+ bm.reset()
+
+ bm.a = append(bm.a[:0], src.a...)
+ bm.bitsLen = src.bitsLen
+}
+
+func (bm *filterBitmap) init(bitsLen int) {
+ a := bm.a
+ wordsLen := (bitsLen + 63) / 64
+ if n := wordsLen - cap(a); n > 0 {
+ a = append(a[:cap(a)], make([]uint64, n)...)
+ }
+ a = a[:wordsLen]
+ bm.a = a
+ bm.bitsLen = bitsLen
+}
+
+func (bm *filterBitmap) resetBits() {
+ a := bm.a
+ for i := range a {
+ a[i] = 0
+ }
+}
+
+func (bm *filterBitmap) setBits() {
+ a := bm.a
+ for i := range a {
+ a[i] = ^uint64(0)
+ }
+ tailBits := bm.bitsLen % 64
+ if tailBits > 0 && len(a) > 0 {
+ // Zero bits outside bitsLen at the last word
+ a[len(a)-1] &= (uint64(1) << tailBits) - 1
+ }
+}
+
+func (bm *filterBitmap) isZero() bool {
+ for _, word := range bm.a {
+ if word != 0 {
+ return false
+ }
+ }
+ return true
+}
+
+func (bm *filterBitmap) andNot(x *filterBitmap) {
+ if bm.bitsLen != x.bitsLen {
+ logger.Panicf("BUG: cannot merge bitmaps with distinct lengths; %d vs %d", bm.bitsLen, x.bitsLen)
+ }
+ a := bm.a
+ b := x.a
+ for i := range a {
+ a[i] &= ^b[i]
+ }
+}
+
+func (bm *filterBitmap) or(x *filterBitmap) {
+ if bm.bitsLen != x.bitsLen {
+ logger.Panicf("BUG: cannot merge bitmaps with distinct lengths; %d vs %d", bm.bitsLen, x.bitsLen)
+ }
+ a := bm.a
+ b := x.a
+ for i := range a {
+ a[i] |= b[i]
+ }
+}
+
+// forEachSetBit calls f for each set bit and clears that bit if f returns false
+func (bm *filterBitmap) forEachSetBit(f func(idx int) bool) {
+ a := bm.a
+ bitsLen := bm.bitsLen
+ for i, word := range a {
+ if word == 0 {
+ continue
+ }
+ for j := 0; j < 64; j++ {
+ mask := uint64(1) << j
+ if (word & mask) == 0 {
+ continue
+ }
+ idx := i*64 + j
+ if idx >= bitsLen {
+ break
+ }
+ if !f(idx) {
+ a[i] &= ^mask
+ }
+ }
+ }
+}
+
+type filter interface {
+ // String returns string representation of the filter
+ String() string
+
+ // updateReferencedColumnNames updates m with the column names referenced by the filter
+ updateReferencedColumnNames(m map[string]struct{})
+
+ // apply must update bm according to the filter applied to the given bs block
+ apply(bs *blockSearch, bm *filterBitmap)
+}
+
+// noopFilter does nothing
+type noopFilter struct {
+}
+
+func (nf *noopFilter) String() string {
+ return ""
+}
+
+func (nf *noopFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ // nothing to do
+}
+
+func (nf *noopFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ // nothing to do
+}
+
+// orFilter contains filters joined by OR operator.
+//
+// It is expressed as `f1 OR f2 ... OR fN` in LogsQL.
+type orFilter struct {
+ filters []filter
+}
+
+func (of *orFilter) String() string {
+ filters := of.filters
+ a := make([]string, len(filters))
+ for i, f := range filters {
+ s := f.String()
+ a[i] = s
+ }
+ return strings.Join(a, " or ")
+}
+
+func (of *orFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ for _, f := range of.filters {
+ f.updateReferencedColumnNames(m)
+ }
+}
+
+func (of *orFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ bmResult := getFilterBitmap(bm.bitsLen)
+ bmTmp := getFilterBitmap(bm.bitsLen)
+ for _, f := range of.filters {
+ // Minimize the number of rows to check by the filter by checking only
+ // the rows, which may change the output bm:
+ // - bm matches them, e.g. the caller wants to get them
+ // - bmResult doesn't match them, e.g. all the previous OR filters didn't match them
+ bmTmp.copyFrom(bm)
+ bmTmp.andNot(bmResult)
+ if bmTmp.isZero() {
+ // Shortcut - there is no need to apply the remaining filters,
+ // since the result already matches all the values from the block.
+ break
+ }
+ f.apply(bs, bmTmp)
+ bmResult.or(bmTmp)
+ }
+ putFilterBitmap(bmTmp)
+ bm.copyFrom(bmResult)
+ putFilterBitmap(bmResult)
+}
+
+// andFilter contains filters joined by the AND operator.
+//
+// It is expressed as `f1 AND f2 ... AND fN` in LogsQL.
+type andFilter struct {
+ filters []filter
+}
+
+func (af *andFilter) String() string {
+ filters := af.filters
+ a := make([]string, len(filters))
+ for i, f := range filters {
+ s := f.String()
+ switch f.(type) {
+ case *orFilter:
+ s = "(" + s + ")"
+ }
+ a[i] = s
+ }
+ return strings.Join(a, " ")
+}
+
+func (af *andFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ for _, f := range af.filters {
+ f.updateReferencedColumnNames(m)
+ }
+}
+
+func (af *andFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ for _, f := range af.filters {
+ f.apply(bs, bm)
+ if bm.isZero() {
+ // Shortcut - there is no need to apply the remaining filters,
+ // since the result will be zero anyway.
+ return
+ }
+ }
+}
+
+// notFilter negates the filter.
+//
+// It is expressed as `NOT f` or `!f` in LogsQL.
+type notFilter struct {
+ f filter
+}
+
+func (nf *notFilter) String() string {
+ s := nf.f.String()
+ switch nf.f.(type) {
+ case *andFilter, *orFilter:
+ s = "(" + s + ")"
+ }
+ return "!" + s
+}
+
+func (nf *notFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ nf.f.updateReferencedColumnNames(m)
+}
+
+func (nf *notFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ // Minimize the number of rows to check by the filter by applying it
+ // only to the rows, which match the bm, e.g. they may change the bm result.
+ bmTmp := getFilterBitmap(bm.bitsLen)
+ bmTmp.copyFrom(bm)
+ nf.f.apply(bs, bmTmp)
+ bm.andNot(bmTmp)
+ putFilterBitmap(bmTmp)
+}
+
+// streamFilter is the filter for `_stream:{...}`
+type streamFilter struct {
+ // f is the filter to apply
+ f *StreamFilter
+
+ // tenantIDs is the list of tenantIDs to search for streamIDs.
+ tenantIDs []TenantID
+
+ // idb is the indexdb to search for streamIDs.
+ idb *indexdb
+
+ streamIDsOnce sync.Once
+ streamIDs map[streamID]struct{}
+}
+
+func (sf *streamFilter) String() string {
+ s := sf.f.String()
+ if s == "{}" {
+ return ""
+ }
+ return "_stream:" + s
+}
+
+func (sf *streamFilter) getStreamIDs() map[streamID]struct{} {
+ sf.streamIDsOnce.Do(sf.initStreamIDs)
+ return sf.streamIDs
+}
+
+func (sf *streamFilter) initStreamIDs() {
+ streamIDs := sf.idb.searchStreamIDs(sf.tenantIDs, sf.f)
+ m := make(map[streamID]struct{}, len(streamIDs))
+ for i := range streamIDs {
+ m[streamIDs[i]] = struct{}{}
+ }
+ sf.streamIDs = m
+}
+
+func (sf *streamFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m["_stream"] = struct{}{}
+}
+
+func (sf *streamFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ if sf.f.isEmpty() {
+ return
+ }
+ streamIDs := sf.getStreamIDs()
+ if _, ok := streamIDs[bs.bsw.bh.streamID]; !ok {
+ bm.resetBits()
+ return
+ }
+}
+
+// timeFilter filters by time.
+//
+// It is expressed as `_time:(start, end]` in LogsQL.
+type timeFilter struct {
+ minTimestamp int64
+ maxTimestamp int64
+
+ stringRepr string
+}
+
+func (tf *timeFilter) String() string {
+ return "_time:" + tf.stringRepr
+}
+
+func (tf *timeFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m["_time"] = struct{}{}
+}
+
+func (tf *timeFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ minTimestamp := tf.minTimestamp
+ maxTimestamp := tf.maxTimestamp
+
+ if minTimestamp > maxTimestamp {
+ bm.resetBits()
+ return
+ }
+
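+ // Fast path - check the timestamps range of the whole block before inspecting individual timestamps.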
+ th := bs.bsw.bh.timestampsHeader
+ if minTimestamp > th.maxTimestamp || maxTimestamp < th.minTimestamp {
+ bm.resetBits()
+ return
+ }
+ if minTimestamp <= th.minTimestamp && maxTimestamp >= th.maxTimestamp {
+ return
+ }
+
+ timestamps := bs.getTimestamps()
+ bm.forEachSetBit(func(idx int) bool {
+ ts := timestamps[idx]
+ return ts >= minTimestamp && ts <= maxTimestamp
+ })
+}
+
+// sequenceFilter matches an ordered sequence of phrases
+//
+// Example LogsQL: `fieldName:seq(foo, "bar baz")`
+type sequenceFilter struct {
+ fieldName string
+ phrases []string
+
+ tokensOnce sync.Once
+ tokens []string
+
+ nonEmptyPhrasesOnce sync.Once
+ nonEmptyPhrases []string
+}
+
+func (sf *sequenceFilter) String() string {
+ phrases := sf.phrases
+ a := make([]string, len(phrases))
+ for i, phrase := range phrases {
+ a[i] = quoteTokenIfNeeded(phrase)
+ }
+ return fmt.Sprintf("%sseq(%s)", quoteFieldNameIfNeeded(sf.fieldName), strings.Join(a, ","))
+}
+
+func (sf *sequenceFilter) getTokens() []string {
+ sf.tokensOnce.Do(sf.initTokens)
+ return sf.tokens
+}
+
+func (sf *sequenceFilter) initTokens() {
+ phrases := sf.getNonEmptyPhrases()
+ tokens := tokenizeStrings(nil, phrases)
+ sf.tokens = tokens
+}
+
+func (sf *sequenceFilter) getNonEmptyPhrases() []string {
+ sf.nonEmptyPhrasesOnce.Do(sf.initNonEmptyPhrases)
+ return sf.nonEmptyPhrases
+}
+
+func (sf *sequenceFilter) initNonEmptyPhrases() {
+ phrases := sf.phrases
+ result := make([]string, 0, len(phrases))
+ for _, phrase := range phrases {
+ if phrase != "" {
+ result = append(result, phrase)
+ }
+ }
+ sf.nonEmptyPhrases = result
+}
+
+func (sf *sequenceFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[sf.fieldName] = struct{}{}
+}
+
+func (sf *sequenceFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := sf.fieldName
+ phrases := sf.getNonEmptyPhrases()
+
+ if len(phrases) == 0 {
+ return
+ }
+
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchSequence(v, phrases) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether filter matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ // It matches anything only for empty phrase.
+ if !matchSequence("", phrases) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ tokens := sf.getTokens()
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringBySequence(bs, ch, bm, phrases, tokens)
+ case valueTypeDict:
+ matchValuesDictBySequence(bs, ch, bm, phrases)
+ case valueTypeUint8:
+ matchUint8BySequence(bs, ch, bm, phrases, tokens)
+ case valueTypeUint16:
+ matchUint16BySequence(bs, ch, bm, phrases, tokens)
+ case valueTypeUint32:
+ matchUint32BySequence(bs, ch, bm, phrases, tokens)
+ case valueTypeUint64:
+ matchUint64BySequence(bs, ch, bm, phrases, tokens)
+ case valueTypeFloat64:
+ matchFloat64BySequence(bs, ch, bm, phrases, tokens)
+ case valueTypeIPv4:
+ matchIPv4BySequence(bs, ch, bm, phrases, tokens)
+ case valueTypeTimestampISO8601:
+ matchTimestampISO8601BySequence(bs, ch, bm, phrases, tokens)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// exactPrefixFilter matches the exact prefix.
+//
+// Example LogsQL: `fieldName:exact_prefix("foo bar")`
+type exactPrefixFilter struct {
+ fieldName string
+ prefix string
+
+ tokensOnce sync.Once
+ tokens []string
+}
+
+func (ef *exactPrefixFilter) String() string {
+ return fmt.Sprintf("%sexact_prefix(%s)", quoteFieldNameIfNeeded(ef.fieldName), quoteTokenIfNeeded(ef.prefix))
+}
+
+func (ef *exactPrefixFilter) getTokens() []string {
+ ef.tokensOnce.Do(ef.initTokens)
+ return ef.tokens
+}
+
+func (ef *exactPrefixFilter) initTokens() {
+ ef.tokens = getTokensSkipLast(ef.prefix)
+}
+
+func (ef *exactPrefixFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[ef.fieldName] = struct{}{}
+}
+
+func (ef *exactPrefixFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := ef.fieldName
+ prefix := ef.prefix
+
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchExactPrefix(v, prefix) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether filter matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ if !matchExactPrefix("", prefix) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ tokens := ef.getTokens()
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByExactPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeDict:
+ matchValuesDictByExactPrefix(bs, ch, bm, prefix)
+ case valueTypeUint8:
+ matchUint8ByExactPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeUint16:
+ matchUint16ByExactPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeUint32:
+ matchUint32ByExactPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeUint64:
+ matchUint64ByExactPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeFloat64:
+ matchFloat64ByExactPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeIPv4:
+ matchIPv4ByExactPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeTimestampISO8601:
+ matchTimestampISO8601ByExactPrefix(bs, ch, bm, prefix, tokens)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// exactFilter matches the exact value.
+//
+// Example LogsQL: `fieldName:exact("foo bar")`
+type exactFilter struct {
+ fieldName string
+ value string
+
+ tokensOnce sync.Once
+ tokens []string
+}
+
+func (ef *exactFilter) String() string {
+ return fmt.Sprintf("%sexact(%s)", quoteFieldNameIfNeeded(ef.fieldName), quoteTokenIfNeeded(ef.value))
+}
+
+func (ef *exactFilter) getTokens() []string {
+ ef.tokensOnce.Do(ef.initTokens)
+ return ef.tokens
+}
+
+func (ef *exactFilter) initTokens() {
+ ef.tokens = tokenizeStrings(nil, []string{ef.value})
+}
+
+func (ef *exactFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[ef.fieldName] = struct{}{}
+}
+
+func (ef *exactFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := ef.fieldName
+ value := ef.value
+
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if value != v {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether filter matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ // It matches anything only for empty value.
+ if value != "" {
+ bm.resetBits()
+ }
+ return
+ }
+
+ tokens := ef.getTokens()
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByExactValue(bs, ch, bm, value, tokens)
+ case valueTypeDict:
+ matchValuesDictByExactValue(bs, ch, bm, value)
+ case valueTypeUint8:
+ matchUint8ByExactValue(bs, ch, bm, value, tokens)
+ case valueTypeUint16:
+ matchUint16ByExactValue(bs, ch, bm, value, tokens)
+ case valueTypeUint32:
+ matchUint32ByExactValue(bs, ch, bm, value, tokens)
+ case valueTypeUint64:
+ matchUint64ByExactValue(bs, ch, bm, value, tokens)
+ case valueTypeFloat64:
+ matchFloat64ByExactValue(bs, ch, bm, value, tokens)
+ case valueTypeIPv4:
+ matchIPv4ByExactValue(bs, ch, bm, value, tokens)
+ case valueTypeTimestampISO8601:
+ matchTimestampISO8601ByExactValue(bs, ch, bm, value, tokens)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// inFilter matches any exact value from the values map.
+//
+// Example LogsQL: `fieldName:in("foo", "bar baz")`
+type inFilter struct {
+ fieldName string
+ values []string
+
+ tokenSetsOnce sync.Once
+ tokenSets [][]string
+
+ stringValuesOnce sync.Once
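+ // The following sets are initialized lazily and hold the values encoded
+ // according to the corresponding column value type, so they can be matched against encoded column values.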
+ stringValues map[string]struct{}
+
+ uint8ValuesOnce sync.Once
+ uint8Values map[string]struct{}
+
+ uint16ValuesOnce sync.Once
+ uint16Values map[string]struct{}
+
+ uint32ValuesOnce sync.Once
+ uint32Values map[string]struct{}
+
+ uint64ValuesOnce sync.Once
+ uint64Values map[string]struct{}
+
+ float64ValuesOnce sync.Once
+ float64Values map[string]struct{}
+
+ ipv4ValuesOnce sync.Once
+ ipv4Values map[string]struct{}
+
+ timestampISO8601ValuesOnce sync.Once
+ timestampISO8601Values map[string]struct{}
+}
+
+func (af *inFilter) String() string {
+ values := af.values
+ a := make([]string, len(values))
+ for i, value := range values {
+ a[i] = quoteTokenIfNeeded(value)
+ }
+ return fmt.Sprintf("%sin(%s)", quoteFieldNameIfNeeded(af.fieldName), strings.Join(a, ","))
+}
+
+func (af *inFilter) getTokenSets() [][]string {
+ af.tokenSetsOnce.Do(af.initTokenSets)
+ return af.tokenSets
+}
+
+// It is faster to match every row in the block than to check too big a number of tokenSets against the bloom filter.
+const maxTokenSetsToInit = 1000
+
+func (af *inFilter) initTokenSets() {
+ values := af.values
+ tokenSetsLen := len(values)
+ if tokenSetsLen > maxTokenSetsToInit {
+ tokenSetsLen = maxTokenSetsToInit
+ }
+ tokenSets := make([][]string, 0, tokenSetsLen+1)
+ for _, v := range values {
+ tokens := tokenizeStrings(nil, []string{v})
+ tokenSets = append(tokenSets, tokens)
+ if len(tokenSets) > maxTokenSetsToInit {
+ break
+ }
+ }
+ af.tokenSets = tokenSets
+}
+
+func (af *inFilter) getStringValues() map[string]struct{} {
+ af.stringValuesOnce.Do(af.initStringValues)
+ return af.stringValues
+}
+
+func (af *inFilter) initStringValues() {
+ values := af.values
+ m := make(map[string]struct{}, len(values))
+ for _, v := range values {
+ m[v] = struct{}{}
+ }
+ af.stringValues = m
+}
+
+func (af *inFilter) getUint8Values() map[string]struct{} {
+ af.uint8ValuesOnce.Do(af.initUint8Values)
+ return af.uint8Values
+}
+
+func (af *inFilter) initUint8Values() {
+ values := af.values
+ m := make(map[string]struct{}, len(values))
+ buf := make([]byte, 0, len(values)*1)
+ for _, v := range values {
+ n, ok := tryParseUint64(v)
+ if !ok || n >= (1<<8) {
+ continue
+ }
+ bufLen := len(buf)
+ buf = append(buf, byte(n))
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ m[s] = struct{}{}
+ }
+ af.uint8Values = m
+}
+
+func (af *inFilter) getUint16Values() map[string]struct{} {
+ af.uint16ValuesOnce.Do(af.initUint16Values)
+ return af.uint16Values
+}
+
+func (af *inFilter) initUint16Values() {
+ values := af.values
+ m := make(map[string]struct{}, len(values))
+ buf := make([]byte, 0, len(values)*2)
+ for _, v := range values {
+ n, ok := tryParseUint64(v)
+ if !ok || n >= (1<<16) {
+ continue
+ }
+ bufLen := len(buf)
+ buf = encoding.MarshalUint16(buf, uint16(n))
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ m[s] = struct{}{}
+ }
+ af.uint16Values = m
+}
+
+func (af *inFilter) getUint32Values() map[string]struct{} {
+ af.uint32ValuesOnce.Do(af.initUint32Values)
+ return af.uint32Values
+}
+
+func (af *inFilter) initUint32Values() {
+ values := af.values
+ m := make(map[string]struct{}, len(values))
+ buf := make([]byte, 0, len(values)*4)
+ for _, v := range values {
+ n, ok := tryParseUint64(v)
+ if !ok || n >= (1<<32) {
+ continue
+ }
+ bufLen := len(buf)
+ buf = encoding.MarshalUint32(buf, uint32(n))
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ m[s] = struct{}{}
+ }
+ af.uint32Values = m
+}
+
+func (af *inFilter) getUint64Values() map[string]struct{} {
+ af.uint64ValuesOnce.Do(af.initUint64Values)
+ return af.uint64Values
+}
+
+func (af *inFilter) initUint64Values() {
+ values := af.values
+ m := make(map[string]struct{}, len(values))
+ buf := make([]byte, 0, len(values)*8)
+ for _, v := range values {
+ n, ok := tryParseUint64(v)
+ if !ok {
+ continue
+ }
+ bufLen := len(buf)
+ buf = encoding.MarshalUint64(buf, n)
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ m[s] = struct{}{}
+ }
+ af.uint64Values = m
+}
+
+func (af *inFilter) getFloat64Values() map[string]struct{} {
+ af.float64ValuesOnce.Do(af.initFloat64Values)
+ return af.float64Values
+}
+
+func (af *inFilter) initFloat64Values() {
+ values := af.values
+ m := make(map[string]struct{}, len(values))
+ buf := make([]byte, 0, len(values)*8)
+ for _, v := range values {
+ f, ok := tryParseFloat64(v)
+ if !ok {
+ continue
+ }
+ n := math.Float64bits(f)
+ bufLen := len(buf)
+ buf = encoding.MarshalUint64(buf, n)
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ m[s] = struct{}{}
+ }
+ af.float64Values = m
+}
+
+func (af *inFilter) getIPv4Values() map[string]struct{} {
+ af.ipv4ValuesOnce.Do(af.initIPv4Values)
+ return af.ipv4Values
+}
+
+func (af *inFilter) initIPv4Values() {
+ values := af.values
+ m := make(map[string]struct{}, len(values))
+ buf := make([]byte, 0, len(values)*4)
+ for _, v := range values {
+ n, ok := tryParseIPv4(v)
+ if !ok {
+ continue
+ }
+ bufLen := len(buf)
+ buf = encoding.MarshalUint32(buf, uint32(n))
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ m[s] = struct{}{}
+ }
+ af.ipv4Values = m
+}
+
+func (af *inFilter) getTimestampISO8601Values() map[string]struct{} {
+ af.timestampISO8601ValuesOnce.Do(af.initTimestampISO8601Values)
+ return af.timestampISO8601Values
+}
+
+func (af *inFilter) initTimestampISO8601Values() {
+ values := af.values
+ m := make(map[string]struct{}, len(values))
+ buf := make([]byte, 0, len(values)*8)
+ for _, v := range values {
+ n, ok := tryParseTimestampISO8601(v)
+ if !ok {
+ continue
+ }
+ bufLen := len(buf)
+ buf = encoding.MarshalUint64(buf, n)
+ s := bytesutil.ToUnsafeString(buf[bufLen:])
+ m[s] = struct{}{}
+ }
+ af.timestampISO8601Values = m
+}
+
+func (af *inFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[af.fieldName] = struct{}{}
+}
+
+func (af *inFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := af.fieldName
+
+ if len(af.values) == 0 {
+ bm.resetBits()
+ return
+ }
+
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ stringValues := af.getStringValues()
+ if _, ok := stringValues[v]; !ok {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether filter matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ // It matches anything only if the filter values contain an empty string.
+ stringValues := af.getStringValues()
+ if _, ok := stringValues[""]; !ok {
+ bm.resetBits()
+ }
+ return
+ }
+
+ tokenSets := af.getTokenSets()
+
+ switch ch.valueType {
+ case valueTypeString:
+ stringValues := af.getStringValues()
+ matchAnyValue(bs, ch, bm, stringValues, tokenSets)
+ case valueTypeDict:
+ stringValues := af.getStringValues()
+ matchValuesDictByAnyValue(bs, ch, bm, stringValues)
+ case valueTypeUint8:
+ binValues := af.getUint8Values()
+ matchAnyValue(bs, ch, bm, binValues, tokenSets)
+ case valueTypeUint16:
+ binValues := af.getUint16Values()
+ matchAnyValue(bs, ch, bm, binValues, tokenSets)
+ case valueTypeUint32:
+ binValues := af.getUint32Values()
+ matchAnyValue(bs, ch, bm, binValues, tokenSets)
+ case valueTypeUint64:
+ binValues := af.getUint64Values()
+ matchAnyValue(bs, ch, bm, binValues, tokenSets)
+ case valueTypeFloat64:
+ binValues := af.getFloat64Values()
+ matchAnyValue(bs, ch, bm, binValues, tokenSets)
+ case valueTypeIPv4:
+ binValues := af.getIPv4Values()
+ matchAnyValue(bs, ch, bm, binValues, tokenSets)
+ case valueTypeTimestampISO8601:
+ binValues := af.getTimestampISO8601Values()
+ matchAnyValue(bs, ch, bm, binValues, tokenSets)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// ipv4RangeFilter matches the given ipv4 range [minValue..maxValue].
+//
+// Example LogsQL: `fieldName:ipv4_range(127.0.0.1, 127.0.0.255)`
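+//
+// An illustrative sketch (hypothetical field; tryParseIPv4 converts dotted-quad strings to uint32, as used elsewhere in this file):
+//
+//	lo, _ := tryParseIPv4("127.0.0.1")
+//	hi, _ := tryParseIPv4("127.0.0.255")
+//	f := &ipv4RangeFilter{fieldName: "remote_addr", minValue: lo, maxValue: hi}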
+type ipv4RangeFilter struct {
+ fieldName string
+ minValue uint32
+ maxValue uint32
+}
+
+func (rf *ipv4RangeFilter) String() string {
+ minValue := string(encoding.MarshalUint32(nil, rf.minValue))
+ maxValue := string(encoding.MarshalUint32(nil, rf.maxValue))
+ return fmt.Sprintf("%sipv4_range(%s, %s)", quoteFieldNameIfNeeded(rf.fieldName), toIPv4String(nil, minValue), toIPv4String(nil, maxValue))
+}
+
+func (rf *ipv4RangeFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[rf.fieldName] = struct{}{}
+}
+
+func (rf *ipv4RangeFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := rf.fieldName
+ minValue := rf.minValue
+ maxValue := rf.maxValue
+
+ if minValue > maxValue {
+ bm.resetBits()
+ return
+ }
+
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchIPv4Range(v, minValue, maxValue) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether filter matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ bm.resetBits()
+ return
+ }
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByIPv4Range(bs, ch, bm, minValue, maxValue)
+ case valueTypeDict:
+ matchValuesDictByIPv4Range(bs, ch, bm, minValue, maxValue)
+ case valueTypeUint8:
+ bm.resetBits()
+ case valueTypeUint16:
+ bm.resetBits()
+ case valueTypeUint32:
+ bm.resetBits()
+ case valueTypeUint64:
+ bm.resetBits()
+ case valueTypeFloat64:
+ bm.resetBits()
+ case valueTypeIPv4:
+ matchIPv4ByRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeTimestampISO8601:
+ bm.resetBits()
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// stringRangeFilter matches the given string range [minValue..maxValue)
+//
+// Note that minValue is included in the range, while maxValue isn't.
+// This simplifies querying distinct log sets with string_range(A, B), string_range(B, C), etc.
+//
+// Example LogsQL: `fieldName:string_range(minValue, maxValue)`
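+//
+// An illustrative sketch of the half-open interval (hypothetical field and bounds):
+//
+//	f := &stringRangeFilter{fieldName: "user", minValue: "A", maxValue: "B"}
+//	// matches "A" and "Abby", but not "B" itself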
+type stringRangeFilter struct {
+ fieldName string
+ minValue string
+ maxValue string
+}
+
+func (rf *stringRangeFilter) String() string {
+ return fmt.Sprintf("%sstring_range(%s, %s)", quoteFieldNameIfNeeded(rf.fieldName), quoteTokenIfNeeded(rf.minValue), quoteTokenIfNeeded(rf.maxValue))
+}
+
+func (rf *stringRangeFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[rf.fieldName] = struct{}{}
+}
+
+func (rf *stringRangeFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := rf.fieldName
+ minValue := rf.minValue
+ maxValue := rf.maxValue
+
+ if minValue > maxValue {
+ bm.resetBits()
+ return
+ }
+
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchStringRange(v, minValue, maxValue) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether filter matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ if !matchStringRange("", minValue, maxValue) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByStringRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeDict:
+ matchValuesDictByStringRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeUint8:
+ matchUint8ByStringRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeUint16:
+ matchUint16ByStringRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeUint32:
+ matchUint32ByStringRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeUint64:
+ matchUint64ByStringRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeFloat64:
+ matchFloat64ByStringRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeIPv4:
+ matchIPv4ByStringRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeTimestampISO8601:
+ matchTimestampISO8601ByStringRange(bs, ch, bm, minValue, maxValue)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// lenRangeFilter matches field values with the length in the given range [minLen, maxLen].
+//
+// Example LogsQL: `fieldName:len_range(10, 20)`
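+//
+// An illustrative sketch (hypothetical field and bounds):
+//
+//	f := &lenRangeFilter{fieldName: "trace_id", minLen: 16, maxLen: 32}
+//	f.apply(bs, bm) // keeps rows where len(trace_id) is in [16..32]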
+type lenRangeFilter struct {
+ fieldName string
+ minLen uint64
+ maxLen uint64
+}
+
+func (rf *lenRangeFilter) String() string {
+ return quoteFieldNameIfNeeded(rf.fieldName) + fmt.Sprintf("len_range(%d,%d)", rf.minLen, rf.maxLen)
+}
+
+func (rf *lenRangeFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[rf.fieldName] = struct{}{}
+}
+
+func (rf *lenRangeFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := rf.fieldName
+ minLen := rf.minLen
+ maxLen := rf.maxLen
+
+ if minLen > maxLen {
+ bm.resetBits()
+ return
+ }
+
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchLenRange(v, minLen, maxLen) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether filter matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ if !matchLenRange("", minLen, maxLen) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByLenRange(bs, ch, bm, minLen, maxLen)
+ case valueTypeDict:
+ matchValuesDictByLenRange(bs, ch, bm, minLen, maxLen)
+ case valueTypeUint8:
+ matchUint8ByLenRange(bs, ch, bm, minLen, maxLen)
+ case valueTypeUint16:
+ matchUint16ByLenRange(bs, ch, bm, minLen, maxLen)
+ case valueTypeUint32:
+ matchUint32ByLenRange(bs, ch, bm, minLen, maxLen)
+ case valueTypeUint64:
+ matchUint64ByLenRange(bs, ch, bm, minLen, maxLen)
+ case valueTypeFloat64:
+ matchFloat64ByLenRange(bs, ch, bm, minLen, maxLen)
+ case valueTypeIPv4:
+ matchIPv4ByLenRange(bs, ch, bm, minLen, maxLen)
+ case valueTypeTimestampISO8601:
+ matchTimestampISO8601ByLenRange(bm, minLen, maxLen)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// rangeFilter matches the given range [minValue..maxValue].
+//
+// Example LogsQL: `fieldName:range(minValue, maxValue]`
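+//
+// An illustrative sketch (hypothetical field and bounds; stringRepr only affects String() output):
+//
+//	f := &rangeFilter{fieldName: "duration", minValue: 0.5, maxValue: 2, stringRepr: "[0.5, 2]"}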
+type rangeFilter struct {
+ fieldName string
+ minValue float64
+ maxValue float64
+
+ stringRepr string
+}
+
+func (rf *rangeFilter) String() string {
+ return quoteFieldNameIfNeeded(rf.fieldName) + "range" + rf.stringRepr
+}
+
+func (rf *rangeFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[rf.fieldName] = struct{}{}
+}
+
+func (rf *rangeFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := rf.fieldName
+ minValue := rf.minValue
+ maxValue := rf.maxValue
+
+ if minValue > maxValue {
+ bm.resetBits()
+ return
+ }
+
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchRange(v, minValue, maxValue) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether filter matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ bm.resetBits()
+ return
+ }
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeDict:
+ matchValuesDictByRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeUint8:
+ matchUint8ByRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeUint16:
+ matchUint16ByRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeUint32:
+ matchUint32ByRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeUint64:
+ matchUint64ByRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeFloat64:
+ matchFloat64ByRange(bs, ch, bm, minValue, maxValue)
+ case valueTypeIPv4:
+ bm.resetBits()
+ case valueTypeTimestampISO8601:
+ bm.resetBits()
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// regexpFilter matches the given regexp
+//
+// Example LogsQL: `fieldName:re("regexp")`
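+//
+// An illustrative sketch (hypothetical field and pattern):
+//
+//	f := &regexpFilter{fieldName: "msg", re: regexp.MustCompile("err(or)?")}
+//	f.apply(bs, bm) // keeps rows where `msg` matches the regexp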
+type regexpFilter struct {
+ fieldName string
+ re *regexp.Regexp
+}
+
+func (rf *regexpFilter) String() string {
+ return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(rf.fieldName), rf.re.String())
+}
+
+func (rf *regexpFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[rf.fieldName] = struct{}{}
+}
+
+func (rf *regexpFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := rf.fieldName
+ re := rf.re
+
+ // Verify whether filter matches const column
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !re.MatchString(v) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether filter matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ if !re.MatchString("") {
+ bm.resetBits()
+ }
+ return
+ }
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByRegexp(bs, ch, bm, re)
+ case valueTypeDict:
+ matchValuesDictByRegexp(bs, ch, bm, re)
+ case valueTypeUint8:
+ matchUint8ByRegexp(bs, ch, bm, re)
+ case valueTypeUint16:
+ matchUint16ByRegexp(bs, ch, bm, re)
+ case valueTypeUint32:
+ matchUint32ByRegexp(bs, ch, bm, re)
+ case valueTypeUint64:
+ matchUint64ByRegexp(bs, ch, bm, re)
+ case valueTypeFloat64:
+ matchFloat64ByRegexp(bs, ch, bm, re)
+ case valueTypeIPv4:
+ matchIPv4ByRegexp(bs, ch, bm, re)
+ case valueTypeTimestampISO8601:
+ matchTimestampISO8601ByRegexp(bs, ch, bm, re)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// anyCasePrefixFilter matches the given prefix in lower, upper and mixed case.
+//
+// Example LogsQL: `fieldName:i(prefix*)` or `fieldName:i("some prefix"*)`
+//
+// A special case `fieldName:i(*)` is equivalent to `fieldName:*` and matches any non-empty value for the given `fieldName` field.
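+//
+// An illustrative sketch (hypothetical field and prefix):
+//
+//	f := &anyCasePrefixFilter{fieldName: "host", prefix: "api"}
+//	f.apply(bs, bm) // matches "api-1", "API-1", "Api01", etc.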
+type anyCasePrefixFilter struct {
+ fieldName string
+ prefix string
+
+ tokensOnce sync.Once
+ tokens []string
+}
+
+func (pf *anyCasePrefixFilter) String() string {
+ if pf.prefix == "" {
+ return quoteFieldNameIfNeeded(pf.fieldName) + "i(*)"
+ }
+ return fmt.Sprintf("%si(%s*)", quoteFieldNameIfNeeded(pf.fieldName), quoteTokenIfNeeded(pf.prefix))
+}
+
+func (pf *anyCasePrefixFilter) getTokens() []string {
+ pf.tokensOnce.Do(pf.initTokens)
+ return pf.tokens
+}
+
+func (pf *anyCasePrefixFilter) initTokens() {
+ pf.tokens = getTokensSkipLast(pf.prefix)
+}
+
+func (pf *anyCasePrefixFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[pf.fieldName] = struct{}{}
+}
+
+func (pf *anyCasePrefixFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := pf.fieldName
+ prefixLowercase := strings.ToLower(pf.prefix)
+
+ // Verify whether pf matches const column
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchAnyCasePrefix(v, prefixLowercase) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether pf matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ bm.resetBits()
+ return
+ }
+
+ tokens := pf.getTokens()
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByAnyCasePrefix(bs, ch, bm, prefixLowercase)
+ case valueTypeDict:
+ matchValuesDictByAnyCasePrefix(bs, ch, bm, prefixLowercase)
+ case valueTypeUint8:
+ matchUint8ByPrefix(bs, ch, bm, prefixLowercase)
+ case valueTypeUint16:
+ matchUint16ByPrefix(bs, ch, bm, prefixLowercase)
+ case valueTypeUint32:
+ matchUint32ByPrefix(bs, ch, bm, prefixLowercase)
+ case valueTypeUint64:
+ matchUint64ByPrefix(bs, ch, bm, prefixLowercase)
+ case valueTypeFloat64:
+ matchFloat64ByPrefix(bs, ch, bm, prefixLowercase, tokens)
+ case valueTypeIPv4:
+ matchIPv4ByPrefix(bs, ch, bm, prefixLowercase, tokens)
+ case valueTypeTimestampISO8601:
+ prefixUppercase := strings.ToUpper(pf.prefix)
+ matchTimestampISO8601ByPrefix(bs, ch, bm, prefixUppercase, tokens)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// prefixFilter matches the given prefix.
+//
+// Example LogsQL: `fieldName:prefix*` or `fieldName:"some prefix"*`
+//
+// A special case `fieldName:*` matches any non-empty value for the given `fieldName` field.
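+//
+// An illustrative sketch (hypothetical field and prefix):
+//
+//	f := &prefixFilter{fieldName: "unit", prefix: "vm"}
+//	f.apply(bs, bm) // matches e.g. "vmagent" and "vmalert"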
+type prefixFilter struct {
+ fieldName string
+ prefix string
+
+ tokensOnce sync.Once
+ tokens []string
+}
+
+func (pf *prefixFilter) String() string {
+ if pf.prefix == "" {
+ return quoteFieldNameIfNeeded(pf.fieldName) + "*"
+ }
+ return fmt.Sprintf("%s%s*", quoteFieldNameIfNeeded(pf.fieldName), quoteTokenIfNeeded(pf.prefix))
+}
+
+func (pf *prefixFilter) getTokens() []string {
+ pf.tokensOnce.Do(pf.initTokens)
+ return pf.tokens
+}
+
+func (pf *prefixFilter) initTokens() {
+ pf.tokens = getTokensSkipLast(pf.prefix)
+}
+
+func (pf *prefixFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[pf.fieldName] = struct{}{}
+}
+
+func (pf *prefixFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := pf.fieldName
+ prefix := pf.prefix
+
+ // Verify whether pf matches const column
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchPrefix(v, prefix) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether pf matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ bm.resetBits()
+ return
+ }
+
+ tokens := pf.getTokens()
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeDict:
+ matchValuesDictByPrefix(bs, ch, bm, prefix)
+ case valueTypeUint8:
+ matchUint8ByPrefix(bs, ch, bm, prefix)
+ case valueTypeUint16:
+ matchUint16ByPrefix(bs, ch, bm, prefix)
+ case valueTypeUint32:
+ matchUint32ByPrefix(bs, ch, bm, prefix)
+ case valueTypeUint64:
+ matchUint64ByPrefix(bs, ch, bm, prefix)
+ case valueTypeFloat64:
+ matchFloat64ByPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeIPv4:
+ matchIPv4ByPrefix(bs, ch, bm, prefix, tokens)
+ case valueTypeTimestampISO8601:
+ matchTimestampISO8601ByPrefix(bs, ch, bm, prefix, tokens)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// anyCasePhraseFilter filters field entries by case-insensitive phrase match.
+//
+// An example LogsQL query: `fieldName:i(word)` or `fieldName:i("word1 ... wordN")`
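+//
+// An illustrative sketch (hypothetical field and phrase):
+//
+//	f := &anyCasePhraseFilter{fieldName: "msg", phrase: "connection refused"}
+//	f.apply(bs, bm) // matches "Connection Refused", "CONNECTION REFUSED", etc.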
+type anyCasePhraseFilter struct {
+ fieldName string
+ phrase string
+
+ tokensOnce sync.Once
+ tokens []string
+}
+
+func (pf *anyCasePhraseFilter) String() string {
+ return fmt.Sprintf("%si(%s)", quoteFieldNameIfNeeded(pf.fieldName), quoteTokenIfNeeded(pf.phrase))
+}
+
+func (pf *anyCasePhraseFilter) getTokens() []string {
+ pf.tokensOnce.Do(pf.initTokens)
+ return pf.tokens
+}
+
+func (pf *anyCasePhraseFilter) initTokens() {
+ pf.tokens = tokenizeStrings(nil, []string{pf.phrase})
+}
+
+func (pf *anyCasePhraseFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[pf.fieldName] = struct{}{}
+}
+
+func (pf *anyCasePhraseFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := pf.fieldName
+ phraseLowercase := strings.ToLower(pf.phrase)
+
+ // Verify whether pf matches const column
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchAnyCasePhrase(v, phraseLowercase) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether pf matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ // It matches anything only for empty phrase.
+ if len(phraseLowercase) > 0 {
+ bm.resetBits()
+ }
+ return
+ }
+
+ tokens := pf.getTokens()
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByAnyCasePhrase(bs, ch, bm, phraseLowercase)
+ case valueTypeDict:
+ matchValuesDictByAnyCasePhrase(bs, ch, bm, phraseLowercase)
+ case valueTypeUint8:
+ matchUint8ByExactValue(bs, ch, bm, phraseLowercase, tokens)
+ case valueTypeUint16:
+ matchUint16ByExactValue(bs, ch, bm, phraseLowercase, tokens)
+ case valueTypeUint32:
+ matchUint32ByExactValue(bs, ch, bm, phraseLowercase, tokens)
+ case valueTypeUint64:
+ matchUint64ByExactValue(bs, ch, bm, phraseLowercase, tokens)
+ case valueTypeFloat64:
+ matchFloat64ByPhrase(bs, ch, bm, phraseLowercase, tokens)
+ case valueTypeIPv4:
+ matchIPv4ByPhrase(bs, ch, bm, phraseLowercase, tokens)
+ case valueTypeTimestampISO8601:
+ phraseUppercase := strings.ToUpper(pf.phrase)
+ matchTimestampISO8601ByPhrase(bs, ch, bm, phraseUppercase, tokens)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+// phraseFilter filters field entries by phrase match (aka full text search).
+//
+// A phrase consists of any number of words with delimiters between them.
+//
+// An empty phrase matches only an empty string.
+// A single-word phrase is the simplest LogsQL query: `fieldName:word`
+//
+// Multi-word phrase is expressed as `fieldName:"word1 ... wordN"` in LogsQL.
+//
+// A special case `fieldName:""` matches any value without `fieldName` field.
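+//
+// An illustrative sketch (hypothetical field and phrase):
+//
+//	f := &phraseFilter{fieldName: "msg", phrase: "cannot open file"}
+//	f.apply(bs, bm) // keeps rows whose `msg` contains the phrase "cannot open file"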
+type phraseFilter struct {
+ fieldName string
+ phrase string
+
+ tokensOnce sync.Once
+ tokens []string
+}
+
+func (pf *phraseFilter) String() string {
+ return quoteFieldNameIfNeeded(pf.fieldName) + quoteTokenIfNeeded(pf.phrase)
+}
+
+func (pf *phraseFilter) getTokens() []string {
+ pf.tokensOnce.Do(pf.initTokens)
+ return pf.tokens
+}
+
+func (pf *phraseFilter) initTokens() {
+ pf.tokens = tokenizeStrings(nil, []string{pf.phrase})
+}
+
+func (pf *phraseFilter) updateReferencedColumnNames(m map[string]struct{}) {
+ m[pf.fieldName] = struct{}{}
+}
+
+func (pf *phraseFilter) apply(bs *blockSearch, bm *filterBitmap) {
+ fieldName := pf.fieldName
+ phrase := pf.phrase
+
+ // Verify whether pf matches const column
+ v := bs.csh.getConstColumnValue(fieldName)
+ if v != "" {
+ if !matchPhrase(v, phrase) {
+ bm.resetBits()
+ }
+ return
+ }
+
+ // Verify whether pf matches other columns
+ ch := bs.csh.getColumnHeader(fieldName)
+ if ch == nil {
+ // Fast path - there are no matching columns.
+ // It matches anything only for empty phrase.
+ if len(phrase) > 0 {
+ bm.resetBits()
+ }
+ return
+ }
+
+ tokens := pf.getTokens()
+
+ switch ch.valueType {
+ case valueTypeString:
+ matchStringByPhrase(bs, ch, bm, phrase, tokens)
+ case valueTypeDict:
+ matchValuesDictByPhrase(bs, ch, bm, phrase)
+ case valueTypeUint8:
+ matchUint8ByExactValue(bs, ch, bm, phrase, tokens)
+ case valueTypeUint16:
+ matchUint16ByExactValue(bs, ch, bm, phrase, tokens)
+ case valueTypeUint32:
+ matchUint32ByExactValue(bs, ch, bm, phrase, tokens)
+ case valueTypeUint64:
+ matchUint64ByExactValue(bs, ch, bm, phrase, tokens)
+ case valueTypeFloat64:
+ matchFloat64ByPhrase(bs, ch, bm, phrase, tokens)
+ case valueTypeIPv4:
+ matchIPv4ByPhrase(bs, ch, bm, phrase, tokens)
+ case valueTypeTimestampISO8601:
+ matchTimestampISO8601ByPhrase(bs, ch, bm, phrase, tokens)
+ default:
+ logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+ }
+}
+
+func matchTimestampISO8601ByLenRange(bm *filterBitmap, minLen, maxLen uint64) {
+ if minLen > uint64(len(iso8601Timestamp)) || maxLen < uint64(len(iso8601Timestamp)) {
+ bm.resetBits()
+ }
+}
+
+func matchTimestampISO8601ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+ if minValue > "9" || maxValue < "0" {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toTimestampISO8601StringExt(bs, bb, v)
+ return matchStringRange(s, minValue, maxValue)
+ })
+ bbPool.Put(bb)
+}
+
+func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toTimestampISO8601StringExt(bs, bb, v)
+ return re.MatchString(s)
+ })
+ bbPool.Put(bb)
+}
+
+func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if prefix == "" {
+ // Fast path - all the timestamp values match an empty prefix aka `*`
+ return
+ }
+ // There is no sense in trying to parse the prefix, since it may contain an incomplete timestamp.
+ // We cannot compare binary representations of timestamps, so the timestamp
+ // must be converted to a string before searching for the prefix there.
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toTimestampISO8601StringExt(bs, bb, v)
+ return matchPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) {
+ if len(phrases) == 1 {
+ matchTimestampISO8601ByPhrase(bs, ch, bm, phrases[0], tokens)
+ return
+ }
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ // Slow path - the phrases contain an incomplete timestamp. Search over the string representation of the timestamp.
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toTimestampISO8601StringExt(bs, bb, v)
+ return matchSequence(s, phrases)
+ })
+ bbPool.Put(bb)
+}
+
+func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if prefix == "" {
+ return
+ }
+ if prefix < "0" || prefix > "9" || !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toTimestampISO8601StringExt(bs, bb, v)
+ return matchExactPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string, tokens []string) {
+ n, ok := tryParseTimestampISO8601(value)
+ if !ok || n < ch.minValue || n > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ bb.B = encoding.MarshalUint64(bb.B, n)
+ matchBinaryValue(bs, ch, bm, bb.B, tokens)
+ bbPool.Put(bb)
+}
+
+func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) {
+ _, ok := tryParseTimestampISO8601(phrase)
+ if ok {
+ // Fast path - the phrase contains complete timestamp, so we can use exact search
+ matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens)
+ return
+ }
+
+ // Slow path - the phrase contains incomplete timestamp. Search over string representation of the timestamp.
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toTimestampISO8601StringExt(bs, bb, v)
+ return matchPhrase(s, phrase)
+ })
+ bbPool.Put(bb)
+}
+
+func matchIPv4ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+ if minValue > "9" || maxValue < "0" {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toIPv4StringExt(bs, bb, v)
+ return matchStringRange(s, minValue, maxValue)
+ })
+ bbPool.Put(bb)
+}
+
+func matchIPv4ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) {
+ if minLen > uint64(len("255.255.255.255")) || maxLen < uint64(len("0.0.0.0")) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toIPv4StringExt(bs, bb, v)
+ return matchLenRange(s, minLen, maxLen)
+ })
+ bbPool.Put(bb)
+}
+
+func matchIPv4ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue uint32) {
+ if ch.minValue > uint64(maxValue) || ch.maxValue < uint64(minValue) {
+ bm.resetBits()
+ return
+ }
+
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 4 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
+ }
+ b := bytesutil.ToUnsafeBytes(v)
+ n := encoding.UnmarshalUint32(b)
+ return n >= minValue && n <= maxValue
+ })
+}
+
+func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toIPv4StringExt(bs, bb, v)
+ return re.MatchString(s)
+ })
+ bbPool.Put(bb)
+}
+
+func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if prefix == "" {
+ // Fast path - all the ipv4 values match an empty prefix aka `*`
+ return
+ }
+ // There is no sense in trying to parse the prefix, since it may contain an incomplete IP address.
+ // We cannot compare binary representations of IP addresses, so the IP
+ // must be converted to a string before searching for the prefix there.
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toIPv4StringExt(bs, bb, v)
+ return matchPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) {
+ if len(phrases) == 1 {
+ matchIPv4ByPhrase(bs, ch, bm, phrases[0], tokens)
+ return
+ }
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ // Slow path - the phrases contain parts of an IP address. For example, `1.23` should match `1.23.4.5` and `4.1.23.54`.
+ // We cannot compare binary representations of IP addresses, so the IP
+ // must be converted to a string before searching for the phrases there.
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toIPv4StringExt(bs, bb, v)
+ return matchSequence(s, phrases)
+ })
+ bbPool.Put(bb)
+}
+
+func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if prefix == "" {
+ return
+ }
+ if prefix < "0" || prefix > "9" || len(tokens) > 3 || !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toIPv4StringExt(bs, bb, v)
+ return matchExactPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string, tokens []string) {
+ n, ok := tryParseIPv4(value)
+ if !ok || uint64(n) < ch.minValue || uint64(n) > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ bb.B = encoding.MarshalUint32(bb.B, n)
+ matchBinaryValue(bs, ch, bm, bb.B, tokens)
+ bbPool.Put(bb)
+}
+
+func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) {
+ _, ok := tryParseIPv4(phrase)
+ if ok {
+ // Fast path - phrase contains the full IP address, so we can use exact matching
+ matchIPv4ByExactValue(bs, ch, bm, phrase, tokens)
+ return
+ }
+
+ // Slow path - the phrase may contain a part of an IP address. For example, `1.23` should match `1.23.4.5` and `4.1.23.54`.
+ // We cannot compare binary representations of IP addresses, so the IP
+ // must be converted to a string before searching for the phrase there.
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toIPv4StringExt(bs, bb, v)
+ return matchPhrase(s, phrase)
+ })
+ bbPool.Put(bb)
+}
+
+func matchFloat64ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+ if minValue > "9" || maxValue < "+" {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toFloat64StringExt(bs, bb, v)
+ return matchStringRange(s, minValue, maxValue)
+ })
+ bbPool.Put(bb)
+}
+
+func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) {
+ if minLen > 24 || maxLen == 0 {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toFloat64StringExt(bs, bb, v)
+ return matchLenRange(s, minLen, maxLen)
+ })
+ bbPool.Put(bb)
+}
+
+func matchFloat64ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) {
+ if minValue > math.Float64frombits(ch.maxValue) || maxValue < math.Float64frombits(ch.minValue) {
+ bm.resetBits()
+ return
+ }
+
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 8 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v))
+ }
+ b := bytesutil.ToUnsafeBytes(v)
+ n := encoding.UnmarshalUint64(b)
+ f := math.Float64frombits(n)
+ return f >= minValue && f <= maxValue
+ })
+}
+
+func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toFloat64StringExt(bs, bb, v)
+ return re.MatchString(s)
+ })
+ bbPool.Put(bb)
+}
+
+func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if prefix == "" {
+ // Fast path - all the float64 values match an empty prefix aka `*`
+ return
+ }
+ // The prefix may contain a part of the floating-point number.
+ // For example, `foo:12*` must match `12`, `123.456` and `-0.123`.
+ // This means we cannot search in the binary representation of floating-point numbers.
+ // Instead, we have to search for the whole prefix in the string representation
+ // of floating-point numbers :(
+ _, ok := tryParseFloat64(prefix)
+ if !ok && prefix != "." && prefix != "+" && prefix != "-" && !strings.HasPrefix(prefix, "e") && !strings.HasPrefix(prefix, "E") {
+ bm.resetBits()
+ return
+ }
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toFloat64StringExt(bs, bb, v)
+ return matchPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) {
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+ // The phrase may contain a part of the floating-point number.
+ // For example, `foo:"123"` must match `123`, `123.456` and `-0.123`.
+ // This means we cannot search in the binary representation of floating-point numbers.
+ // Instead, we have to search for the whole phrase in the string representation
+ // of floating-point numbers :(
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toFloat64StringExt(bs, bb, v)
+ return matchSequence(s, phrases)
+ })
+ bbPool.Put(bb)
+}
+
+func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if prefix == "" {
+ // An empty prefix matches all the values
+ return
+ }
+ if len(tokens) > 2 || !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toFloat64StringExt(bs, bb, v)
+ return matchExactPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string, tokens []string) {
+ f, ok := tryParseFloat64(value)
+ if !ok || f < math.Float64frombits(ch.minValue) || f > math.Float64frombits(ch.maxValue) {
+ bm.resetBits()
+ return
+ }
+ n := math.Float64bits(f)
+ bb := bbPool.Get()
+ bb.B = encoding.MarshalUint64(bb.B, n)
+ matchBinaryValue(bs, ch, bm, bb.B, tokens)
+ bbPool.Put(bb)
+}
+
+func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) {
+ // The phrase may contain a part of the floating-point number.
+ // For example, `foo:"123"` must match `123`, `123.456` and `-0.123`.
+ // This means we cannot search in the binary representation of floating-point numbers.
+ // Instead, we have to search for the whole phrase in the string representation
+ // of floating-point numbers :(
+ _, ok := tryParseFloat64(phrase)
+ if !ok && phrase != "." && phrase != "+" && phrase != "-" {
+ bm.resetBits()
+ return
+ }
+ if n := strings.IndexByte(phrase, '.'); n > 0 && n < len(phrase)-1 {
+ // Fast path - the phrase contains the exact floating-point number, so we can use exact search
+ matchFloat64ByExactValue(bs, ch, bm, phrase, tokens)
+ return
+ }
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toFloat64StringExt(bs, bb, v)
+ return matchPhrase(s, phrase)
+ })
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByIPv4Range(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue uint32) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchIPv4Range(v, minValue, maxValue) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchStringRange(v, minValue, maxValue) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchLenRange(v, minLen, maxLen) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchRange(v, minValue, maxValue) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if re.MatchString(v) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefixLowercase string) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchAnyCasePrefix(v, prefixLowercase) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phraseLowercase string) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchAnyCasePhrase(v, phraseLowercase) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchPrefix(v, prefix) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases []string) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchSequence(v, phrases) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchExactPrefix(v, prefix) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if v == value {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, values map[string]struct{}) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if _, ok := values[v]; ok {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string) {
+ bb := bbPool.Get()
+ for i, v := range ch.valuesDict.values {
+ if matchPhrase(v, phrase) {
+ bb.B = append(bb.B, byte(i))
+ }
+ }
+ matchEncodedValuesDict(bs, ch, bm, bb.B)
+ bbPool.Put(bb)
+}
+
+func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *filterBitmap, encodedValues []byte) {
+ if len(encodedValues) == 0 {
+ // Fast path - none of the dict values match the filter, so the whole block doesn't match.
+ bm.resetBits()
+ return
+ }
+ // Slow path - iterate over values
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 1 {
+ logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(v))
+ }
+ n := bytes.IndexByte(encodedValues, v[0])
+ return n >= 0
+ })
+}
+
+func matchStringByIPv4Range(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue uint32) {
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchIPv4Range(v, minValue, maxValue)
+ })
+}
+
+func matchStringByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchStringRange(v, minValue, maxValue)
+ })
+}
+
+func matchStringByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) {
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchLenRange(v, minLen, maxLen)
+ })
+}
+
+func matchStringByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) {
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchRange(v, minValue, maxValue)
+ })
+}
+
+func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+ visitValues(bs, ch, bm, func(v string) bool {
+ return re.MatchString(v)
+ })
+}
+
+func matchStringByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefixLowercase string) {
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchAnyCasePrefix(v, prefixLowercase)
+ })
+}
+
+func matchStringByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phraseLowercase string) {
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchAnyCasePhrase(v, phraseLowercase)
+ })
+}
+
+func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchPrefix(v, prefix)
+ })
+}
+
+func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases []string, tokens []string) {
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchSequence(v, phrases)
+ })
+}
+
+func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchExactPrefix(v, prefix)
+ })
+}
+
+func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string, tokens []string) {
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+ visitValues(bs, ch, bm, func(v string) bool {
+ return v == value
+ })
+}
+
+func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) {
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+ visitValues(bs, ch, bm, func(v string) bool {
+ return matchPhrase(v, phrase)
+ })
+}
+
+func matchUint8ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+ if minValue > "9" || maxValue < "0" {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint8String(bs, bb, v)
+ return matchStringRange(s, minValue, maxValue)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint16ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+ if minValue > "9" || maxValue < "0" {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint16String(bs, bb, v)
+ return matchStringRange(s, minValue, maxValue)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint32ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+ if minValue > "9" || maxValue < "0" {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint32String(bs, bb, v)
+ return matchStringRange(s, minValue, maxValue)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint64ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+ if minValue > "9" || maxValue < "0" {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint64String(bs, bb, v)
+ return matchStringRange(s, minValue, maxValue)
+ })
+ bbPool.Put(bb)
+}
+
+func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool {
+ bb := bbPool.Get()
+ defer bbPool.Put(bb)
+
+ bb.B = strconv.AppendUint(bb.B[:0], ch.minValue, 10)
+ s := bytesutil.ToUnsafeString(bb.B)
+ if maxLen < uint64(len(s)) {
+ return false
+ }
+ bb.B = strconv.AppendUint(bb.B[:0], ch.maxValue, 10)
+ s = bytesutil.ToUnsafeString(bb.B)
+ return minLen <= uint64(len(s))
+}
+
+func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) {
+ if !matchMinMaxValueLen(ch, minLen, maxLen) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint8String(bs, bb, v)
+ return matchLenRange(s, minLen, maxLen)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) {
+ if !matchMinMaxValueLen(ch, minLen, maxLen) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint16String(bs, bb, v)
+ return matchLenRange(s, minLen, maxLen)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) {
+ if !matchMinMaxValueLen(ch, minLen, maxLen) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint32String(bs, bb, v)
+ return matchLenRange(s, minLen, maxLen)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) {
+ if !matchMinMaxValueLen(ch, minLen, maxLen) {
+ bm.resetBits()
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint64String(bs, bb, v)
+ return matchLenRange(s, minLen, maxLen)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint8ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) {
+ minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
+ if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 1 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v))
+ }
+ n := uint64(v[0])
+ return n >= minValueUint && n <= maxValueUint
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint16ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) {
+ minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
+ if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 2 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v))
+ }
+ b := bytesutil.ToUnsafeBytes(v)
+ n := uint64(encoding.UnmarshalUint16(b))
+ return n >= minValueUint && n <= maxValueUint
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint32ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) {
+ minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
+ if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 4 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 4", bs.partPath(), len(v))
+ }
+ b := bytesutil.ToUnsafeBytes(v)
+ n := uint64(encoding.UnmarshalUint32(b))
+ return n >= minValueUint && n <= maxValueUint
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint64ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) {
+ minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
+ if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ if len(v) != 8 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 8", bs.partPath(), len(v))
+ }
+ b := bytesutil.ToUnsafeBytes(v)
+ n := encoding.UnmarshalUint64(b)
+ return n >= minValueUint && n <= maxValueUint
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint8String(bs, bb, v)
+ return re.MatchString(s)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint16String(bs, bb, v)
+ return re.MatchString(s)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint32String(bs, bb, v)
+ return re.MatchString(s)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint64String(bs, bb, v)
+ return re.MatchString(s)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint8ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) {
+ if prefix == "" {
+ // Fast path - all the uint8 values match an empty prefix aka `*`
+ return
+ }
+ // The prefix may contain a part of the number.
+ // For example, `foo:12*` must match `12` and `123`.
+ // This means we cannot search in the binary representation of numbers.
+ // Instead, we have to search for the whole prefix in the string representation of numbers :(
+ n, ok := tryParseUint64(prefix)
+ if !ok || n > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ // There is no need to match against bloom filters, since there are no tokens to match.
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint8String(bs, bb, v)
+ return matchPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint16ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) {
+ if prefix == "" {
+ // Fast path - all the uint16 values match an empty prefix aka `*`
+ return
+ }
+ // The prefix may contain a part of the number.
+ // For example, `foo:12*` must match `12` and `123`.
+ // This means we cannot search in the binary representation of numbers.
+ // Instead, we have to search for the whole prefix in the string representation of numbers :(
+ n, ok := tryParseUint64(prefix)
+ if !ok || n > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ // There is no need to match against bloom filters, since there are no tokens to match.
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint16String(bs, bb, v)
+ return matchPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint32ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) {
+ if prefix == "" {
+ // Fast path - all the uint32 values match an empty prefix aka `*`
+ return
+ }
+ // The prefix may contain a part of the number.
+ // For example, `foo:12*` must match `12` and `123`.
+ // This means we cannot search in the binary representation of numbers.
+ // Instead, we have to search for the whole prefix in the string representation of numbers :(
+ n, ok := tryParseUint64(prefix)
+ if !ok || n > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ // There is no need to match against bloom filters, since there are no tokens to match.
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint32String(bs, bb, v)
+ return matchPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint64ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) {
+ if prefix == "" {
+ // Fast path - all the uint64 values match an empty prefix aka `*`
+ return
+ }
+ // The prefix may contain a part of the number.
+ // For example, `foo:12*` must match `12` and `123`.
+ // This means we cannot search in the binary representation of numbers.
+ // Instead, we have to search for the whole prefix in the string representation of numbers :(
+ n, ok := tryParseUint64(prefix)
+ if !ok || n > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ // There is no need to match against bloom filters, since there are no tokens to match.
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint64String(bs, bb, v)
+ return matchPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) {
+ if len(phrases) > 1 {
+ bm.resetBits()
+ return
+ }
+ matchUint8ByExactValue(bs, ch, bm, phrases[0], tokens)
+}
+
+func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) {
+ if len(phrases) > 1 {
+ bm.resetBits()
+ return
+ }
+ matchUint16ByExactValue(bs, ch, bm, phrases[0], tokens)
+}
+
+func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) {
+ if len(phrases) > 1 {
+ bm.resetBits()
+ return
+ }
+ matchUint32ByExactValue(bs, ch, bm, phrases[0], tokens)
+}
+
+func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) {
+ if len(phrases) > 1 {
+ bm.resetBits()
+ return
+ }
+ matchUint64ByExactValue(bs, ch, bm, phrases[0], tokens)
+}
+
+func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint8String(bs, bb, v)
+ return matchExactPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint16String(bs, bb, v)
+ return matchExactPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint32String(bs, bb, v)
+ return matchExactPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
+func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+ if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
+ return
+ }
+
+ bb := bbPool.Get()
+ visitValues(bs, ch, bm, func(v string) bool {
+ s := toUint64String(bs, bb, v)
+ return matchExactPrefix(s, prefix)
+ })
+ bbPool.Put(bb)
+}
+
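+// matchMinMaxExactPrefix reports whether the per-value scan is still needed.
+// It returns false when the result is already known: an empty prefix matches all the values,
+// while a multi-token prefix or a prefix above ch.maxValue matches nothing (bm is reset in that case).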
+func matchMinMaxExactPrefix(ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) bool {
+ if prefix == "" {
+ // An empty prefix matches all the values
+ return false
+ }
+ if len(tokens) > 0 {
+ // A non-empty tokens list means that the prefix contains at least two tokens.
+ // Multiple tokens cannot match any uint value.
+ bm.resetBits()
+ return false
+ }
+ n, ok := tryParseUint64(prefix)
+ if !ok || n > ch.maxValue {
+ bm.resetBits()
+ return false
+ }
+ return true
+}
+
+func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) {
+ n, ok := tryParseUint64(phrase)
+ if !ok || n < ch.minValue || n > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ bb.B = append(bb.B, byte(n))
+ matchBinaryValue(bs, ch, bm, bb.B, tokens)
+ bbPool.Put(bb)
+}
+
+func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) {
+ n, ok := tryParseUint64(phrase)
+ if !ok || n < ch.minValue || n > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ bb.B = encoding.MarshalUint16(bb.B, uint16(n))
+ matchBinaryValue(bs, ch, bm, bb.B, tokens)
+ bbPool.Put(bb)
+}
+
+func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) {
+ n, ok := tryParseUint64(phrase)
+ if !ok || n < ch.minValue || n > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ bb.B = encoding.MarshalUint32(bb.B, uint32(n))
+ matchBinaryValue(bs, ch, bm, bb.B, tokens)
+ bbPool.Put(bb)
+}
+
+func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) {
+ n, ok := tryParseUint64(phrase)
+ if !ok || n < ch.minValue || n > ch.maxValue {
+ bm.resetBits()
+ return
+ }
+ bb := bbPool.Get()
+ bb.B = encoding.MarshalUint64(bb.B, n)
+ matchBinaryValue(bs, ch, bm, bb.B, tokens)
+ bbPool.Put(bb)
+}
+
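+// matchBinaryValue matches the binary-encoded binValue against the column values,
+// which are stored in the same binary encoding, so a plain byte comparison is sufficient.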
+func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, binValue []byte, tokens []string) {
+ if !matchBloomFilterAllTokens(bs, ch, tokens) {
+ bm.resetBits()
+ return
+ }
+ visitValues(bs, ch, bm, func(v string) bool {
+ return v == string(binValue)
+ })
+}
+
+func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, values map[string]struct{}, tokenSets [][]string) {
+ if !matchBloomFilterAnyTokenSet(bs, ch, tokenSets) {
+ bm.resetBits()
+ return
+ }
+ visitValues(bs, ch, bm, func(v string) bool {
+ _, ok := values[v]
+ return ok
+ })
+}
+
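+// matchBloomFilterAnyTokenSet reports whether the block may contain rows matching at least one of the tokenSets.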
+func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, tokenSets [][]string) bool {
+ if len(tokenSets) == 0 {
+ return false
+ }
+ if len(tokenSets) > maxTokenSetsToInit || uint64(len(tokenSets)) > 10*bs.bsw.bh.rowsCount {
+ // It is faster to match every row in the block against all the values
+ // than to use the bloom filter when the number of tokenSets is too big.
+ return true
+ }
+ bf := bs.getBloomFilterForColumn(ch)
+ for _, tokens := range tokenSets {
+ if bf.containsAll(tokens) {
+ return true
+ }
+ }
+ return false
+}
+
+func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool {
+ if len(tokens) == 0 {
+ return true
+ }
+ bf := bs.getBloomFilterForColumn(ch)
+ return bf.containsAll(tokens)
+}
+
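+// visitValues calls f for every column value whose row bit is set in bm;
+// the bit is cleared for rows where f returns false.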
+func visitValues(bs *blockSearch, ch *columnHeader, bm *filterBitmap, f func(value string) bool) {
+ if bm.isZero() {
+ // Fast path - nothing to visit
+ return
+ }
+ values := bs.getValuesForColumn(ch)
+ bm.forEachSetBit(func(idx int) bool {
+ return f(values[idx])
+ })
+}
+
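+// matchAnyCasePrefix performs case-insensitive prefix matching; prefixLowercase must be lowercased by the caller.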
+func matchAnyCasePrefix(s, prefixLowercase string) bool {
+ sLowercase := strings.ToLower(s)
+ return matchPrefix(sLowercase, prefixLowercase)
+}
+
+func matchPrefix(s, prefix string) bool {
+ if len(prefix) == 0 {
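+ // An empty prefix (a bare `*`) matches any non-empty value.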
+ return len(s) > 0
+ }
+ r, _ := utf8.DecodeRuneInString(prefix)
+ startsWithToken := isTokenRune(r)
+ offset := 0
+ for {
+ n := strings.Index(s[offset:], prefix)
+ if n < 0 {
+ return false
+ }
+ offset += n
+ // Make sure that the char preceding the found prefix is a non-token char.
+ if startsWithToken && offset > 0 {
+ r, _ := utf8.DecodeLastRuneInString(s[:offset])
+ if r == utf8.RuneError || isTokenRune(r) {
+ offset++
+ continue
+ }
+ }
+ return true
+ }
+}
+
+func matchIPv4Range(s string, minValue, maxValue uint32) bool {
+ n, ok := tryParseIPv4(s)
+ if !ok {
+ return false
+ }
+ return n >= minValue && n <= maxValue
+}
+
+func matchStringRange(s, minValue, maxValue string) bool {
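+ // Note: minValue is inclusive, while maxValue is exclusive.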
+ return s >= minValue && s < maxValue
+}
+
+func matchLenRange(s string, minLen, maxLen uint64) bool {
+ sLen := uint64(utf8.RuneCountInString(s))
+ return sLen >= minLen && sLen <= maxLen
+}
+
+func matchRange(s string, minValue, maxValue float64) bool {
+ f, ok := tryParseFloat64(s)
+ if !ok {
+ return false
+ }
+ return f >= minValue && f <= maxValue
+}
+
+func matchSequence(s string, phrases []string) bool {
+ for _, phrase := range phrases {
+ n := getPhrasePos(s, phrase)
+ if n < 0 {
+ return false
+ }
+ s = s[n+len(phrase):]
+ }
+ return true
+}
+
+func matchAnyCasePhrase(s, phraseLowercase string) bool {
+ sLowercase := strings.ToLower(s)
+ return matchPhrase(sLowercase, phraseLowercase)
+}
+
+func matchExactPrefix(s, prefix string) bool {
+ return strings.HasPrefix(s, prefix)
+}
+
+func matchPhrase(s, phrase string) bool {
+ if len(phrase) == 0 {
+ return len(s) == 0
+ }
+ n := getPhrasePos(s, phrase)
+ return n >= 0
+}
+
+func getPhrasePos(s, phrase string) int {
+ r, _ := utf8.DecodeRuneInString(phrase)
+ startsWithToken := isTokenRune(r)
+ r, _ = utf8.DecodeLastRuneInString(phrase)
+ endsWithToken := isTokenRune(r)
+ pos := 0
+ for {
+ n := strings.Index(s[pos:], phrase)
+ if n < 0 {
+ return -1
+ }
+ pos += n
+ // Make sure that the found phrase is surrounded by non-token chars, i.e. it matches whole tokens.
+ if startsWithToken && pos > 0 {
+ r, _ := utf8.DecodeLastRuneInString(s[:pos])
+ if r == utf8.RuneError || isTokenRune(r) {
+ pos++
+ continue
+ }
+ }
+ if endsWithToken && pos+len(phrase) < len(s) {
+ r, _ := utf8.DecodeRuneInString(s[pos+len(phrase):])
+ if r == utf8.RuneError || isTokenRune(r) {
+ pos++
+ continue
+ }
+ }
+ return pos
+ }
+}
+
+type stringBucket struct {
+ a []string
+}
+
+func (sb *stringBucket) reset() {
+ a := sb.a
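+ // Zero the entries so the referenced strings can be garbage collected before the bucket is reused.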
+ for i := range a {
+ a[i] = ""
+ }
+ sb.a = a[:0]
+}
+
+func getStringBucket() *stringBucket {
+ v := stringBucketPool.Get()
+ if v == nil {
+ return &stringBucket{}
+ }
+ return v.(*stringBucket)
+}
+
+func putStringBucket(sb *stringBucket) {
+ sb.reset()
+ stringBucketPool.Put(sb)
+}
+
+var stringBucketPool sync.Pool
+
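+// getTokensSkipLast tokenizes s with the trailing token dropped, since that token may be an incomplete word (e.g. when s is a prefix).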
+func getTokensSkipLast(s string) []string {
+ for {
+ r, runeSize := utf8.DecodeLastRuneInString(s)
+ if !isTokenRune(r) {
+ break
+ }
+ s = s[:len(s)-runeSize]
+ }
+ return tokenizeStrings(nil, []string{s})
+}
+
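+// toUint64Range converts the given float64 range to the narrowest uint64 range covering the same integer values.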
+func toUint64Range(minValue, maxValue float64) (uint64, uint64) {
+ minValue = math.Ceil(minValue)
+ maxValue = math.Floor(maxValue)
+ return toUint64Clamp(minValue), toUint64Clamp(maxValue)
+}
+
+func toUint64Clamp(f float64) uint64 {
+ if f < 0 {
+ return 0
+ }
+ if f > (1<<64)-1 {
+ return (1 << 64) - 1
+ }
+ return uint64(f)
+}
+
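+// quoteFieldNameIfNeeded returns a quoted "<name>:" prefix for the given field name;
+// the `_msg` field and an empty field name produce an empty prefix.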
+func quoteFieldNameIfNeeded(s string) string {
+ if s == "_msg" || s == "" {
+ return ""
+ }
+ return quoteTokenIfNeeded(s) + ":"
+}
+
+func toUint8String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
+ if len(v) != 1 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v))
+ }
+ n := uint64(v[0])
+ bb.B = strconv.AppendUint(bb.B[:0], n, 10)
+ return bytesutil.ToUnsafeString(bb.B)
+}
+
+func toUint16String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
+ if len(v) != 2 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v))
+ }
+ b := bytesutil.ToUnsafeBytes(v)
+ n := uint64(encoding.UnmarshalUint16(b))
+ bb.B = strconv.AppendUint(bb.B[:0], n, 10)
+ return bytesutil.ToUnsafeString(bb.B)
+}
+
+func toUint32String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
+ if len(v) != 4 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of uint32 number: got %d; want 4", bs.partPath(), len(v))
+ }
+ b := bytesutil.ToUnsafeBytes(v)
+ n := uint64(encoding.UnmarshalUint32(b))
+ bb.B = strconv.AppendUint(bb.B[:0], n, 10)
+ return bytesutil.ToUnsafeString(bb.B)
+}
+
+func toUint64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
+ if len(v) != 8 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of uint64 number: got %d; want 8", bs.partPath(), len(v))
+ }
+ b := bytesutil.ToUnsafeBytes(v)
+ n := encoding.UnmarshalUint64(b)
+ bb.B = strconv.AppendUint(bb.B[:0], n, 10)
+ return bytesutil.ToUnsafeString(bb.B)
+}
+
+func toFloat64StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
+ if len(v) != 8 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v))
+ }
+ bb.B = toFloat64String(bb.B[:0], v)
+ return bytesutil.ToUnsafeString(bb.B)
+}
+
+func toIPv4StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
+ if len(v) != 4 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
+ }
+ bb.B = toIPv4String(bb.B[:0], v)
+ return bytesutil.ToUnsafeString(bb.B)
+}
+
+func toTimestampISO8601StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
+ if len(v) != 8 {
+ logger.Panicf("FATAL: %s: unexpected length for binary representation of ISO8601 timestamp: got %d; want 8", bs.partPath(), len(v))
+ }
+ bb.B = toTimestampISO8601String(bb.B[:0], v)
+ return bytesutil.ToUnsafeString(bb.B)
+}
diff --git a/lib/logstorage/filters_test.go b/lib/logstorage/filters_test.go
new file mode 100644
index 000000000..cf7d6e782
--- /dev/null
+++ b/lib/logstorage/filters_test.go
@@ -0,0 +1,9296 @@
+package logstorage
+
+import (
+ "fmt"
+ "math"
+ "reflect"
+ "regexp"
+ "testing"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+)
+
+func TestMatchLenRange(t *testing.T) {
+ f := func(s string, minLen, maxLen uint64, resultExpected bool) {
+ t.Helper()
+ result := matchLenRange(s, minLen, maxLen)
+ if result != resultExpected {
+ t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
+ }
+ }
+
+ f("", 0, 0, true)
+ f("", 0, 1, true)
+ f("", 1, 1, false)
+
+ f("abc", 0, 2, false)
+ f("abc", 0, 3, true)
+ f("abc", 0, 4, true)
+ f("abc", 3, 4, true)
+ f("abc", 4, 4, false)
+ f("abc", 4, 2, false)
+
+ f("ФЫВА", 3, 3, false)
+ f("ФЫВА", 4, 4, true)
+ f("ФЫВА", 5, 5, false)
+ f("ФЫВА", 0, 10, true)
+}
+
+func TestMatchPhrase(t *testing.T) {
+ f := func(s, phrase string, resultExpected bool) {
+ t.Helper()
+ result := matchPhrase(s, phrase)
+ if result != resultExpected {
+ t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
+ }
+ }
+
+ f("", "", true)
+ f("foo", "", false)
+ f("", "foo", false)
+ f("foo", "foo", true)
+ f("foo bar", "foo", true)
+ f("foo bar", "bar", true)
+ f("a foo bar", "foo", true)
+ f("a foo bar", "fo", false)
+ f("a foo bar", "oo", false)
+ f("foobar", "foo", false)
+ f("foobar", "bar", false)
+ f("foobar", "oob", false)
+ f("afoobar foo", "foo", true)
+ f("раз два (три!)", "три", true)
+ f("", "foo bar", false)
+ f("foo bar", "foo bar", true)
+ f("(foo bar)", "foo bar", true)
+ f("afoo bar", "foo bar", false)
+ f("afoo bar", "afoo ba", false)
+ f("foo bar! baz", "foo bar!", true)
+ f("a.foo bar! baz", ".foo bar! ", true)
+ f("foo bar! baz", "foo bar! b", false)
+ f("255.255.255.255", "5", false)
+ f("255.255.255.255", "55", false)
+ f("255.255.255.255", "255", true)
+ f("255.255.255.255", "5.255", false)
+ f("255.255.255.255", "255.25", false)
+ f("255.255.255.255", "255.255", true)
+}
+
+func TestMatchPrefix(t *testing.T) {
+ f := func(s, prefix string, resultExpected bool) {
+ t.Helper()
+ result := matchPrefix(s, prefix)
+ if result != resultExpected {
+ t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
+ }
+ }
+
+ f("", "", false)
+ f("foo", "", true)
+ f("", "foo", false)
+ f("foo", "foo", true)
+ f("foo bar", "foo", true)
+ f("foo bar", "bar", true)
+ f("a foo bar", "foo", true)
+ f("a foo bar", "fo", true)
+ f("a foo bar", "oo", false)
+ f("foobar", "foo", true)
+ f("foobar", "bar", false)
+ f("foobar", "oob", false)
+ f("afoobar foo", "foo", true)
+ f("раз два (три!)", "три", true)
+ f("", "foo bar", false)
+ f("foo bar", "foo bar", true)
+ f("(foo bar)", "foo bar", true)
+ f("afoo bar", "foo bar", false)
+ f("afoo bar", "afoo ba", true)
+ f("foo bar! baz", "foo bar!", true)
+ f("a.foo bar! baz", ".foo bar! ", true)
+ f("foo bar! baz", "foo bar! b", true)
+ f("255.255.255.255", "5", false)
+ f("255.255.255.255", "55", false)
+ f("255.255.255.255", "255", true)
+ f("255.255.255.255", "5.255", false)
+ f("255.255.255.255", "255.25", true)
+ f("255.255.255.255", "255.255", true)
+}
+
+func TestMatchSequence(t *testing.T) {
+ f := func(s string, phrases []string, resultExpected bool) {
+ t.Helper()
+ result := matchSequence(s, phrases)
+ if result != resultExpected {
+ t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
+ }
+ }
+
+ f("", []string{""}, true)
+ f("foo", []string{""}, true)
+ f("", []string{"foo"}, false)
+ f("foo", []string{"foo"}, true)
+ f("foo bar", []string{"foo"}, true)
+ f("foo bar", []string{"bar"}, true)
+ f("foo bar", []string{"foo bar"}, true)
+ f("foo bar", []string{"foo", "bar"}, true)
+ f("foo bar", []string{"foo", " bar"}, true)
+ f("foo bar", []string{"foo ", "bar"}, true)
+ f("foo bar", []string{"foo ", " bar"}, false)
+ f("foo bar", []string{"bar", "foo"}, false)
+}
+
+func TestMatchStringRange(t *testing.T) {
+ f := func(s, minValue, maxValue string, resultExpected bool) {
+ t.Helper()
+ result := matchStringRange(s, minValue, maxValue)
+ if result != resultExpected {
+ t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
+ }
+ }
+
+ f("foo", "a", "b", false)
+ f("foo", "a", "foa", false)
+ f("foo", "a", "foz", true)
+ f("foo", "foo", "foo", false)
+ f("foo", "foo", "fooa", true)
+ f("foo", "fooa", "foo", false)
+}
+
+func TestMatchIPv4Range(t *testing.T) {
+ f := func(s string, minValue, maxValue uint32, resultExpected bool) {
+ t.Helper()
+ result := matchIPv4Range(s, minValue, maxValue)
+ if result != resultExpected {
+ t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
+ }
+ }
+
+ // Invalid IP
+ f("", 0, 1000, false)
+ f("123", 0, 1000, false)
+
+ // range mismatch
+ f("0.0.0.1", 2, 100, false)
+ f("127.0.0.1", 0x6f000000, 0x7f000000, false)
+
+ // range match
+ f("0.0.0.1", 1, 1, true)
+ f("0.0.0.1", 0, 100, true)
+ f("127.0.0.1", 0x7f000000, 0x7f000001, true)
+}
+
+func TestFilterBitmap(t *testing.T) {
+ for i := 0; i < 100; i++ {
+ bm := getFilterBitmap(i)
+ if bm.bitsLen != i {
+ t.Fatalf("unexpected bits length: %d; want %d", bm.bitsLen, i)
+ }
+
+ // Make sure that all the bits are set.
+ nextIdx := 0
+ bm.forEachSetBit(func(idx int) bool {
+ if idx >= i {
+ t.Fatalf("index must be smaller than %d", i)
+ }
+ if idx != nextIdx {
+ t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx)
+ }
+ nextIdx++
+ return true
+ })
+
+ // Clear a part of bits
+ bm.forEachSetBit(func(idx int) bool {
+ return idx%2 != 0
+ })
+ nextIdx = 1
+ bm.forEachSetBit(func(idx int) bool {
+ if idx != nextIdx {
+ t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx)
+ }
+ nextIdx += 2
+ return true
+ })
+
+ // Clear all the bits
+ bm.forEachSetBit(func(idx int) bool {
+ return false
+ })
+ bitsCount := 0
+ bm.forEachSetBit(func(idx int) bool {
+ bitsCount++
+ return true
+ })
+ if bitsCount != 0 {
+ t.Fatalf("unexpected non-zero number of set bits remained: %d", bitsCount)
+ }
+
+ putFilterBitmap(bm)
+ }
+}
+
+func TestComplexFilters(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "a",
+ "a foobar abcdef",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // (foobar AND NOT baz AND (abcdef OR xyz))
+ f := &andFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "foobar",
+ },
+ ¬Filter{
+ f: &phraseFilter{
+ fieldName: "foo",
+ phrase: "baz",
+ },
+ },
+ &orFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "abcdef",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "xyz",
+ },
+ },
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, f, "foo", []int{6})
+
+ // (foobaz AND NOT baz AND (abcdef OR xyz))
+ f = &andFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "foobaz",
+ },
+ ¬Filter{
+ f: &phraseFilter{
+ fieldName: "foo",
+ phrase: "baz",
+ },
+ },
+ &orFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "abcdef",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "xyz",
+ },
+ },
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, f, "foo", nil)
+
+ // (foobar AND NOT baz AND (abcdef OR xyz OR a))
+ f = &andFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "foobar",
+ },
+ ¬Filter{
+ f: &phraseFilter{
+ fieldName: "foo",
+ phrase: "baz",
+ },
+ },
+ &orFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "abcdef",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "xyz",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "a",
+ },
+ },
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, f, "foo", []int{1, 6})
+
+ // (foobar AND NOT qwert AND (abcdef OR xyz OR a))
+ f = &andFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "foobar",
+ },
+ ¬Filter{
+ f: &phraseFilter{
+ fieldName: "foo",
+ phrase: "qwert",
+ },
+ },
+ &orFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "abcdef",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "xyz",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "a",
+ },
+ },
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, f, "foo", []int{1, 3, 6})
+}
+
+func TestOrFilter(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "a",
+ "a foobar abcdef",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // non-empty union
+ of := &orFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "23",
+ },
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, of, "foo", []int{2, 6, 9})
+
+ // reverse non-empty union
+ of = &orFilter{
+ filters: []filter{
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "23",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, of, "foo", []int{2, 6, 9})
+
+ // first empty result, second non-empty result
+ of = &orFilter{
+ filters: []filter{
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "xabc",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "23",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, of, "foo", []int{9})
+
+ // first non-empty result, second empty result
+ of = &orFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "23",
+ },
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "xabc",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, of, "foo", []int{9})
+
+ // first match all
+ of = &orFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "a",
+ },
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "23",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, of, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // second match all
+ of = &orFilter{
+ filters: []filter{
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "23",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "a",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, of, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // both empty results
+ of = &orFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "x23",
+ },
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "xabc",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, of, "foo", nil)
+}
+
+func TestAndFilter(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "",
+ "a foobar abcdef",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // non-empty intersection
+ af := &andFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "a",
+ },
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{2, 6})
+
+ // reverse non-empty intersection
+ af = &andFilter{
+ filters: []filter{
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "a",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{2, 6})
+
+ // the first filter mismatch
+ af = &andFilter{
+ filters: []filter{
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "bc",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "a",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ // the last filter mismatch
+ af = &andFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "abc",
+ },
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "foo",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ // empty intersection
+ af = &andFilter{
+ filters: []filter{
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "foo",
+ },
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ // reverse empty intersection
+ af = &andFilter{
+ filters: []filter{
+ &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc",
+ },
+ &phraseFilter{
+ fieldName: "foo",
+ phrase: "foo",
+ },
+ },
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+}
+
+func TestNotFilter(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "",
+ "a foobar",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ nf := ¬Filter{
+ f: &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ },
+ }
+ testFilterMatchForColumns(t, columns, nf, "foo", []int{0, 1, 2, 3, 4, 6, 7, 8, 9})
+
+ nf = ¬Filter{
+ f: &phraseFilter{
+ fieldName: "foo",
+ phrase: "a",
+ },
+ }
+ testFilterMatchForColumns(t, columns, nf, "foo", []int{5})
+
+ nf = ¬Filter{
+ f: &phraseFilter{
+ fieldName: "non-existing-field",
+ phrase: "foobar",
+ },
+ }
+ testFilterMatchForColumns(t, columns, nf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ nf = ¬Filter{
+ f: &prefixFilter{
+ fieldName: "non-existing-field",
+ prefix: "",
+ },
+ }
+ testFilterMatchForColumns(t, columns, nf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ nf = ¬Filter{
+ f: &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ },
+ }
+ testFilterMatchForColumns(t, columns, nf, "foo", []int{5})
+
+ // mismatch
+ nf = ¬Filter{
+ f: &phraseFilter{
+ fieldName: "non-existing-field",
+ phrase: "",
+ },
+ }
+ testFilterMatchForColumns(t, columns, nf, "foo", nil)
+}
+
+func TestTimeFilter(t *testing.T) {
+ timestamps := []int64{
+ 1,
+ 9,
+ 123,
+ 456,
+ 789,
+ }
+
+ // match
+ tf := &timeFilter{
+ minTimestamp: -10,
+ maxTimestamp: 1,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, []int{0})
+
+ tf = &timeFilter{
+ minTimestamp: -10,
+ maxTimestamp: 10,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, []int{0, 1})
+
+ tf = &timeFilter{
+ minTimestamp: 1,
+ maxTimestamp: 1,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, []int{0})
+
+ tf = &timeFilter{
+ minTimestamp: 2,
+ maxTimestamp: 456,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, []int{1, 2, 3})
+
+ tf = &timeFilter{
+ minTimestamp: 2,
+ maxTimestamp: 457,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, []int{1, 2, 3})
+
+ tf = &timeFilter{
+ minTimestamp: 120,
+ maxTimestamp: 788,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, []int{2, 3})
+
+ tf = &timeFilter{
+ minTimestamp: 120,
+ maxTimestamp: 789,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, []int{2, 3, 4})
+
+ tf = &timeFilter{
+ minTimestamp: 120,
+ maxTimestamp: 10000,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, []int{2, 3, 4})
+
+ tf = &timeFilter{
+ minTimestamp: 789,
+ maxTimestamp: 1000,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, []int{4})
+
+ // mismatch
+ tf = &timeFilter{
+ minTimestamp: -1000,
+ maxTimestamp: 0,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, nil)
+
+ tf = &timeFilter{
+ minTimestamp: 790,
+ maxTimestamp: 1000,
+ }
+ testFilterMatchForTimestamps(t, timestamps, tf, nil)
+}
+
+func TestStreamFilter(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "",
+ "a foobar",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // Match
+ f := &exactFilter{
+ fieldName: "job",
+ value: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, f, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // Mismatch
+ f = &exactFilter{
+ fieldName: "job",
+ value: "abc",
+ }
+ testFilterMatchForColumns(t, columns, f, "foo", nil)
+}
+
+func TestSequenceFilter(t *testing.T) {
+ t.Run("single-row", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"abc"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"def"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"abc def"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"abc ", "", "def", ""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"ab"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"abc", "abc"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"abc", "def", "foo"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ "abc def",
+ "abc def",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"abc", " def"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"abc ", ""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{"", ""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"abc def ", "foobar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing column",
+ phrases: []string{"x", "yz"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "baz foobar",
+ "abc",
+ "afdf foobar baz",
+ "fddf foobarbaz",
+ "afoobarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"foobar", "baz"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{3})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"baz", "aaaa"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing column",
+ phrases: []string{"foobar", "aaaa"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a bb foo",
+ "bb a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "a afoobarbaz",
+ "a foobar bb",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"a", "bb"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 6})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"НГКШ", " "},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{8})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"!,", "(!1)"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{9})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"aa a", "bcdasqq"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"@", "!!!!"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"12"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{1, 5})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"", "bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"1234"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"1234", "567"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "256",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"12"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{1, 5})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"", "bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"1234"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"1234", "567"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "65536",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"12"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{1, 5})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"", "bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"1234"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"1234", "567"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "12345678901",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"12"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{1, 5})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"", "bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"1234"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"1234", "567"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "-65536",
+ "1234.5678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"-", "65536"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{3})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"1234.", "5678901"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{4})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"", "5678901"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{4})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"", ""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"65536", "-"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"5678901", "1234"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"12345678901234567890"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "1.0.127.6",
+ "55.55.55.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"127.0.0.1"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{2, 4, 5, 7})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"127", "1"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{2, 4, 5, 7})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"127.0.0"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{2, 4, 5, 7})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"2.3", ".4"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"5"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"127.", "1", "1", "345"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"27.0"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "foo",
+ phrases: []string{"255.255.255.255"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // match
+ sf := &sequenceFilter{
+ fieldName: "_msg",
+ phrases: []string{"2006-01-02T15:04:05.005Z"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", []int{4})
+
+ sf = &sequenceFilter{
+ fieldName: "_msg",
+ phrases: []string{"2006-01", "04:05."},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ sf = &sequenceFilter{
+ fieldName: "_msg",
+ phrases: []string{"2006", "002Z"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", []int{1})
+
+ sf = &sequenceFilter{
+ fieldName: "_msg",
+ phrases: []string{},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ sf = &sequenceFilter{
+ fieldName: "_msg",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ sf = &sequenceFilter{
+ fieldName: "non-existing-column",
+ phrases: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ sf = &sequenceFilter{
+ fieldName: "_msg",
+ phrases: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "_msg",
+ phrases: []string{"002Z", "2006"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "_msg",
+ phrases: []string{"2006-04-02T15:04:05.005Z", "2023"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", nil)
+
+ sf = &sequenceFilter{
+ fieldName: "_msg",
+ phrases: []string{"06"},
+ }
+ testFilterMatchForColumns(t, columns, sf, "_msg", nil)
+ })
+}
+
+func TestExactPrefixFilter(t *testing.T) {
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "foobar",
+ "abc",
+ "afdf foobar baz",
+ "fddf foobarbaz",
+ "foobarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5, 6})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "baz",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "non-existing column",
+ prefix: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "aa fddf foobarbaz",
+ "a afoobarbaz",
+ "a foobar baz",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "aa ",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{2, 4})
+
+ ef = &exactPrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "aa b",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "fobar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "aa",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "12",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 5})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "999",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "7",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "467",
+ "5",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "12",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 5})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "999",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "7",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "65536",
+ "5",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "12",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 5})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "99999",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "7",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "123456789012",
+ "5",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "12",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 5})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "1234567890123",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "7",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "-65536",
+ "1234.5678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "123",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 4})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "1234.567",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{4})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "-65536",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{3})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "6511",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.2",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.55.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "127.0.",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{2, 4, 5, 7})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "255",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:06.004Z",
+ "2006-01-02T15:04:06.005Z",
+ "2006-01-02T15:04:07.006Z",
+ "2006-01-02T15:04:10.007Z",
+ "2006-01-02T15:04:12.008Z",
+ "2006-01-02T15:04:15.009Z",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-01-02T15:04:05",
+ }
+ testFilterMatchForColumns(t, columns, ef, "_msg", []int{0, 1, 2})
+
+ ef = &exactPrefixFilter{
+ fieldName: "_msg",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "_msg",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "_msg", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "_msg",
+ prefix: "0",
+ }
+ testFilterMatchForColumns(t, columns, ef, "_msg", nil)
+ })
+
+ t.Run("single-row", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "abc d",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0})
+
+ ef = &exactPrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "xabc",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "non-existing column",
+ prefix: "abc",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ "abc def",
+ "abc def",
+ },
+ },
+ }
+
+ // match
+ ef := &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "ab",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2})
+
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2})
+
+ ef = &exactPrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2})
+
+ // mismatch
+ ef = &exactPrefixFilter{
+ fieldName: "foo",
+ prefix: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactPrefixFilter{
+ fieldName: "non-existing column",
+ prefix: "x",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+}
+
+func TestExactFilter(t *testing.T) {
+ t.Run("single-row", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "abc",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "non-existing column",
+ value: "abc",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ "abc def",
+ "abc def",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "non-existing column",
+ value: "x",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "foobar",
+ "abc",
+ "afdf foobar baz",
+ "fddf foobarbaz",
+ "afoobarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 6})
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "baz",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "non-existing column",
+ value: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "a afoobarbaz",
+ "a foobar baz",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "aa abc a",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{2})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "aa a",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "fooaaazz a",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "12",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "33",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "256",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "12",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "33",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "65536",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "12",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "33",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "12345678901",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "12",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "33",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "-65536",
+ "1234.5678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "1234",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0})
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "1234.5678901",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{4})
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "-65536",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{3})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "65536",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "123",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "12345678901234567890",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.55.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "foo",
+ value: "127.0.0.1",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{2, 4, 5, 7})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "127.0",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+
+ ef = &exactFilter{
+ fieldName: "foo",
+ value: "255.255.255.255",
+ }
+ testFilterMatchForColumns(t, columns, ef, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // match
+ ef := &exactFilter{
+ fieldName: "_msg",
+ value: "2006-01-02T15:04:05.005Z",
+ }
+ testFilterMatchForColumns(t, columns, ef, "_msg", []int{4})
+
+ ef = &exactFilter{
+ fieldName: "non-existing-column",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ ef = &exactFilter{
+ fieldName: "_msg",
+ value: "bar",
+ }
+ testFilterMatchForColumns(t, columns, ef, "_msg", nil)
+
+ ef = &exactFilter{
+ fieldName: "_msg",
+ value: "",
+ }
+ testFilterMatchForColumns(t, columns, ef, "_msg", nil)
+
+ ef = &exactFilter{
+ fieldName: "_msg",
+ value: "2006-03-02T15:04:05.005Z",
+ }
+ testFilterMatchForColumns(t, columns, ef, "_msg", nil)
+ })
+}
+
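+// TestInFilter verifies that inFilter matches rows whose field value equals any of the listed values; an empty values list matches nothing.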
+func TestInFilter(t *testing.T) {
+ t.Run("single-row", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ },
+ },
+ {
+ name: "other column",
+ values: []string{
+ "asdfdsf",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"abc def", "abc", "foobar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0})
+
+ af = &inFilter{
+ fieldName: "other column",
+ values: []string{"asdfdsf", ""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{"", "foo"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"abc", "def"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"", "abc"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "other column",
+ values: []string{"sd"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "non-existing column",
+ values: []string{"abc", "def"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ "abc def",
+ "abc def",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"aaaa", "abc def", "foobar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{"", "abc"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"abc def ", "foobar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "non-existing column",
+ values: []string{"x"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "foobar",
+ "abc",
+ "afdf foobar baz",
+ "fddf foobarbaz",
+ "afoobarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"foobar", "aaaa", "abc", "baz"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 6})
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"bbbb", "", "aaaa"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"bar", "aaaa"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "non-existing column",
+ values: []string{"foobar", "aaaa"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "a afoobarbaz",
+ "a foobar",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"a foobar", "aa abc a"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 6})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"aa a"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"12", "32"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 5})
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"0"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{3, 4})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"33"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"1234"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "256",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"12", "32"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 5})
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"0"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{3, 4})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"33"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"123456"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "65536",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"12", "32"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 5})
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"0"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{3, 4})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"33"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"12345678901"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "12345678901",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"12", "32"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 5})
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"0"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{3, 4})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"33"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "-65536",
+ "1234.5678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"1234", "1", "foobar", "123211"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 5})
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"1234.5678901"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{4})
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"-65536"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{3})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"65536"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"123"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"12345678901234567890"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.55.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "foo",
+ values: []string{"127.0.0.1", "24.54.1.2", "127.0.4.2"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{2, 4, 5, 6, 7})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"5"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+
+ af = &inFilter{
+ fieldName: "foo",
+ values: []string{"255.255.255.255"},
+ }
+ testFilterMatchForColumns(t, columns, af, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // match
+ af := &inFilter{
+ fieldName: "_msg",
+ values: []string{"2006-01-02T15:04:05.005Z", "foobar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "_msg", []int{4})
+
+ af = &inFilter{
+ fieldName: "non-existing-column",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ af = &inFilter{
+ fieldName: "_msg",
+ values: []string{"bar"},
+ }
+ testFilterMatchForColumns(t, columns, af, "_msg", nil)
+
+ af = &inFilter{
+ fieldName: "_msg",
+ values: []string{},
+ }
+ testFilterMatchForColumns(t, columns, af, "_msg", nil)
+
+ af = &inFilter{
+ fieldName: "_msg",
+ values: []string{""},
+ }
+ testFilterMatchForColumns(t, columns, af, "_msg", nil)
+
+ af = &inFilter{
+ fieldName: "_msg",
+ values: []string{"2006-04-02T15:04:05.005Z"},
+ }
+ testFilterMatchForColumns(t, columns, af, "_msg", nil)
+ })
+}
+
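+// TestRegexpFilter verifies that regexpFilter matches rows whose field value contains a match of the given regular expression.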
+func TestRegexpFilter(t *testing.T) {
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.0.1",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("0.0"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile(`^127\.0\.0\.1$`),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ rf = &regexpFilter{
+ fieldName: "non-existing-column",
+ re: regexp.MustCompile("foo.+bar|"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("foo.+bar"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &regexpFilter{
+ fieldName: "non-existing-column",
+ re: regexp.MustCompile("foo.+bar"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "127.0.0.1",
+ "Abc",
+ "127.255.255.255",
+ "10.4",
+ "foo 127.0.0.1",
+ "127.0.0.1 bar",
+ "127.0.0.1",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("foo|bar|^$"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 5, 6})
+
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("27.0"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 5, 6, 7})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("bar.+foo"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "A FOO",
+ "a 10",
+ "127.0.0.1",
+ "20",
+ "15.5",
+ "-5",
+ "a fooBaR",
+ "a 127.0.0.1 dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("(?i)foo|йцу"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 6, 8})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("qwe.+rty|^$"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("[32][23]?"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 7, 8})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("foo|bar"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "65535",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("[32][23]?"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 7, 8})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("foo|bar"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "65536",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("[32][23]?"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 7, 8})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("foo|bar"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12345678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("[32][23]?"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 7, 8})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("foo|bar"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "123456.78901",
+ "-0.2",
+ "2",
+ "-334",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("[32][23]?"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 6, 7, 8})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("foo|bar"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.12.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("127.0.[40].(1|2)"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 4, 5, 6, 7})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "foo",
+ re: regexp.MustCompile("foo|bar|834"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // match
+ rf := &regexpFilter{
+ fieldName: "_msg",
+ re: regexp.MustCompile("2006-[0-9]{2}-.+?(2|5)Z"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", []int{1, 4})
+
+ // mismatch
+ rf = &regexpFilter{
+ fieldName: "_msg",
+ re: regexp.MustCompile("^01|04$"),
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", nil)
+ })
+}
+
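+// TestStringRangeFilter verifies that stringRangeFilter matches rows whose field value lies between minValue and maxValue in lexicographic order; inverted ranges match nothing.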
+func TestStringRangeFilter(t *testing.T) {
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.0.1",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "127.0.0.1",
+ maxValue: "255.",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "127.0.0.1",
+ maxValue: "127.0.0.1",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "",
+ maxValue: "127.0.0.0",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "non-existing-column",
+ minValue: "1",
+ maxValue: "2",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "127.0.0.2",
+ maxValue: "",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "127.0.0.1",
+ "Abc",
+ "127.255.255.255",
+ "10.4",
+ "foo 127.0.0.1",
+ "127.0.0.1 bar",
+ "127.0.0.1",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "127.0.0.0",
+ maxValue: "128.0.0.0",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 3, 6, 7})
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "127",
+ maxValue: "127.0.0.1",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 7})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "0",
+ maxValue: "10",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "127.0.0.2",
+ maxValue: "127.127.0.0",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "128.0.0.0",
+ maxValue: "127.0.0.0",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "A FOO",
+ "a 10",
+ "127.0.0.1",
+ "20",
+ "15.5",
+ "-5",
+ "a fooBaR",
+ "a 127.0.0.1 dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "127.0.0.1",
+ maxValue: "255.255.255.255",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 4})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "0",
+ maxValue: "10",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "255.255.255.255",
+ maxValue: "127.0.0.1",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "33",
+ maxValue: "5",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "a",
+ maxValue: "b",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "100",
+ maxValue: "101",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "5",
+ maxValue: "33",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "65535",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "33",
+ maxValue: "5",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "a",
+ maxValue: "b",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "100",
+ maxValue: "101",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "5",
+ maxValue: "33",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "65536",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "33",
+ maxValue: "5",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "a",
+ maxValue: "b",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "100",
+ maxValue: "101",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "5",
+ maxValue: "33",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12345678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "33",
+ maxValue: "5",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "a",
+ maxValue: "b",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "100",
+ maxValue: "101",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "5",
+ maxValue: "33",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "123456.78901",
+ "-0.2",
+ "2",
+ "-334",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "33",
+ maxValue: "5",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10})
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "-0",
+ maxValue: "-1",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{6})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "a",
+ maxValue: "b",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "100",
+ maxValue: "101",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "5",
+ maxValue: "33",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.12.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "127.0.0",
+ maxValue: "128.0.0.0",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 4, 5, 6, 7})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "a",
+ maxValue: "b",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "128.0.0.0",
+ maxValue: "129.0.0.0",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "255.0.0.0",
+ maxValue: "255.255.255.255",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "foo",
+ minValue: "128.0.0.0",
+ maxValue: "",
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2005-01-02T15:04:05.001Z",
+ "2006-02-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2026-01-02T15:04:05.005Z",
+ "2026-01-02T15:04:05.006Z",
+ "2026-01-02T15:04:05.007Z",
+ "2026-01-02T15:04:05.008Z",
+ "2026-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // match
+ rf := &stringRangeFilter{
+ fieldName: "_msg",
+ minValue: "2006-01-02",
+ maxValue: "2006-01-03",
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", []int{2, 3})
+
+ rf = &stringRangeFilter{
+ fieldName: "_msg",
+ minValue: "",
+ maxValue: "2006",
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", []int{0})
+
+ // mismatch
+ rf = &stringRangeFilter{
+ fieldName: "_msg",
+ minValue: "3",
+ maxValue: "4",
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "_msg",
+ minValue: "a",
+ maxValue: "b",
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", nil)
+
+ rf = &stringRangeFilter{
+ fieldName: "_msg",
+ minValue: "2006-01-03",
+ maxValue: "2006-01-02",
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", nil)
+ })
+}
+
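+// TestIPv4RangeFilter verifies that ipv4RangeFilter matches rows whose field value parses as an IPv4 address within the given numeric range; non-IPv4 values and inverted ranges match nothing.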
+func TestIPv4RangeFilter(t *testing.T) {
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.0.1",
+ },
+ },
+ }
+
+ // match
+ rf := &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 0x80000000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0x7f000001,
+ maxValue: 0x7f000001,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 0x7f000000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &ipv4RangeFilter{
+ fieldName: "non-existing-column",
+ minValue: 0,
+ maxValue: 20000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0x80000000,
+ maxValue: 0,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "127.0.0.1",
+ "Abc",
+ "127.255.255.255",
+ "10.4",
+ "foo 127.0.0.1",
+ "127.0.0.1 bar",
+ "127.0.0.1",
+ },
+ },
+ }
+
+ // match
+ rf := &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0x7f000000,
+ maxValue: 0x80000000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 3, 7})
+
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 0x7f000001,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 7})
+
+ // mismatch
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 1000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0x7f000002,
+ maxValue: 0x7f7f0000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0x80000000,
+ maxValue: 0x7f000000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "A FOO",
+ "a 10",
+ "127.0.0.1",
+ "20",
+ "15.5",
+ "-5",
+ "a fooBaR",
+ "a 127.0.0.1 dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ rf := &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0x7f000000,
+ maxValue: 0xffffffff,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{2})
+
+ // mismatch
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 10000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0xffffffff,
+ maxValue: 0x7f000000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // mismatch
+ rf := &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 0xffffffff,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "65535",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // mismatch
+ rf := &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 0xffffffff,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "65536",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // mismatch
+ rf := &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 0xffffffff,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12345678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // mismatch
+ rf := &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 0xffffffff,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "123456.78901",
+ "-0.2",
+ "2",
+ "-334",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // mismatch
+ rf := &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 0xffffffff,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.12.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ rf := &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 0x08000000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 11})
+
+ // mismatch
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0x80000000,
+ maxValue: 0x90000000,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0xff000000,
+ maxValue: 0xffffffff,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &ipv4RangeFilter{
+ fieldName: "foo",
+ minValue: 0x08000000,
+ maxValue: 0,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // mismatch
+ rf := &ipv4RangeFilter{
+ fieldName: "_msg",
+ minValue: 0,
+ maxValue: 0xffffffff,
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", nil)
+ })
+}
+
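+// TestLenRangeFilter exercises lenRangeFilter length-range matching against const, dict, string, uint, float64, ipv4 and timestamp columns.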
+func TestLenRangeFilter(t *testing.T) {
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "10",
+ "10",
+ "10",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 2,
+ maxLen: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ rf = &lenRangeFilter{
+ fieldName: "non-existing-column",
+ minLen: 0,
+ maxLen: 10,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 3,
+ maxLen: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &lenRangeFilter{
+ fieldName: "non-existing-column",
+ minLen: 10,
+ maxLen: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "10",
+ "Abc",
+ "20",
+ "10.5",
+ "10 AFoobarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 2,
+ maxLen: 3,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 2, 3})
+
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 0,
+ maxLen: 1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 20,
+ maxLen: 30,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "A FOO",
+ "a 10",
+ "10",
+ "20",
+ "15.5",
+ "-5",
+ "a fooBaR",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 2,
+ maxLen: 3,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 5})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 100,
+ maxLen: 200,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 2,
+ maxLen: 2,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 2, 5})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 0,
+ maxLen: 0,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 10,
+ maxLen: 10,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "256",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 2,
+ maxLen: 2,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 2, 5})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 0,
+ maxLen: 0,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 10,
+ maxLen: 10,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "65536",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 2,
+ maxLen: 2,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 2, 5})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 0,
+ maxLen: 0,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 10,
+ maxLen: 10,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123456789012",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 2,
+ maxLen: 2,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 2, 5})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 0,
+ maxLen: 0,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 20,
+ maxLen: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "123456.78901",
+ "-0.2",
+ "2",
+ "-334",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 2,
+ maxLen: 2,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 2})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 100,
+ maxLen: 200,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.12.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 3,
+ maxLen: 7,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 11})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "foo",
+ minLen: 20,
+ maxLen: 30,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // match
+ rf := &lenRangeFilter{
+ fieldName: "_msg",
+ minLen: 10,
+ maxLen: 30,
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ rf = &lenRangeFilter{
+ fieldName: "_msg",
+ minLen: 10,
+ maxLen: 11,
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", nil)
+ })
+}
+
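+// TestRangeFilter verifies that rangeFilter matches rows whose field value parses as a number within [minValue, maxValue]; non-numeric values, including IPv4 addresses, match nothing.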
+func TestRangeFilter(t *testing.T) {
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "10",
+ "10",
+ "10",
+ },
+ },
+ }
+
+ // match
+ rf := &rangeFilter{
+ fieldName: "foo",
+ minValue: -10,
+ maxValue: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 10,
+ maxValue: 10,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 10,
+ maxValue: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -10,
+ maxValue: 9.99,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 20,
+ maxValue: -10,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 10.1,
+ maxValue: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "non-existing-column",
+ minValue: 10,
+ maxValue: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 11,
+ maxValue: 10,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "10",
+ "Abc",
+ "20",
+ "10.5",
+ "10 AFoobarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ rf := &rangeFilter{
+ fieldName: "foo",
+ minValue: -10,
+ maxValue: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 3, 4})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 10,
+ maxValue: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 3, 4})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 10.1,
+ maxValue: 19.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{4})
+
+ // mismatch
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -11,
+ maxValue: 0,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 11,
+ maxValue: 19,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 20.1,
+ maxValue: 100,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 20,
+ maxValue: 10,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "A FOO",
+ "a 10",
+ "10",
+ "20",
+ "15.5",
+ "-5",
+ "a fooBaR",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ rf := &rangeFilter{
+ fieldName: "foo",
+ minValue: -100,
+ maxValue: 100,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 4, 5})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 10,
+ maxValue: 20,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 4})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -5,
+ maxValue: -5,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{5})
+
+ // mismatch
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -10,
+ maxValue: -5.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 20.1,
+ maxValue: 100,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 20,
+ maxValue: 10,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &rangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 3,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 2.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{6, 7})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: 2.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7})
+
+ // mismatch
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: -0.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 0.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 2.9,
+ maxValue: 0.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "65535",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &rangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 3,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 2.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{6, 7})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: 2.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7})
+
+ // mismatch
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: -0.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 0.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 2.9,
+ maxValue: 0.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "65536",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &rangeFilter{
+ fieldName: "foo",
+ minValue: 0,
+ maxValue: 3,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 2.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{6, 7})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: 2.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7})
+
+ // mismatch
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: -0.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 0.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 2.9,
+ maxValue: 0.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12345678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
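+ // An infinite bound makes the range one-sided: math.Inf(-1) disables the lower bound,
+ // while math.Inf(1) disables the upper bound.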
+ rf := &rangeFilter{
+ fieldName: "foo",
+ minValue: math.Inf(-1),
+ maxValue: 3,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 2.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{6, 7})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: 2.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 1000,
+ maxValue: math.Inf(1),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{5})
+
+ // mismatch
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: -0.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 0.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 2.9,
+ maxValue: 0.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "123456.78901",
+ "-0.2",
+ "2",
+ "-334",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ rf := &rangeFilter{
+ fieldName: "foo",
+ minValue: math.Inf(-1),
+ maxValue: 3,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 2.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{7})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: 1.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 8})
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 1000,
+ maxValue: math.Inf(1),
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", []int{5})
+
+ // mismatch
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: -1e18,
+ maxValue: -334.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 0.1,
+ maxValue: 0.9,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+
+ rf = &rangeFilter{
+ fieldName: "foo",
+ minValue: 2.9,
+ maxValue: 0.1,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.12.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // range filter always mismatches ipv4
+ rf := &rangeFilter{
+ fieldName: "foo",
+ minValue: -100,
+ maxValue: 100,
+ }
+ testFilterMatchForColumns(t, columns, rf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // range filter always mismatches timestamp
+ rf := &rangeFilter{
+ fieldName: "_msg",
+ minValue: -100,
+ maxValue: 100,
+ }
+ testFilterMatchForColumns(t, columns, rf, "_msg", nil)
+ })
+}
+
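+// TestAnyCasePrefixFilter covers anyCasePrefixFilter (case-insensitive prefix matching)
+// across single-row, const, dict, string, uint8..uint64, float64, ipv4 and ISO8601 timestamp columns.
+// An empty prefix is expected to match every row with a non-empty value in the given field.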
+func TestAnyCasePrefixFilter(t *testing.T) {
+ t.Run("single-row", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "aBc DEf",
+ },
+ },
+ {
+ name: "other column",
+ values: []string{
+ "aSDfdsf",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "ABC",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "ab",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "other column",
+ prefix: "asdfdSF",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "bc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "other column",
+ prefix: "sd",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing column",
+ prefix: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "other-column",
+ values: []string{
+ "x",
+ "X",
+ "X",
+ },
+ },
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ "ABC DEF",
+ "AbC Def",
+ },
+ },
+ {
+ name: "_msg",
+ values: []string{
+ "1 2 3",
+ "1 2 3",
+ "1 2 3",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "Abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "AB",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "abc de",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: " de",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "other-column",
+ prefix: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: " 2 ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "abc def ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "other-column",
+ prefix: "foo",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing column",
+ prefix: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "foo",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "fOObar",
+ "Abc",
+ "aFDf FooBar baz",
+ "fddf FOObarBAZ",
+ "AFoobarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "FooBar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 4, 6})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 3, 4, 5, 6})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "ba",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing column",
+ prefix: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "A FOO",
+ "a fOoBar",
+ "aA aBC A",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "a afoobarbaz",
+ "a fooBaR",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "a",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "нГк",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{8})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "aa a",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "!,",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{9})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "aa ax",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "qwe rty abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "@",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "12",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 5})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65535",
+ "1234",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "123",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "123456",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65536",
+ "1234",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "123",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "12345678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65536",
+ "12345678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "12345678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "12345678901234567890",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "-65536",
+ "1234.5678901",
+ "1",
+ "0.0002",
+ "-320001",
+ "4",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "123",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "1234.5678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
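+ // The digits after the decimal point are matched as a separate word, so "56789" matches
+ // 1234.5678901, and -65536 is matched both by the signed prefix "-6553" and by the unsigned "65536".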
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "56789",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "-6553",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "7344.8943",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "-1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "+1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "23",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "678",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "12345678901234567890",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.12.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
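+ // Each octet of an ipv4 value is matched as a separate word, so the prefix must start at
+ // an octet boundary: "2.3." matches 1.2.3.4, while "27.0" doesn't match 127.0.0.1.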
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "127.0.0.1",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "12",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8, 9})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "127.0.0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "2.3.",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "8",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "127.1",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "27.0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "foo",
+ prefix: "255.255.255.255",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
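+ // Timestamp components are matched as separate words: "002" matches the .002 millisecond part,
+ // while "06" matches nothing, since no component starts with it.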
+ // match
+ pf := &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-01-02t15:04:05.005z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{4})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-01-0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "002",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{1})
+
+ // mismatch
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-03-02T15:04:05.005Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "06",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ // This filter shouldn't match row=4, since it has a different string representation of the timestamp
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-01-02T16:04:05.005+01:00",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ // This filter shouldn't match row=4, since it contains too many digits in the millisecond part
+ pf = &anyCasePrefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-01-02T15:04:05.00500Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &anyCasePrefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+ })
+}
+
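+// TestPrefixFilter covers prefixFilter, which matches prefixes with their exact case,
+// using the same column types and largely the same expectations as TestAnyCasePrefixFilter above.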
+func TestPrefixFilter(t *testing.T) {
+ t.Run("single-row", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ },
+ },
+ {
+ name: "other column",
+ values: []string{
+ "asdfdsf",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "ab",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &prefixFilter{
+ fieldName: "other column",
+ prefix: "asdfdsf",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "bc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "other column",
+ prefix: "sd",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing column",
+ prefix: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "other-column",
+ values: []string{
+ "x",
+ "x",
+ "x",
+ },
+ },
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ "abc def",
+ "abc def",
+ },
+ },
+ {
+ name: "_msg",
+ values: []string{
+ "1 2 3",
+ "1 2 3",
+ "1 2 3",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "ab",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc de",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: " de",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &prefixFilter{
+ fieldName: "other-column",
+ prefix: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: " 2 ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "abc def ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "other-column",
+ prefix: "foo",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing column",
+ prefix: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: "foo",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "foobar",
+ "abc",
+ "afdf foobar baz",
+ "fddf foobarbaz",
+ "afoobarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 4, 6})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 3, 4, 5, 6})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "ba",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing column",
+ prefix: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "a afoobarbaz",
+ "a foobar",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "a",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "НГК",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{8})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "aa a",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "!,",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{9})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "aa ax",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "qwe rty abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "@",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "12",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 5})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65535",
+ "1234",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "123",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "123456",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65536",
+ "1234",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "123",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "12345678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65536",
+ "12345678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "12345678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "12345678901234567890",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "-65536",
+ "1234.5678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "123",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "1234.5678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "56789",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "-6553",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "7344.8943",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "-1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "+1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "23",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "678",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "12345678901234567890",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.12.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "foo",
+ prefix: "127.0.0.1",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "12",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8, 9})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "127.0.0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "2.3.",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "8",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "127.1",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "27.0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "foo",
+ prefix: "255.255.255.255",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // match
+ pf := &prefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-01-02T15:04:05.005Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{4})
+
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-01-0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: "002",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{1})
+
+ // mismatch
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-03-02T15:04:05.005Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: "06",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ // This filter shouldn't match row=4, since it has a different string representation of the timestamp
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-01-02T16:04:05.005+01:00",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ // This filter shouldn't match row=4, since it contains too many digits in the millisecond part
+ pf = &prefixFilter{
+ fieldName: "_msg",
+ prefix: "2006-01-02T15:04:05.00500Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &prefixFilter{
+ fieldName: "non-existing-column",
+ prefix: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+ })
+}
+
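+// TestAnyCasePhraseFilter covers anyCasePhraseFilter (case-insensitive phrase matching):
+// the whole phrase must be present in the value, so a partial word such as "ab" doesn't match "aBc DEf",
+// while an empty phrase matches only rows where the field is empty or missing.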
+func TestAnyCasePhraseFilter(t *testing.T) {
+ t.Run("single-row", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "aBc DEf",
+ },
+ },
+ {
+ name: "other column",
+ values: []string{
+ "aSDfdsF",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "Abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "other column",
+ phrase: "ASdfdsf",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "ab",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "other column",
+ phrase: "sd",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing column",
+ phrase: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "other-column",
+ values: []string{
+ "X",
+ "x",
+ "x",
+ },
+ },
+ {
+ name: "foo",
+ values: []string{
+ "aBC def",
+ "abc DEf",
+ "Abc deF",
+ },
+ },
+ {
+ name: "_msg",
+ values: []string{
+ "1 2 3",
+ "1 2 3",
+ "1 2 3",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: " def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "other-column",
+ phrase: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: " 2 ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "abc def ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "other-column",
+ phrase: "foo",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing column",
+ phrase: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "foo",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "fooBar",
+ "ABc",
+ "afdf foobar BAz",
+ "fddf fOObARbaz",
+ "AfooBarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "FoobAr",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 6})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "baZ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing column",
+ phrase: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "A Foobar",
+ "aA aBC a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "a aFOObarbaz",
+ "a foobar",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "A",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "НгкШ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{8})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "!,",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{9})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "aa a",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "@",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "12",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 5})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65535",
+ "1234",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "123456",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65536",
+ "1234",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "12345678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65536",
+ "12345678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "12345678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "12345678901234567890",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "-65536",
+ "1234.5678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "1234.5678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "5678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "-65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "-1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "+1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "123",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "5678",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "12345678901234567890",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.55.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "127.0.0.1",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "127",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "127.0.0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "2.3",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "5",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "127.1",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "27.0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "foo",
+ phrase: "255.255.255.255",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // match
+ pf := &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-01-02t15:04:05.005z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{4})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-01",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "002Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{1})
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-03-02T15:04:05.005Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "06",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ // This filter shouldn't match row=4, since it has a different string representation of the timestamp
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-01-02T16:04:05.005+01:00",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ // This filter shouldn't match row=4, since it contains too many digits in the millisecond part
+ pf = &anyCasePhraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-01-02T15:04:05.00500Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+ })
+}
+
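+// TestPhraseFilter covers phraseFilter, the exact-case counterpart of anyCasePhraseFilter,
+// with the same column types and matching rules.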
+func TestPhraseFilter(t *testing.T) {
+ t.Run("single-row", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ },
+ },
+ {
+ name: "other column",
+ values: []string{
+ "asdfdsf",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &phraseFilter{
+ fieldName: "other column",
+ phrase: "asdfdsf",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "ab",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "other column",
+ phrase: "sd",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "non-existing column",
+ phrase: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("const-column", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "other-column",
+ values: []string{
+ "x",
+ "x",
+ "x",
+ },
+ },
+ {
+ name: "foo",
+ values: []string{
+ "abc def",
+ "abc def",
+ "abc def",
+ },
+ },
+ {
+ name: "_msg",
+ values: []string{
+ "1 2 3",
+ "1 2 3",
+ "1 2 3",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "abc",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: " def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "abc def",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &phraseFilter{
+ fieldName: "other-column",
+ phrase: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: " 2 ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "abc def ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "other-column",
+ phrase: "foo",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "non-existing column",
+ phrase: "x",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: "foo",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("dict", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "",
+ "foobar",
+ "abc",
+ "afdf foobar baz",
+ "fddf foobarbaz",
+ "afoobarbaz",
+ "foobar",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 6})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "baz",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "non-existing column",
+ phrase: "foobar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("strings", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "a foo",
+ "a foobar",
+ "aa abc a",
+ "ca afdf a,foobar baz",
+ "a fddf foobarbaz",
+ "a afoobarbaz",
+ "a foobar",
+ "a kjlkjf dfff",
+ "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "a",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "НГКШ",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{8})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "!,",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{9})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "aa a",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "@",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint8", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "123",
+ "12",
+ "32",
+ "0",
+ "0",
+ "12",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "12",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 5})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint16", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65535",
+ "1234",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "123456",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint32", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65536",
+ "1234",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "12345678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("uint64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "65536",
+ "12345678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "12345678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "12345678901234567890",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("float64", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1234",
+ "0",
+ "3454",
+ "-65536",
+ "1234.5678901",
+ "1",
+ "2",
+ "3",
+ "4",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "1234.5678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "5678901",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "-65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "65536",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{3})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "-1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "+1234",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "123",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "5678",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "33",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "12345678901234567890",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("ipv4", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "foo",
+ values: []string{
+ "1.2.3.4",
+ "0.0.0.0",
+ "127.0.0.1",
+ "254.255.255.255",
+ "127.0.0.1",
+ "127.0.0.1",
+ "127.0.4.2",
+ "127.0.0.1",
+ "12.0.127.6",
+ "55.55.55.55",
+ "66.66.66.66",
+ "7.7.7.7",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "foo",
+ phrase: "127.0.0.1",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "127",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "127.0.0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "2.3",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "5",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "127.1",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "27.0",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+
+ pf = &phraseFilter{
+ fieldName: "foo",
+ phrase: "255.255.255.255",
+ }
+ testFilterMatchForColumns(t, columns, pf, "foo", nil)
+ })
+
+ t.Run("timestamp-iso8601", func(t *testing.T) {
+ columns := []column{
+ {
+ name: "_msg",
+ values: []string{
+ "2006-01-02T15:04:05.001Z",
+ "2006-01-02T15:04:05.002Z",
+ "2006-01-02T15:04:05.003Z",
+ "2006-01-02T15:04:05.004Z",
+ "2006-01-02T15:04:05.005Z",
+ "2006-01-02T15:04:05.006Z",
+ "2006-01-02T15:04:05.007Z",
+ "2006-01-02T15:04:05.008Z",
+ "2006-01-02T15:04:05.009Z",
+ },
+ },
+ }
+
+ // match
+ pf := &phraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-01-02T15:04:05.005Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{4})
+
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-01",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: "002Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{1})
+
+ pf = &phraseFilter{
+ fieldName: "non-existing-column",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
+
+ // mismatch
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: "bar",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: "",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-03-02T15:04:05.005Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: "06",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ // This filter shouldn't match row=4, since it has a different string representation of the timestamp
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-01-02T16:04:05.005+01:00",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+
+ // This filter shouldn't match row=4, since it contains too many digits for the millisecond part
+ pf = &phraseFilter{
+ fieldName: "_msg",
+ phrase: "2006-01-02T15:04:05.00500Z",
+ }
+ testFilterMatchForColumns(t, columns, pf, "_msg", nil)
+ })
+}
+
+func testFilterMatchForTimestamps(t *testing.T, timestamps []int64, f filter, expectedRowIdxs []int) {
+ t.Helper()
+
+ // Create the test storage
+ const storagePath = "testFilterMatchForTimestamps"
+ cfg := &StorageConfig{}
+ s := MustOpenStorage(storagePath, cfg)
+
+ // Generate rows
+ getValue := func(rowIdx int) string {
+ return fmt.Sprintf("some value for row %d", rowIdx)
+ }
+ tenantID := TenantID{
+ AccountID: 123,
+ ProjectID: 456,
+ }
+ generateRowsFromTimestamps(s, tenantID, timestamps, getValue)
+
+ expectedResults := make([]string, len(expectedRowIdxs))
+ expectedTimestamps := make([]int64, len(expectedRowIdxs))
+ for i, idx := range expectedRowIdxs {
+ expectedResults[i] = getValue(idx)
+ expectedTimestamps[i] = timestamps[idx]
+ }
+
+ testFilterMatchForStorage(t, s, tenantID, f, "_msg", expectedResults, expectedTimestamps)
+
+ // Close and delete the test storage
+ s.MustClose()
+ fs.MustRemoveAll(storagePath)
+}
+
+func testFilterMatchForColumns(t *testing.T, columns []column, f filter, resultColumnName string, expectedRowIdxs []int) {
+ t.Helper()
+
+ // Create the test storage
+ const storagePath = "testFilterMatchForColumns"
+ cfg := &StorageConfig{}
+ s := MustOpenStorage(storagePath, cfg)
+
+ // Generate rows
+ tenantID := TenantID{
+ AccountID: 123,
+ ProjectID: 456,
+ }
+ generateRowsFromColumns(s, tenantID, columns)
+
+ var values []string
+ for _, c := range columns {
+ if c.name == resultColumnName {
+ values = c.values
+ break
+ }
+ }
+ expectedResults := make([]string, len(expectedRowIdxs))
+ expectedTimestamps := make([]int64, len(expectedRowIdxs))
+ for i, idx := range expectedRowIdxs {
+ expectedResults[i] = values[idx]
+ expectedTimestamps[i] = int64(idx) * 1e9
+ }
+
+ testFilterMatchForStorage(t, s, tenantID, f, resultColumnName, expectedResults, expectedTimestamps)
+
+ // Close and delete the test storage
+ s.MustClose()
+ fs.MustRemoveAll(storagePath)
+}
+
+func testFilterMatchForStorage(t *testing.T, s *Storage, tenantID TenantID, f filter, resultColumnName string, expectedResults []string, expectedTimestamps []int64) {
+ t.Helper()
+
+ so := &genericSearchOptions{
+ tenantIDs: []TenantID{tenantID},
+ filter: f,
+ resultColumnNames: []string{resultColumnName},
+ }
+ workersCount := 3
+ s.search(workersCount, so, nil, func(workerID uint, br *blockResult) {
+ // Verify tenantID
+ if !br.streamID.tenantID.equal(&tenantID) {
+ t.Fatalf("unexpected tenantID in blockResult; got %s; want %s", &br.streamID.tenantID, &tenantID)
+ }
+
+ // Verify columns
+ if len(br.cs) != 1 {
+ t.Fatalf("unexpected number of columns in blockResult; got %d; want 1", len(br.cs))
+ }
+ results := br.getColumnValues(0)
+ if !reflect.DeepEqual(results, expectedResults) {
+ t.Fatalf("unexpected results matched;\ngot\n%q\nwant\n%q", results, expectedResults)
+ }
+
+ // Verify timestamps
+ if br.timestamps == nil {
+ br.timestamps = []int64{}
+ }
+ if !reflect.DeepEqual(br.timestamps, expectedTimestamps) {
+ t.Fatalf("unexpected timestamps;\ngot\n%d\nwant\n%d", br.timestamps, expectedTimestamps)
+ }
+ })
+}
+
+func generateRowsFromColumns(s *Storage, tenantID TenantID, columns []column) {
+ streamTags := []string{
+ "job",
+ "instance",
+ }
+ lr := GetLogRows(streamTags, nil)
+ var fields []Field
+ for i := range columns[0].values {
+ // Add stream tags
+ fields = append(fields[:0], Field{
+ Name: "job",
+ Value: "foobar",
+ }, Field{
+ Name: "instance",
+ Value: "host1:234",
+ })
+ // Add other columns
+ for j := range columns {
+ fields = append(fields, Field{
+ Name: columns[j].name,
+ Value: columns[j].values[i],
+ })
+ }
+ timestamp := int64(i) * 1e9
+ lr.MustAdd(tenantID, timestamp, fields)
+ }
+ s.MustAddRows(lr)
+ PutLogRows(lr)
+}
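+
+// Note: generateRowsFromColumns assigns the timestamp i*1e9 to row i, which is why
+// testFilterMatchForColumns above expects int64(idx)*1e9 for every matched row index.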
+
+func generateRowsFromTimestamps(s *Storage, tenantID TenantID, timestamps []int64, getValue func(rowIdx int) string) {
+ lr := GetLogRows(nil, nil)
+ var fields []Field
+ for i, timestamp := range timestamps {
+ fields = append(fields[:0], Field{
+ Name: "_msg",
+ Value: getValue(i),
+ })
+ lr.MustAdd(tenantID, timestamp, fields)
+ }
+ s.MustAddRows(lr)
+ PutLogRows(lr)
+}
diff --git a/lib/logstorage/hash128.go b/lib/logstorage/hash128.go
new file mode 100644
index 000000000..68540f894
--- /dev/null
+++ b/lib/logstorage/hash128.go
@@ -0,0 +1,38 @@
+package logstorage
+
+import (
+ "sync"
+
+ "github.com/cespare/xxhash/v2"
+)
+
+func hash128(data []byte) u128 {
+ h := getHasher()
+ _, _ = h.Write(data)
+ hi := h.Sum64()
+ _, _ = h.Write(magicSuffixForHash)
+ lo := h.Sum64()
+ putHasher(h)
+
+ return u128{
+ hi: hi,
+ lo: lo,
+ }
+}
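+
+// Note: both halves of the returned u128 come from the same xxhash64 stream;
+// writing magicSuffixForHash (declared below) before the second Sum64 call is what
+// makes lo differ from hi. The result is a fast fingerprint, not a cryptographic hash.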
+
+var magicSuffixForHash = []byte("magic!")
+
+func getHasher() *xxhash.Digest {
+ v := hasherPool.Get()
+ if v == nil {
+ return xxhash.New()
+ }
+ return v.(*xxhash.Digest)
+}
+
+func putHasher(h *xxhash.Digest) {
+ h.Reset()
+ hasherPool.Put(h)
+}
+
+var hasherPool sync.Pool
diff --git a/lib/logstorage/hash128_test.go b/lib/logstorage/hash128_test.go
new file mode 100644
index 000000000..05439d9fb
--- /dev/null
+++ b/lib/logstorage/hash128_test.go
@@ -0,0 +1,24 @@
+package logstorage
+
+import (
+ "testing"
+)
+
+func TestHash128(t *testing.T) {
+ f := func(data string, hashExpected u128) {
+ t.Helper()
+ h := hash128([]byte(data))
+ if !h.equal(&hashExpected) {
+ t.Fatalf("unexpected hash; got %s; want %s", &h, &hashExpected)
+ }
+ }
+ f("", u128{
+ hi: 17241709254077376921,
+ lo: 13138662262368978769,
+ })
+
+ f("abc", u128{
+ hi: 4952883123889572249,
+ lo: 3255951525518405514,
+ })
+}
diff --git a/lib/logstorage/hash128_timing_test.go b/lib/logstorage/hash128_timing_test.go
new file mode 100644
index 000000000..7cdccb963
--- /dev/null
+++ b/lib/logstorage/hash128_timing_test.go
@@ -0,0 +1,29 @@
+package logstorage
+
+import (
+ "fmt"
+ "sync/atomic"
+ "testing"
+)
+
+func BenchmarkHash128(b *testing.B) {
+ a := make([][]byte, 100)
+ for i := range a {
+ a[i] = []byte(fmt.Sprintf("some string %d", i))
+ }
+ b.ReportAllocs()
+ b.SetBytes(int64(len(a)))
+ b.RunParallel(func(pb *testing.PB) {
+ var n uint64
+ for pb.Next() {
+ for _, b := range a {
+ h := hash128(b)
+ n += h.hi
+ n += h.lo
+ }
+ }
+ atomic.AddUint64(&GlobalSinkU64, n)
+ })
+}
+
+var GlobalSinkU64 uint64
diff --git a/lib/logstorage/index_block_header.go b/lib/logstorage/index_block_header.go
new file mode 100644
index 000000000..c0654b10b
--- /dev/null
+++ b/lib/logstorage/index_block_header.go
@@ -0,0 +1,164 @@
+package logstorage
+
+import (
+ "fmt"
+ "io"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// indexBlockHeader contains index information about multiple blocks.
+//
+// It allows locating the block by streamID and/or by time range.
+type indexBlockHeader struct {
+ // streamID is the minimum streamID covered by the indexBlockHeader
+ streamID streamID
+
+ // minTimestamp is the minimum timestamp seen across blocks covered by the indexBlockHeader
+ minTimestamp int64
+
+ // maxTimestamp is the maximum timestamp seen across blocks covered by the indexBlockHeader
+ maxTimestamp int64
+
+ // indexBlockOffset is an offset of the linked index block at indexFilename
+ indexBlockOffset uint64
+
+ // indexBlockSize is the size of the linked index block at indexFilename
+ indexBlockSize uint64
+}
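+
+// A search can use these fields to skip an entire index block: blocks whose
+// [minTimestamp, maxTimestamp] range doesn't intersect the requested time range,
+// or whose streamID range doesn't cover the requested streamID, don't need to be read.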
+
+// reset resets ih for subsequent re-use.
+func (ih *indexBlockHeader) reset() {
+ ih.streamID.reset()
+ ih.minTimestamp = 0
+ ih.maxTimestamp = 0
+ ih.indexBlockOffset = 0
+ ih.indexBlockSize = 0
+}
+
+// mustWriteIndexBlock writes data with the given additional args to sw and updates ih accordingly.
+func (ih *indexBlockHeader) mustWriteIndexBlock(data []byte, sidFirst streamID, minTimestamp, maxTimestamp int64, sw *streamWriters) {
+ ih.streamID = sidFirst
+ ih.minTimestamp = minTimestamp
+ ih.maxTimestamp = maxTimestamp
+
+ bb := longTermBufPool.Get()
+ bb.B = encoding.CompressZSTDLevel(bb.B[:0], data, 1)
+ ih.indexBlockOffset = sw.indexWriter.bytesWritten
+ ih.indexBlockSize = uint64(len(bb.B))
+ sw.indexWriter.MustWrite(bb.B)
+ longTermBufPool.Put(bb)
+}
+
+// mustReadNextIndexBlock reads the next index block associated with ih from sr, appends it to dst and returns the result.
+func (ih *indexBlockHeader) mustReadNextIndexBlock(dst []byte, sr *streamReaders) []byte {
+ indexReader := &sr.indexReader
+
+ indexBlockSize := ih.indexBlockSize
+ if indexBlockSize > maxIndexBlockSize {
+ logger.Panicf("FATAL: %s: indexBlockHeader.indexBlockSize=%d cannot exceed %d bytes", indexReader.Path(), indexBlockSize, maxIndexBlockSize)
+ }
+ if ih.indexBlockOffset != indexReader.bytesRead {
+ logger.Panicf("FATAL: %s: indexBlockHeader.indexBlockOffset=%d must equal to %d", indexReader.Path(), ih.indexBlockOffset, indexReader.bytesRead)
+ }
+ bbCompressed := longTermBufPool.Get()
+ bbCompressed.B = bytesutil.ResizeNoCopyMayOverallocate(bbCompressed.B, int(indexBlockSize))
+ indexReader.MustReadFull(bbCompressed.B)
+
+ // Decompress bbCompressed to dst
+ var err error
+ dst, err = encoding.DecompressZSTD(dst, bbCompressed.B)
+ longTermBufPool.Put(bbCompressed)
+ if err != nil {
+ logger.Panicf("FATAL: %s: cannot decompress indexBlock read at offset %d with size %d: %s", indexReader.Path(), ih.indexBlockOffset, indexBlockSize, err)
+ }
+ return dst
+}
+
+// marshal appends marshaled ih to dst and returns the result.
+func (ih *indexBlockHeader) marshal(dst []byte) []byte {
+ dst = ih.streamID.marshal(dst)
+ dst = encoding.MarshalUint64(dst, uint64(ih.minTimestamp))
+ dst = encoding.MarshalUint64(dst, uint64(ih.maxTimestamp))
+ dst = encoding.MarshalUint64(dst, ih.indexBlockOffset)
+ dst = encoding.MarshalUint64(dst, ih.indexBlockSize)
+ return dst
+}
+
+// unmarshal unmarshals ih from src and returns the tail left.
+func (ih *indexBlockHeader) unmarshal(src []byte) ([]byte, error) {
+ srcOrig := src
+
+ // unmarshal ih.streamID
+ tail, err := ih.streamID.unmarshal(src)
+ if err != nil {
+ return srcOrig, fmt.Errorf("cannot unmarshal streamID: %w", err)
+ }
+ src = tail
+
+ // unmarshal the rest of indexBlockHeader fields
+ if len(src) < 32 {
+ return srcOrig, fmt.Errorf("cannot unmarshal indexBlockHeader from %d bytes; need at least 32 bytes", len(src))
+ }
+ ih.minTimestamp = int64(encoding.UnmarshalUint64(src))
+ ih.maxTimestamp = int64(encoding.UnmarshalUint64(src[8:]))
+ ih.indexBlockOffset = encoding.UnmarshalUint64(src[16:])
+ ih.indexBlockSize = encoding.UnmarshalUint64(src[24:])
+
+ return src[32:], nil
+}
+
+// mustReadIndexBlockHeaders reads indexBlockHeader entries from r, appends them to dst and returns the result.
+func mustReadIndexBlockHeaders(dst []indexBlockHeader, r *readerWithStats) []indexBlockHeader {
+ data, err := io.ReadAll(r)
+ if err != nil {
+ logger.Panicf("FATAL: cannot read indexBlockHeader entries from %s: %s", r.Path(), err)
+ }
+
+ bb := longTermBufPool.Get()
+ bb.B, err = encoding.DecompressZSTD(bb.B[:0], data)
+ if err != nil {
+ logger.Panicf("FATAL: cannot decompress indexBlockHeader entries from %s: %s", r.Path(), err)
+ }
+ dst, err = unmarshalIndexBlockHeaders(dst, bb.B)
+ if len(bb.B) < 1024*1024 {
+ longTermBufPool.Put(bb)
+ }
+ if err != nil {
+ logger.Panicf("FATAL: cannot parse indexBlockHeader entries from %s: %s", r.Path(), err)
+ }
+ return dst
+}
+
+// unmarshalIndexBlockHeaders appends indexBlockHeader entries unmarshaled from src to dst and returns the result.
+func unmarshalIndexBlockHeaders(dst []indexBlockHeader, src []byte) ([]indexBlockHeader, error) {
+ dstOrig := dst
+ for len(src) > 0 {
+ if len(dst) < cap(dst) {
+ dst = dst[:len(dst)+1]
+ } else {
+ dst = append(dst, indexBlockHeader{})
+ }
+ ih := &dst[len(dst)-1]
+ tail, err := ih.unmarshal(src)
+ if err != nil {
+ return dstOrig, fmt.Errorf("cannot unmarshal indexBlockHeader %d: %w", len(dst)-len(dstOrig), err)
+ }
+ src = tail
+ }
+ if err := validateIndexBlockHeaders(dst[len(dstOrig):]); err != nil {
+ return dstOrig, err
+ }
+ return dst, nil
+}
+
+func validateIndexBlockHeaders(ihs []indexBlockHeader) error {
+ for i := 1; i < len(ihs); i++ {
+ if ihs[i].streamID.less(&ihs[i-1].streamID) {
+ return fmt.Errorf("unexpected indexBlockHeader with smaller streamID=%s after bigger streamID=%s", &ihs[i].streamID, &ihs[i-1].streamID)
+ }
+ }
+ return nil
+}
diff --git a/lib/logstorage/index_block_header_test.go b/lib/logstorage/index_block_header_test.go
new file mode 100644
index 000000000..b72137456
--- /dev/null
+++ b/lib/logstorage/index_block_header_test.go
@@ -0,0 +1,138 @@
+package logstorage
+
+import (
+ "reflect"
+ "testing"
+)
+
+func TestIndexBlockHeaderMarshalUnmarshal(t *testing.T) {
+ f := func(ih *indexBlockHeader, marshaledLen int) {
+ t.Helper()
+ data := ih.marshal(nil)
+ if len(data) != marshaledLen {
+ t.Fatalf("unexpected marshaled length of indexBlockHeader; got %d; want %d", len(data), marshaledLen)
+ }
+ var ih2 indexBlockHeader
+ tail, err := ih2.unmarshal(data)
+ if err != nil {
+ t.Fatalf("cannot unmarshal indexBlockHeader: %s", err)
+ }
+ if len(tail) > 0 {
+ t.Fatalf("unexpected non-empty tail left after unmarshaling indexBlockHeader: %X", tail)
+ }
+ if !reflect.DeepEqual(ih, &ih2) {
+ t.Fatalf("unexpected unmarshaled indexBlockHeader\ngot\n%v\nwant\n%v", &ih2, ih)
+ }
+ }
+ f(&indexBlockHeader{}, 56)
+ f(&indexBlockHeader{
+ streamID: streamID{
+ tenantID: TenantID{
+ AccountID: 123,
+ ProjectID: 456,
+ },
+ id: u128{
+ hi: 214,
+ lo: 2111,
+ },
+ },
+ minTimestamp: 1234,
+ maxTimestamp: 898943,
+ indexBlockOffset: 234,
+ indexBlockSize: 898,
+ }, 56)
+}
+
+func TestIndexBlockHeaderUnmarshalFailure(t *testing.T) {
+ f := func(data []byte) {
+ t.Helper()
+ dataOrig := append([]byte{}, data...)
+ var ih indexBlockHeader
+ tail, err := ih.unmarshal(data)
+ if err == nil {
+ t.Fatalf("expecting non-nil error")
+ }
+ if string(tail) != string(dataOrig) {
+ t.Fatalf("unexpected tail; got %q; want %q", tail, dataOrig)
+ }
+ }
+ f(nil)
+ f([]byte("foo"))
+
+ ih := &indexBlockHeader{
+ streamID: streamID{
+ tenantID: TenantID{
+ AccountID: 123,
+ ProjectID: 456,
+ },
+ id: u128{
+ hi: 214,
+ lo: 2111,
+ },
+ },
+ minTimestamp: 1234,
+ maxTimestamp: 898943,
+ indexBlockOffset: 234,
+ indexBlockSize: 898,
+ }
+ data := ih.marshal(nil)
+ for len(data) > 0 {
+ data = data[:len(data)-1]
+ f(data)
+ }
+}
+
+func TestIndexBlockHeaderReset(t *testing.T) {
+ ih := &indexBlockHeader{
+ streamID: streamID{
+ tenantID: TenantID{
+ AccountID: 123,
+ ProjectID: 456,
+ },
+ id: u128{
+ hi: 214,
+ lo: 2111,
+ },
+ },
+ minTimestamp: 1234,
+ maxTimestamp: 898943,
+ indexBlockOffset: 234,
+ indexBlockSize: 898,
+ }
+ ih.reset()
+ ihZero := &indexBlockHeader{}
+ if !reflect.DeepEqual(ih, ihZero) {
+ t.Fatalf("unexpected non-zero indexBlockHeader after reset: %v", ih)
+ }
+}
+
+func TestMarshalUnmarshalIndexBlockHeaders(t *testing.T) {
+ f := func(ihs []indexBlockHeader, marshaledLen int) {
+ t.Helper()
+ var data []byte
+ for i := range ihs {
+ data = ihs[i].marshal(data)
+ }
+ if len(data) != marshaledLen {
+ t.Fatalf("unexpected marshaled length for indexBlockHeader entries; got %d; want %d", len(data), marshaledLen)
+ }
+ ihs2, err := unmarshalIndexBlockHeaders(nil, data)
+ if err != nil {
+ t.Fatalf("cannot unmarshal indexBlockHeader entries: %s", err)
+ }
+ if !reflect.DeepEqual(ihs, ihs2) {
+ t.Fatalf("unexpected indexBlockHeader entries after unmarshaling\ngot\n%v\nwant\n%v", ihs2, ihs)
+ }
+ }
+ f(nil, 0)
+ f([]indexBlockHeader{{}}, 56)
+ f([]indexBlockHeader{
+ {
+ indexBlockOffset: 234,
+ indexBlockSize: 5432,
+ },
+ {
+ minTimestamp: -123,
+ },
+ }, 112)
+}
diff --git a/lib/logstorage/indexdb.go b/lib/logstorage/indexdb.go
new file mode 100644
index 000000000..4e69a9cdf
--- /dev/null
+++ b/lib/logstorage/indexdb.go
@@ -0,0 +1,900 @@
+package logstorage
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "sort"
+ "sync"
+ "sync/atomic"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
+)
+
+const (
+ // (tenantID:streamID) entries have this prefix
+ //
+ // These entries are used for detecting whether the given stream is already registered
+ nsPrefixStreamID = 0
+
+ // (tenantID:streamID -> streamTagsCanonical) entries have this prefix
+ nsPrefixStreamIDToStreamTags = 1
+
+ // (tenantID:name:value => streamIDs) entries have this prefix
+ nsPrefixTagToStreamIDs = 2
+)
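+
+// Rough key layouts for the entries above (the 1-byte prefix and the 8-byte tenantID
+// come from marshalCommonPrefix; streamID ids are 16-byte u128 values):
+//
+//	nsPrefixStreamID:             prefix | tenantID | streamID.id
+//	nsPrefixStreamIDToStreamTags: prefix | tenantID | streamID.id | streamTagsCanonical
+//	nsPrefixTagToStreamIDs:       prefix | tenantID | tag name | tag value | streamID ids...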
+
+// IndexdbStats contains indexdb stats
+type IndexdbStats struct {
+ // StreamsCreatedTotal is the number of log streams created since the indexdb initialization.
+ StreamsCreatedTotal uint64
+}
+
+type indexdb struct {
+ // streamsCreatedTotal is the number of log streams created since the indexdb initialization.
+ streamsCreatedTotal uint64
+
+ // path is the path to indexdb
+ path string
+
+ // partitionName is the name of the partition for the indexdb.
+ partitionName string
+
+ // tb is the storage for indexdb
+ tb *mergeset.Table
+
+ // indexSearchPool is a pool of indexSearch structs for the given indexdb
+ indexSearchPool sync.Pool
+
+ // the generation of the streamFilterCache.
+ // It is updated each time a new item is added to tb.
+ streamFilterCacheGeneration uint32
+
+ // s is the storage the indexdb belongs to.
+ s *Storage
+}
+
+func mustCreateIndexdb(path string) {
+ fs.MustMkdirFailIfExist(path)
+}
+
+func mustOpenIndexdb(path, partitionName string, s *Storage) *indexdb {
+ idb := &indexdb{
+ path: path,
+ partitionName: partitionName,
+ s: s,
+ }
+ isReadOnly := uint32(0)
+ idb.tb = mergeset.MustOpenTable(path, idb.invalidateStreamFilterCache, mergeTagToStreamIDsRows, &isReadOnly)
+ return idb
+}
+
+func mustCloseIndexdb(idb *indexdb) {
+ idb.tb.MustClose()
+ idb.tb = nil
+ idb.s = nil
+ idb.partitionName = ""
+ idb.path = ""
+}
+
+func (idb *indexdb) debugFlush() {
+ idb.tb.DebugFlush()
+}
+
+func (idb *indexdb) updateStats(d *IndexdbStats) {
+ d.StreamsCreatedTotal += atomic.LoadUint64(&idb.streamsCreatedTotal)
+}
+
+func (idb *indexdb) appendStreamTagsByStreamID(dst []byte, sid *streamID) []byte {
+ is := idb.getIndexSearch()
+ defer idb.putIndexSearch(is)
+
+ ts := &is.ts
+ kb := &is.kb
+
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixStreamIDToStreamTags, sid.tenantID)
+ kb.B = sid.id.marshal(kb.B)
+
+ if err := ts.FirstItemWithPrefix(kb.B); err != nil {
+ if err == io.EOF {
+ return dst
+ }
+ logger.Panicf("FATAL: unexpected error when searching for StreamTags by streamID=%s in indexdb: %s", sid, err)
+ }
+ data := ts.Item[len(kb.B):]
+ dst = append(dst, data...)
+ return dst
+}
+
+// hasStreamID returns true if the given sid exists in idb
+func (idb *indexdb) hasStreamID(sid *streamID) bool {
+ is := idb.getIndexSearch()
+ defer idb.putIndexSearch(is)
+
+ ts := &is.ts
+ kb := &is.kb
+
+ kb.B = marshalCommonPrefix(kb.B, nsPrefixStreamID, sid.tenantID)
+ kb.B = sid.id.marshal(kb.B)
+
+ if err := ts.FirstItemWithPrefix(kb.B); err != nil {
+ if err == io.EOF {
+ return false
+ }
+ logger.Panicf("FATAL: unexpected error when searching for streamID=%s in indexdb: %s", sid, err)
+ }
+ return len(kb.B) == len(ts.Item)
+}
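+
+// Note: FirstItemWithPrefix positions ts.Item at the first item starting with kb.B,
+// so the stream exists only when the found item is exactly the (tenantID:streamID) key,
+// i.e. when no extra bytes follow the prefix - hence the length comparison above.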
+
+type indexSearch struct {
+ idb *indexdb
+ ts mergeset.TableSearch
+ kb bytesutil.ByteBuffer
+}
+
+func (idb *indexdb) getIndexSearch() *indexSearch {
+ v := idb.indexSearchPool.Get()
+ if v == nil {
+ v = &indexSearch{
+ idb: idb,
+ }
+ }
+ is := v.(*indexSearch)
+ is.ts.Init(idb.tb)
+ return is
+}
+
+func (idb *indexdb) putIndexSearch(is *indexSearch) {
+ is.idb = nil
+ is.ts.MustClose()
+ is.kb.Reset()
+
+ idb.indexSearchPool.Put(is)
+}
+
+// searchStreamIDs returns streamIDs for the given tenantIDs and the given stream filter sf
+func (idb *indexdb) searchStreamIDs(tenantIDs []TenantID, sf *StreamFilter) []streamID {
+ // Try obtaining streamIDs from cache
+ streamIDs, ok := idb.loadStreamIDsFromCache(tenantIDs, sf)
+ if ok {
+ // Fast path - streamIDs found in the cache.
+ return streamIDs
+ }
+
+ // Slow path - collect streamIDs from indexdb.
+
+ // Collect streamIDs for all the specified tenantIDs.
+ is := idb.getIndexSearch()
+ m := make(map[streamID]struct{})
+ for _, tenantID := range tenantIDs {
+ for _, asf := range sf.orFilters {
+ is.updateStreamIDs(m, tenantID, asf)
+ }
+ }
+ idb.putIndexSearch(is)
+
+ // Convert the collected streamIDs from m to a sorted slice.
+ streamIDs = make([]streamID, 0, len(m))
+ for streamID := range m {
+ streamIDs = append(streamIDs, streamID)
+ }
+ sortStreamIDs(streamIDs)
+
+ // Store the collected streamIDs to cache.
+ idb.storeStreamIDsToCache(tenantIDs, sf, streamIDs)
+
+ return streamIDs
+}
+
+func sortStreamIDs(streamIDs []streamID) {
+ sort.Slice(streamIDs, func(i, j int) bool {
+ return streamIDs[i].less(&streamIDs[j])
+ })
+}
+
+func (is *indexSearch) updateStreamIDs(dst map[streamID]struct{}, tenantID TenantID, asf *andStreamFilter) {
+ var m map[u128]struct{}
+ for _, tf := range asf.tagFilters {
+ ids := is.getStreamIDsForTagFilter(tenantID, tf)
+ if len(ids) == 0 {
+ // There is no need to check the remaining filters,
+ // since the result will be empty in any case.
+ return
+ }
+ if m == nil {
+ m = ids
+ } else {
+ for id := range m {
+ if _, ok := ids[id]; !ok {
+ delete(m, id)
+ }
+ }
+ }
+ }
+
+ var sid streamID
+ for id := range m {
+ sid.tenantID = tenantID
+ sid.id = id
+ dst[sid] = struct{}{}
+ }
+}
+
+func (is *indexSearch) getStreamIDsForTagFilter(tenantID TenantID, tf *streamTagFilter) map[u128]struct{} {
+ switch tf.op {
+ case "=":
+ if tf.value == "" {
+ // (field="")
+ return is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName)
+ }
+ // (field="value")
+ return is.getStreamIDsForNonEmptyTagValue(tenantID, tf.tagName, tf.value)
+ case "!=":
+ if tf.value == "" {
+ // (field!="")
+ return is.getStreamIDsForTagName(tenantID, tf.tagName)
+ }
+ // (field!="value") => (all and not field="value")
+ ids := is.getStreamIDsForTenant(tenantID)
+ idsForTag := is.getStreamIDsForNonEmptyTagValue(tenantID, tf.tagName, tf.value)
+ for id := range idsForTag {
+ delete(ids, id)
+ }
+ return ids
+ case "=~":
+ re := tf.getRegexp()
+ if re.MatchString("") {
+ // (field=~"|re") => (field="" or field=~"re")
+ ids := is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName)
+ idsForRe := is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
+ for id := range idsForRe {
+ ids[id] = struct{}{}
+ }
+ return ids
+ }
+ return is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
+ case "!~":
+ re := tf.getRegexp()
+ if re.MatchString("") {
+ // (field!~"|re") => (field!="" and not field=~"re")
+ ids := is.getStreamIDsForTagName(tenantID, tf.tagName)
+ if len(ids) == 0 {
+ return ids
+ }
+ idsForRe := is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
+ for id := range idsForRe {
+ delete(ids, id)
+ }
+ return ids
+ }
+ // (field!~"re") => (all and not field=~"re")
+ ids := is.getStreamIDsForTenant(tenantID)
+ idsForRe := is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
+ for id := range idsForRe {
+ delete(ids, id)
+ }
+ return ids
+ default:
+ logger.Panicf("BUG: unexpected operation in stream tag filter: %q", tf.op)
+ return nil
+ }
+}
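+
+// For example, a filter like {job!~"job.+"} is evaluated above as a set difference:
+// all streamIDs of the tenant minus the streamIDs whose job value matches the regexp,
+// rather than by scanning for non-matching rows directly.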
+
+func (is *indexSearch) getStreamIDsForNonEmptyTagValue(tenantID TenantID, tagName, tagValue string) map[u128]struct{} {
+ ids := make(map[u128]struct{})
+ var sp tagToStreamIDsRowParser
+
+ ts := &is.ts
+ kb := &is.kb
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToStreamIDs, tenantID)
+ kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagName))
+ kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagValue))
+ prefix := kb.B
+ ts.Seek(prefix)
+ for ts.NextItem() {
+ item := ts.Item
+ if !bytes.HasPrefix(item, prefix) {
+ break
+ }
+ tail := item[len(prefix):]
+ sp.UpdateStreamIDs(ids, tail)
+ }
+ if err := ts.Error(); err != nil {
+ logger.Panicf("FATAL: unexpected error: %s", err)
+ }
+
+ return ids
+}
+
+func (is *indexSearch) getStreamIDsForEmptyTagValue(tenantID TenantID, tagName string) map[u128]struct{} {
+ ids := is.getStreamIDsForTenant(tenantID)
+ idsForTag := is.getStreamIDsForTagName(tenantID, tagName)
+ for id := range idsForTag {
+ delete(ids, id)
+ }
+ return ids
+}
+
+func (is *indexSearch) getStreamIDsForTenant(tenantID TenantID) map[u128]struct{} {
+ ids := make(map[u128]struct{})
+ ts := &is.ts
+ kb := &is.kb
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixStreamID, tenantID)
+ prefix := kb.B
+ ts.Seek(prefix)
+ var id u128
+ for ts.NextItem() {
+ item := ts.Item
+ if !bytes.HasPrefix(item, prefix) {
+ break
+ }
+ tail, err := id.unmarshal(item[len(prefix):])
+ if err != nil {
+ logger.Panicf("FATAL: cannot unmarshal streamID from (tenantID:streamID) entry: %s", err)
+ }
+ if len(tail) > 0 {
+ logger.Panicf("FATAL: unexpected non-empty tail left after unmarshaling streamID from (tenantID:streamID); tail len=%d", len(tail))
+ }
+ ids[id] = struct{}{}
+ }
+ if err := ts.Error(); err != nil {
+ logger.Panicf("FATAL: unexpected error: %s", err)
+ }
+
+ return ids
+}
+
+func (is *indexSearch) getStreamIDsForTagName(tenantID TenantID, tagName string) map[u128]struct{} {
+ ids := make(map[u128]struct{})
+ var sp tagToStreamIDsRowParser
+
+ ts := &is.ts
+ kb := &is.kb
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToStreamIDs, tenantID)
+ kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagName))
+ prefix := kb.B
+ ts.Seek(prefix)
+ for ts.NextItem() {
+ item := ts.Item
+ if !bytes.HasPrefix(item, prefix) {
+ break
+ }
+ tail := item[len(prefix):]
+ n := bytes.IndexByte(tail, tagSeparatorChar)
+ if n < 0 {
+ logger.Panicf("FATAL: cannot find the end of tag value")
+ }
+ tail = tail[n+1:]
+ sp.UpdateStreamIDs(ids, tail)
+ }
+ if err := ts.Error(); err != nil {
+ logger.Panicf("FATAL: unexpected error: %s", err)
+ }
+
+ return ids
+}
+
+func (is *indexSearch) getStreamIDsForTagRegexp(tenantID TenantID, tagName string, re *regexutil.PromRegex) map[u128]struct{} {
+ ids := make(map[u128]struct{})
+ var sp tagToStreamIDsRowParser
+ var tagValue, prevMatchingTagValue []byte
+ var err error
+
+ ts := &is.ts
+ kb := &is.kb
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToStreamIDs, tenantID)
+ kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagName))
+ prefix := kb.B
+ ts.Seek(prefix)
+ for ts.NextItem() {
+ item := ts.Item
+ if !bytes.HasPrefix(item, prefix) {
+ break
+ }
+ tail := item[len(prefix):]
+ tail, tagValue, err = unmarshalTagValue(tagValue[:0], tail)
+ if err != nil {
+ logger.Panicf("FATAL: cannot unmarshal tag value: %s", err)
+ }
+ if !bytes.Equal(tagValue, prevMatchingTagValue) {
+ if !re.MatchString(bytesutil.ToUnsafeString(tagValue)) {
+ continue
+ }
+ prevMatchingTagValue = append(prevMatchingTagValue[:0], tagValue...)
+ }
+ sp.UpdateStreamIDs(ids, tail)
+ }
+ if err := ts.Error(); err != nil {
+ logger.Panicf("FATAL: unexpected error: %s", err)
+ }
+
+ return ids
+}
+
+func (idb *indexdb) mustRegisterStream(streamID *streamID, streamTagsCanonical []byte) {
+ st := GetStreamTags()
+ mustUnmarshalStreamTags(st, streamTagsCanonical)
+ tenantID := streamID.tenantID
+
+ bi := getBatchItems()
+ buf := bi.buf[:0]
+ items := bi.items[:0]
+
+ // Register tenantID:streamID entry.
+ bufLen := len(buf)
+ buf = marshalCommonPrefix(buf, nsPrefixStreamID, tenantID)
+ buf = streamID.id.marshal(buf)
+ items = append(items, buf[bufLen:])
+
+ // Register tenantID:streamID -> streamTagsCanonical entry.
+ bufLen = len(buf)
+ buf = marshalCommonPrefix(buf, nsPrefixStreamIDToStreamTags, tenantID)
+ buf = streamID.id.marshal(buf)
+ buf = append(buf, streamTagsCanonical...)
+ items = append(items, buf[bufLen:])
+
+ // Register tenantID:name:value -> streamIDs entries.
+ tags := st.tags
+ for i := range tags {
+ bufLen = len(buf)
+ buf = marshalCommonPrefix(buf, nsPrefixTagToStreamIDs, tenantID)
+ buf = tags[i].indexdbMarshal(buf)
+ buf = streamID.id.marshal(buf)
+ items = append(items, buf[bufLen:])
+ }
+ PutStreamTags(st)
+
+ // Add items to the storage
+ idb.tb.AddItems(items)
+
+ bi.buf = buf
+ bi.items = items
+ putBatchItems(bi)
+
+ atomic.AddUint64(&idb.streamsCreatedTotal, 1)
+}
+
+func (idb *indexdb) invalidateStreamFilterCache() {
+ // This function must be fast, since it is called each
+ // time a new indexdb entry is added.
+ atomic.AddUint32(&idb.streamFilterCacheGeneration, 1)
+}
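+
+// Bumping the generation effectively invalidates all the cached entries without
+// touching the cache itself: marshalStreamFilterCacheKey below embeds the generation
+// into every cache key, so stale keys simply stop being looked up.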
+
+func (idb *indexdb) marshalStreamFilterCacheKey(dst []byte, tenantIDs []TenantID, sf *StreamFilter) []byte {
+ dst = encoding.MarshalUint32(dst, idb.streamFilterCacheGeneration)
+ dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(idb.partitionName))
+ dst = encoding.MarshalVarUint64(dst, uint64(len(tenantIDs)))
+ for i := range tenantIDs {
+ dst = tenantIDs[i].marshal(dst)
+ }
+ dst = sf.marshalForCacheKey(dst)
+ return dst
+}
+
+func (idb *indexdb) loadStreamIDsFromCache(tenantIDs []TenantID, sf *StreamFilter) ([]streamID, bool) {
+ bb := bbPool.Get()
+ bb.B = idb.marshalStreamFilterCacheKey(bb.B[:0], tenantIDs, sf)
+ data := idb.s.streamFilterCache.GetBig(nil, bb.B)
+ bbPool.Put(bb)
+ if len(data) == 0 {
+ // Cache miss
+ return nil, false
+ }
+ // Cache hit - unpack streamIDs from data.
+ tail, n, err := encoding.UnmarshalVarUint64(data)
+ if err != nil {
+ logger.Panicf("BUG: unexpected error when unmarshaling the number of streamIDs from cache: %s", err)
+ }
+ src := tail
+ streamIDs := make([]streamID, n)
+ for i := uint64(0); i < n; i++ {
+ tail, err = streamIDs[i].unmarshal(src)
+ if err != nil {
+ logger.Panicf("BUG: unexpected error when unmarshaling streamID #%d: %s", i, err)
+ }
+ src = tail
+ }
+ if len(src) > 0 {
+ logger.Panicf("BUG: unexpected non-empty tail left with len=%d", len(src))
+ }
+ return streamIDs, true
+}
+
+func (idb *indexdb) storeStreamIDsToCache(tenantIDs []TenantID, sf *StreamFilter, streamIDs []streamID) {
+ // marshal streamIDs
+ var b []byte
+ b = encoding.MarshalVarUint64(b, uint64(len(streamIDs)))
+ for i := 0; i < len(streamIDs); i++ {
+ b = streamIDs[i].marshal(b)
+ }
+
+ // Store marshaled streamIDs to cache.
+ bb := bbPool.Get()
+ bb.B = idb.marshalStreamFilterCacheKey(bb.B[:0], tenantIDs, sf)
+ idb.s.streamFilterCache.SetBig(bb.B, b)
+ bbPool.Put(bb)
+}
+
+type batchItems struct {
+ buf []byte
+
+ items [][]byte
+}
+
+func (bi *batchItems) reset() {
+ bi.buf = bi.buf[:0]
+
+ items := bi.items
+ for i := range items {
+ items[i] = nil
+ }
+ bi.items = items[:0]
+}
+
+func getBatchItems() *batchItems {
+ v := batchItemsPool.Get()
+ if v == nil {
+ return &batchItems{}
+ }
+ return v.(*batchItems)
+}
+
+func putBatchItems(bi *batchItems) {
+ bi.reset()
+ batchItemsPool.Put(bi)
+}
+
+var batchItemsPool sync.Pool
+
+func mergeTagToStreamIDsRows(data []byte, items []mergeset.Item) ([]byte, []mergeset.Item) {
+ // Perform quick checks whether items contain rows starting from nsPrefixTagToStreamIDs
+ // based on the fact that items are sorted.
+ if len(items) <= 2 {
+ // The first and the last row must remain unchanged.
+ return data, items
+ }
+ firstItem := items[0].Bytes(data)
+ if len(firstItem) > 0 && firstItem[0] > nsPrefixTagToStreamIDs {
+ return data, items
+ }
+ lastItem := items[len(items)-1].Bytes(data)
+ if len(lastItem) > 0 && lastItem[0] < nsPrefixTagToStreamIDs {
+ return data, items
+ }
+
+ // items contain at least one row starting from nsPrefixTagToStreamIDs. Merge rows with a common tag.
+ tsm := getTagToStreamIDsRowsMerger()
+ tsm.dataCopy = append(tsm.dataCopy[:0], data...)
+ tsm.itemsCopy = append(tsm.itemsCopy[:0], items...)
+ sp := &tsm.sp
+ spPrev := &tsm.spPrev
+ dstData := data[:0]
+ dstItems := items[:0]
+ for i, it := range items {
+ item := it.Bytes(data)
+ if len(item) == 0 || item[0] != nsPrefixTagToStreamIDs || i == 0 || i == len(items)-1 {
+ // Write rows not starting with nsPrefixTagToStreamIDs as-is.
+ // Additionally write the first and the last row as-is in order to preserve
+ // sort order for adjacent blocks.
+ dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev)
+ dstData = append(dstData, item...)
+ dstItems = append(dstItems, mergeset.Item{
+ Start: uint32(len(dstData) - len(item)),
+ End: uint32(len(dstData)),
+ })
+ continue
+ }
+ if err := sp.Init(item); err != nil {
+ logger.Panicf("FATAL: cannot parse row during merge: %s", err)
+ }
+ if sp.StreamIDsLen() >= maxStreamIDsPerRow {
+ dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev)
+ dstData = append(dstData, item...)
+ dstItems = append(dstItems, mergeset.Item{
+ Start: uint32(len(dstData) - len(item)),
+ End: uint32(len(dstData)),
+ })
+ continue
+ }
+ if !sp.EqualPrefix(spPrev) {
+ dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev)
+ }
+ sp.ParseStreamIDs()
+ tsm.pendingStreamIDs = append(tsm.pendingStreamIDs, sp.StreamIDs...)
+ spPrev, sp = sp, spPrev
+ if len(tsm.pendingStreamIDs) >= maxStreamIDsPerRow {
+ dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev)
+ }
+ }
+ if len(tsm.pendingStreamIDs) > 0 {
+ logger.Panicf("BUG: tsm.pendingStreamIDs must be empty at this point; got %d items", len(tsm.pendingStreamIDs))
+ }
+ if !checkItemsSorted(dstData, dstItems) {
+ // Items could become unsorted if initial items contain duplicate streamIDs:
+ //
+ // item1: 1, 1, 5
+ // item2: 1, 4
+ //
+ // Items could become the following after the merge:
+ //
+ // item1: 1, 5
+ // item2: 1, 4
+ //
+ // i.e. item1 > item2
+ //
+ // Leave the original items unmerged, so they can be merged next time.
+ // This case should be quite rare - it may happen when entries for the same new stream
+ // are inserted simultaneously from multiple concurrent goroutines.
+ dstData = append(dstData[:0], tsm.dataCopy...)
+ dstItems = append(dstItems[:0], tsm.itemsCopy...)
+ if !checkItemsSorted(dstData, dstItems) {
+ logger.Panicf("BUG: the original items weren't sorted; items=%q", dstItems)
+ }
+ }
+ putTagToStreamIDsRowsMerger(tsm)
+ return dstData, dstItems
+}
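+
+// Roughly, the merge above turns multiple rows sharing the same (tenantID:name:value)
+// prefix, e.g.
+//
+//	prefix -> {id1, id3}
+//	prefix -> {id2}
+//
+// into a single sorted de-duplicated row `prefix -> {id1, id2, id3}`, as long as the
+// combined row stays below maxStreamIDsPerRow streamIDs.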
+
+// maxStreamIDsPerRow limits the number of streamIDs in tenantID:name:value -> streamIDs row.
+//
+// This reduces overhead on index and metaindex in lib/mergeset.
+const maxStreamIDsPerRow = 32
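+
+// Rows that already contain maxStreamIDsPerRow or more streamIDs are written as-is
+// during merges, and pending streamIDs are flushed once this limit is reached,
+// so merged rows stay reasonably small.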
+
+type u128Sorter []u128
+
+func (s u128Sorter) Len() int { return len(s) }
+func (s u128Sorter) Less(i, j int) bool {
+ return s[i].less(&s[j])
+}
+func (s u128Sorter) Swap(i, j int) {
+ s[i], s[j] = s[j], s[i]
+}
+
+type tagToStreamIDsRowsMerger struct {
+ pendingStreamIDs u128Sorter
+ sp tagToStreamIDsRowParser
+ spPrev tagToStreamIDsRowParser
+
+ itemsCopy []mergeset.Item
+ dataCopy []byte
+}
+
+func (tsm *tagToStreamIDsRowsMerger) Reset() {
+ tsm.pendingStreamIDs = tsm.pendingStreamIDs[:0]
+ tsm.sp.Reset()
+ tsm.spPrev.Reset()
+
+ tsm.itemsCopy = tsm.itemsCopy[:0]
+ tsm.dataCopy = tsm.dataCopy[:0]
+}
+
+func (tsm *tagToStreamIDsRowsMerger) flushPendingStreamIDs(dstData []byte, dstItems []mergeset.Item, sp *tagToStreamIDsRowParser) ([]byte, []mergeset.Item) {
+ if len(tsm.pendingStreamIDs) == 0 {
+ // Nothing to flush
+ return dstData, dstItems
+ }
+ // Use sort.Sort instead of sort.Slice in order to reduce memory allocations.
+ sort.Sort(&tsm.pendingStreamIDs)
+ tsm.pendingStreamIDs = removeDuplicateStreamIDs(tsm.pendingStreamIDs)
+
+ // Marshal pendingStreamIDs
+ dstDataLen := len(dstData)
+ dstData = sp.MarshalPrefix(dstData)
+ pendingStreamIDs := tsm.pendingStreamIDs
+ for i := range pendingStreamIDs {
+ dstData = pendingStreamIDs[i].marshal(dstData)
+ }
+ dstItems = append(dstItems, mergeset.Item{
+ Start: uint32(dstDataLen),
+ End: uint32(len(dstData)),
+ })
+ tsm.pendingStreamIDs = tsm.pendingStreamIDs[:0]
+ return dstData, dstItems
+}
+
+func removeDuplicateStreamIDs(sortedStreamIDs []u128) []u128 {
+ if len(sortedStreamIDs) < 2 {
+ return sortedStreamIDs
+ }
+ hasDuplicates := false
+ for i := 1; i < len(sortedStreamIDs); i++ {
+ if sortedStreamIDs[i-1] == sortedStreamIDs[i] {
+ hasDuplicates = true
+ break
+ }
+ }
+ if !hasDuplicates {
+ return sortedStreamIDs
+ }
+ dstStreamIDs := sortedStreamIDs[:1]
+ for i := 1; i < len(sortedStreamIDs); i++ {
+ if sortedStreamIDs[i-1] == sortedStreamIDs[i] {
+ continue
+ }
+ dstStreamIDs = append(dstStreamIDs, sortedStreamIDs[i])
+ }
+ return dstStreamIDs
+}
+
+func getTagToStreamIDsRowsMerger() *tagToStreamIDsRowsMerger {
+ v := tsmPool.Get()
+ if v == nil {
+ return &tagToStreamIDsRowsMerger{}
+ }
+ return v.(*tagToStreamIDsRowsMerger)
+}
+
+func putTagToStreamIDsRowsMerger(tsm *tagToStreamIDsRowsMerger) {
+ tsm.Reset()
+ tsmPool.Put(tsm)
+}
+
+var tsmPool sync.Pool
+
+type tagToStreamIDsRowParser struct {
+ // TenantID contains TenantID of the parsed row
+ TenantID TenantID
+
+ // StreamIDs contains parsed StreamIDs after ParseStreamIDs call
+ StreamIDs []u128
+
+ // streamIDsParsed is set to true after ParseStreamIDs call
+ streamIDsParsed bool
+
+ // Tag contains parsed tag after Init call
+ Tag streamTag
+
+ // tail contains the remaining unparsed streamIDs
+ tail []byte
+}
+
+func (sp *tagToStreamIDsRowParser) Reset() {
+ sp.TenantID.Reset()
+ sp.StreamIDs = sp.StreamIDs[:0]
+ sp.streamIDsParsed = false
+ sp.Tag.reset()
+ sp.tail = nil
+}
+
+// Init initializes sp from b, which should contain an encoded tenantID:name:value -> streamIDs row.
+//
+// b cannot be re-used until Reset call.
+//
+// ParseStreamIDs() must be called later for obtaining sp.StreamIDs from the given tail.
+func (sp *tagToStreamIDsRowParser) Init(b []byte) error {
+ tail, nsPrefix, err := unmarshalCommonPrefix(&sp.TenantID, b)
+ if err != nil {
+ return fmt.Errorf("invalid tenantID:name:value -> streamIDs row %q: %w", b, err)
+ }
+ if nsPrefix != nsPrefixTagToStreamIDs {
+ return fmt.Errorf("invalid prefix for tenantID:name:value -> streamIDs row %q; got %d; want %d", b, nsPrefix, nsPrefixTagToStreamIDs)
+ }
+ tail, err = sp.Tag.indexdbUnmarshal(tail)
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal tag from tenantID:name:value -> streamIDs row %q: %w", b, err)
+ }
+ if err = sp.InitOnlyTail(tail); err != nil {
+ return fmt.Errorf("cannot initialize tail from tenantID:name:value -> streamIDs row %q: %w", b, err)
+ }
+ return nil
+}
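+
+// Typical usage (a sketch): Init(row) parses the prefix and remembers the tail,
+// while ParseStreamIDs() later decodes the 16-byte streamIDs from that tail, so
+// callers that only compare prefixes via EqualPrefix never pay for the decoding.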
+
+// MarshalPrefix marshals row prefix without tail to dst.
+func (sp *tagToStreamIDsRowParser) MarshalPrefix(dst []byte) []byte {
+ dst = marshalCommonPrefix(dst, nsPrefixTagToStreamIDs, sp.TenantID)
+ dst = sp.Tag.indexdbMarshal(dst)
+ return dst
+}
+
+// InitOnlyTail initializes sp.tail from tail, which must contain streamIDs.
+//
+// tail cannot be re-used until Reset call.
+//
+// ParseStreamIDs() must be called later for obtaining sp.StreamIDs from the given tail.
+func (sp *tagToStreamIDsRowParser) InitOnlyTail(tail []byte) error {
+ if len(tail) == 0 {
+ return fmt.Errorf("missing streamID in the tenantID:name:value -> streamIDs row")
+ }
+ if len(tail)%16 != 0 {
+ return fmt.Errorf("invalid tail length in the tenantID:name:value -> streamIDs row; got %d bytes; must be multiple of 16 bytes", len(tail))
+ }
+ sp.tail = tail
+ sp.streamIDsParsed = false
+ return nil
+}
+
+// EqualPrefix returns true if prefixes for sp and x are equal.
+//
+// Prefix contains (tenantID:name:value)
+func (sp *tagToStreamIDsRowParser) EqualPrefix(x *tagToStreamIDsRowParser) bool {
+ if !sp.TenantID.equal(&x.TenantID) {
+ return false
+ }
+ if !sp.Tag.equal(&x.Tag) {
+ return false
+ }
+ return true
+}
+
+// StreamIDsLen returns the number of StreamIDs in sp.tail
+func (sp *tagToStreamIDsRowParser) StreamIDsLen() int {
+ return len(sp.tail) / 16
+}
+
+// ParseStreamIDs parses StreamIDs from sp.tail into sp.StreamIDs.
+func (sp *tagToStreamIDsRowParser) ParseStreamIDs() {
+ if sp.streamIDsParsed {
+ return
+ }
+ tail := sp.tail
+ n := len(tail) / 16
+ streamIDs := sp.StreamIDs[:0]
+ if n <= cap(streamIDs) {
+ streamIDs = streamIDs[:n]
+ } else {
+ streamIDs = append(streamIDs[:cap(streamIDs)], make([]u128, n-cap(streamIDs))...)
+ }
+ sp.StreamIDs = streamIDs
+ for i := 0; i < n; i++ {
+ var err error
+ tail, err = streamIDs[i].unmarshal(tail)
+ if err != nil {
+ logger.Panicf("FATAL: cannot unmarshal streamID: %s", err)
+ }
+ }
+ sp.streamIDsParsed = true
+}
+
+func (sp *tagToStreamIDsRowParser) UpdateStreamIDs(ids map[u128]struct{}, tail []byte) {
+ sp.Reset()
+ if err := sp.InitOnlyTail(tail); err != nil {
+ logger.Panicf("FATAL: cannot parse '(date, tag) -> streamIDs' row: %s", err)
+ }
+ sp.ParseStreamIDs()
+ for _, id := range sp.StreamIDs {
+ ids[id] = struct{}{}
+ }
+}
+
+// commonPrefixLen is the length of common prefix for indexdb rows
+// 1 byte for ns* prefix + 8 bytes for tenantID
+const commonPrefixLen = 1 + 8
+
+func marshalCommonPrefix(dst []byte, nsPrefix byte, tenantID TenantID) []byte {
+ dst = append(dst, nsPrefix)
+ dst = tenantID.marshal(dst)
+ return dst
+}
+
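+// unmarshalCommonPrefix unmarshals the nsPrefix byte and tenantID from src into dstTenantID and returns the remaining tail together with the nsPrefix byte.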
+func unmarshalCommonPrefix(dstTenantID *TenantID, src []byte) ([]byte, byte, error) {
+ if len(src) < commonPrefixLen {
+ return nil, 0, fmt.Errorf("cannot unmarshal common prefix from %d bytes; need at least %d bytes; data=%X", len(src), commonPrefixLen, src)
+ }
+ prefix := src[0]
+ src = src[1:]
+ tail, err := dstTenantID.unmarshal(src)
+ if err != nil {
+ return nil, 0, fmt.Errorf("cannot unmarshal tenantID: %s", err)
+ }
+ return tail, prefix, nil
+}
+
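+// checkItemsSorted returns true if items are sorted in ascending order.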
+func checkItemsSorted(data []byte, items []mergeset.Item) bool {
+ if len(items) == 0 {
+ return true
+ }
+ prevItem := items[0].String(data)
+ for _, it := range items[1:] {
+ currItem := it.String(data)
+ if prevItem > currItem {
+ return false
+ }
+ prevItem = currItem
+ }
+ return true
+}
diff --git a/lib/logstorage/indexdb_test.go b/lib/logstorage/indexdb_test.go
new file mode 100644
index 000000000..02e0951f0
--- /dev/null
+++ b/lib/logstorage/indexdb_test.go
@@ -0,0 +1,253 @@
+package logstorage
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+)
+
+func TestStorageSearchStreamIDs(t *testing.T) {
+ const path = "TestStorageSearchStreamIDs"
+ const partitionName = "foobar"
+ s := newTestStorage()
+ mustCreateIndexdb(path)
+ idb := mustOpenIndexdb(path, partitionName, s)
+
+ tenantID := TenantID{
+ AccountID: 123,
+ ProjectID: 567,
+ }
+ getStreamIDForTags := func(tags map[string]string) (streamID, []byte) {
+ st := GetStreamTags()
+ for k, v := range tags {
+ st.Add(k, v)
+ }
+ streamTagsCanonical := st.MarshalCanonical(nil)
+ PutStreamTags(st)
+ id := hash128(streamTagsCanonical)
+ sid := streamID{
+ tenantID: tenantID,
+ id: id,
+ }
+ return sid, streamTagsCanonical
+ }
+
+ // Create indexdb entries
+ const jobsCount = 7
+ const instancesCount = 5
+ for i := 0; i < jobsCount; i++ {
+ for j := 0; j < instancesCount; j++ {
+ sid, streamTagsCanonical := getStreamIDForTags(map[string]string{
+ "job": fmt.Sprintf("job-%d", i),
+ "instance": fmt.Sprintf("instance-%d", j),
+ })
+ idb.mustRegisterStream(&sid, streamTagsCanonical)
+ }
+ }
+ idb.debugFlush()
+
+ f := func(streamFilter string, expectedStreamIDs []streamID) {
+ t.Helper()
+ sf := mustNewStreamFilter(streamFilter)
+ if expectedStreamIDs == nil {
+ expectedStreamIDs = []streamID{}
+ }
+ sortStreamIDs(expectedStreamIDs)
+ for i := 0; i < 3; i++ {
+ streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf)
+ if !reflect.DeepEqual(streamIDs, expectedStreamIDs) {
+ t.Fatalf("unexpected streamIDs on iteration %d; got %v; want %v", i, streamIDs, expectedStreamIDs)
+ }
+ }
+ }
+
+ t.Run("missing-tenant-id", func(t *testing.T) {
+ tenantID := TenantID{
+ AccountID: 1,
+ ProjectID: 2,
+ }
+ sf := mustNewStreamFilter(`{job="job-0",instance="instance-0"}`)
+ for i := 0; i < 3; i++ {
+ streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf)
+ if len(streamIDs) > 0 {
+ t.Fatalf("unexpected non-empty streamIDs on iteration %d: %d", i, len(streamIDs))
+ }
+ }
+ })
+ t.Run("missing-job", func(t *testing.T) {
+ f(`{job="non-existing-job",instance="instance-0"}`, nil)
+ })
+ t.Run("missing-job-re", func(t *testing.T) {
+ f(`{job=~"non-existing-job|",instance="instance-0"}`, nil)
+ })
+ t.Run("missing-job-negative-re", func(t *testing.T) {
+ f(`{job!~"job.+",instance="instance-0"}`, nil)
+ })
+ t.Run("empty-job", func(t *testing.T) {
+ f(`{job="",instance="instance-0"}`, nil)
+ })
+ t.Run("missing-instance", func(t *testing.T) {
+ f(`{job="job-0",instance="non-existing-instance"}`, nil)
+ })
+ t.Run("missing-instance-re", func(t *testing.T) {
+ f(`{job="job-0",instance=~"non-existing-instance|"}`, nil)
+ })
+ t.Run("missing-instance-negative-re", func(t *testing.T) {
+ f(`{job="job-0",instance!~"instance.+"}`, nil)
+ })
+ t.Run("empty-instance", func(t *testing.T) {
+ f(`{job="job-0",instance=""}`, nil)
+ })
+ t.Run("non-existing-tag", func(t *testing.T) {
+ f(`{job="job-0",instance="instance-0",non_existing_tag="foobar"}`, nil)
+ })
+ t.Run("non-existing-non-empty-tag", func(t *testing.T) {
+ f(`{job="job-0",instance="instance-0",non_existing_tag!=""}`, nil)
+ })
+ t.Run("non-existing-tag-re", func(t *testing.T) {
+ f(`{job="job-0",instance="instance-0",non_existing_tag=~"foo.+"}`, nil)
+ })
+ t.Run("non-existing-non-empty-tag-re", func(t *testing.T) {
+ f(`{job="job-0",instance="instance-0",non_existing_tag!~""}`, nil)
+ })
+
+ t.Run("match-job-instance", func(t *testing.T) {
+ sid, _ := getStreamIDForTags(map[string]string{
+ "instance": "instance-0",
+ "job": "job-0",
+ })
+ f(`{job="job-0",instance="instance-0"}`, []streamID{sid})
+ })
+ t.Run("match-non-existing-tag", func(t *testing.T) {
+ sid, _ := getStreamIDForTags(map[string]string{
+ "instance": "instance-0",
+ "job": "job-0",
+ })
+ f(`{job="job-0",instance="instance-0",non_existing_tag=~"foo|"}`, []streamID{sid})
+ })
+ t.Run("match-job", func(t *testing.T) {
+ var streamIDs []streamID
+ for i := 0; i < instancesCount; i++ {
+ sid, _ := getStreamIDForTags(map[string]string{
+ "instance": fmt.Sprintf("instance-%d", i),
+ "job": "job-0",
+ })
+ streamIDs = append(streamIDs, sid)
+ }
+ f(`{job="job-0"}`, streamIDs)
+ })
+ t.Run("match-instance", func(t *testing.T) {
+ var streamIDs []streamID
+ for i := 0; i < jobsCount; i++ {
+ sid, _ := getStreamIDForTags(map[string]string{
+ "instance": "instance-1",
+ "job": fmt.Sprintf("job-%d", i),
+ })
+ streamIDs = append(streamIDs, sid)
+ }
+ f(`{instance="instance-1"}`, streamIDs)
+ })
+ t.Run("match-re", func(t *testing.T) {
+ var streamIDs []streamID
+ for _, instanceID := range []int{3, 1} {
+ for _, jobID := range []int{0, 2} {
+ sid, _ := getStreamIDForTags(map[string]string{
+ "instance": fmt.Sprintf("instance-%d", instanceID),
+ "job": fmt.Sprintf("job-%d", jobID),
+ })
+ streamIDs = append(streamIDs, sid)
+ }
+ }
+ f(`{job=~"job-(0|2)",instance=~"instance-[13]"}`, streamIDs)
+ })
+ t.Run("match-re-empty-match", func(t *testing.T) {
+ var streamIDs []streamID
+ for _, instanceID := range []int{3, 1} {
+ for _, jobID := range []int{0, 2} {
+ sid, _ := getStreamIDForTags(map[string]string{
+ "instance": fmt.Sprintf("instance-%d", instanceID),
+ "job": fmt.Sprintf("job-%d", jobID),
+ })
+ streamIDs = append(streamIDs, sid)
+ }
+ }
+ f(`{job=~"job-(0|2)|",instance=~"instance-[13]"}`, streamIDs)
+ })
+ t.Run("match-negative-re", func(t *testing.T) {
+ var instanceIDs []int
+ for i := 0; i < instancesCount; i++ {
+ if i != 0 && i != 1 {
+ instanceIDs = append(instanceIDs, i)
+ }
+ }
+ var jobIDs []int
+ for i := 0; i < jobsCount; i++ {
+ if i > 2 {
+ jobIDs = append(jobIDs, i)
+ }
+ }
+ var streamIDs []streamID
+ for _, instanceID := range instanceIDs {
+ for _, jobID := range jobIDs {
+ sid, _ := getStreamIDForTags(map[string]string{
+ "instance": fmt.Sprintf("instance-%d", instanceID),
+ "job": fmt.Sprintf("job-%d", jobID),
+ })
+ streamIDs = append(streamIDs, sid)
+ }
+ }
+ f(`{job!~"job-[0-2]",instance!~"instance-(0|1)"}`, streamIDs)
+ })
+ t.Run("match-negative-re-empty-match", func(t *testing.T) {
+ var instanceIDs []int
+ for i := 0; i < instancesCount; i++ {
+ if i != 0 && i != 1 {
+ instanceIDs = append(instanceIDs, i)
+ }
+ }
+ var jobIDs []int
+ for i := 0; i < jobsCount; i++ {
+ if i > 2 {
+ jobIDs = append(jobIDs, i)
+ }
+ }
+ var streamIDs []streamID
+ for _, instanceID := range instanceIDs {
+ for _, jobID := range jobIDs {
+ sid, _ := getStreamIDForTags(map[string]string{
+ "instance": fmt.Sprintf("instance-%d", instanceID),
+ "job": fmt.Sprintf("job-%d", jobID),
+ })
+ streamIDs = append(streamIDs, sid)
+ }
+ }
+ f(`{job!~"job-[0-2]",instance!~"instance-(0|1)|"}`, streamIDs)
+ })
+ t.Run("match-negative-job", func(t *testing.T) {
+ instanceIDs := []int{2}
+ var jobIDs []int
+ for i := 0; i < jobsCount; i++ {
+ if i != 1 {
+ jobIDs = append(jobIDs, i)
+ }
+ }
+ var streamIDs []streamID
+ for _, instanceID := range instanceIDs {
+ for _, jobID := range jobIDs {
+ sid, _ := getStreamIDForTags(map[string]string{
+ "instance": fmt.Sprintf("instance-%d", instanceID),
+ "job": fmt.Sprintf("job-%d", jobID),
+ })
+ streamIDs = append(streamIDs, sid)
+ }
+ }
+ f(`{instance="instance-2",job!="job-1"}`, streamIDs)
+ })
+
+ mustCloseIndexdb(idb)
+ fs.MustRemoveAll(path)
+
+ closeTestStorage(s)
+}
diff --git a/lib/logstorage/inmemory_part.go b/lib/logstorage/inmemory_part.go
new file mode 100644
index 000000000..2afd970ec
--- /dev/null
+++ b/lib/logstorage/inmemory_part.go
@@ -0,0 +1,155 @@
+package logstorage
+
+import (
+ "path/filepath"
+ "sort"
+ "sync"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+)
+
+// inmemoryPart is an in-memory part.
+type inmemoryPart struct {
+ // ph contains partHeader information for the given in-memory part.
+ ph partHeader
+
+ metaindex bytesutil.ByteBuffer
+ index bytesutil.ByteBuffer
+ columnsHeader bytesutil.ByteBuffer
+ timestamps bytesutil.ByteBuffer
+ fieldValues bytesutil.ByteBuffer
+ fieldBloomFilter bytesutil.ByteBuffer
+ messageValues bytesutil.ByteBuffer
+ messageBloomFilter bytesutil.ByteBuffer
+}
+
+// reset resets mp, so it can be re-used
+func (mp *inmemoryPart) reset() {
+ mp.ph.reset()
+
+ mp.metaindex.Reset()
+ mp.index.Reset()
+ mp.columnsHeader.Reset()
+ mp.timestamps.Reset()
+ mp.fieldValues.Reset()
+ mp.fieldBloomFilter.Reset()
+ mp.messageValues.Reset()
+ mp.messageBloomFilter.Reset()
+}
+
+// mustInitFromRows initializes mp from lr.
+func (mp *inmemoryPart) mustInitFromRows(lr *LogRows) {
+ mp.reset()
+
+ if len(lr.timestamps) == 0 {
+ return
+ }
+
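+ // Sort rows by (streamID, timestamp) in order to group rows belonging to the same stream together.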
+ sort.Sort(lr)
+
+ bsw := getBlockStreamWriter()
+ bsw.MustInitForInmemoryPart(mp)
+ trs := getTmpRows()
+ var sidPrev *streamID
+ uncompressedBlockSizeBytes := uint64(0)
+ timestamps := lr.timestamps
+ rows := lr.rows
+ streamIDs := lr.streamIDs
+ for i := range timestamps {
+ streamID := &streamIDs[i]
+ if sidPrev == nil {
+ sidPrev = streamID
+ }
+
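+ // Write the accumulated rows into the next block if the block becomes too big or the streamID changes.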
+ if uncompressedBlockSizeBytes >= maxUncompressedBlockSize || !streamID.equal(sidPrev) {
+ bsw.MustWriteRows(sidPrev, trs.timestamps, trs.rows)
+ trs.reset()
+ sidPrev = streamID
+ uncompressedBlockSizeBytes = 0
+ }
+ fields := rows[i]
+ trs.timestamps = append(trs.timestamps, timestamps[i])
+ trs.rows = append(trs.rows, fields)
+ uncompressedBlockSizeBytes += uncompressedRowSizeBytes(fields)
+ }
+ bsw.MustWriteRows(sidPrev, trs.timestamps, trs.rows)
+ putTmpRows(trs)
+ bsw.Finalize(&mp.ph)
+ putBlockStreamWriter(bsw)
+}
+
+// MustStoreToDisk stores mp to disk at the given path.
+func (mp *inmemoryPart) MustStoreToDisk(path string) {
+ fs.MustMkdirFailIfExist(path)
+
+ metaindexPath := filepath.Join(path, metaindexFilename)
+ indexPath := filepath.Join(path, indexFilename)
+ columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
+ timestampsPath := filepath.Join(path, timestampsFilename)
+ fieldValuesPath := filepath.Join(path, fieldValuesFilename)
+ fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename)
+ messageValuesPath := filepath.Join(path, messageValuesFilename)
+ messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
+
+ fs.MustWriteSync(metaindexPath, mp.metaindex.B)
+ fs.MustWriteSync(indexPath, mp.index.B)
+ fs.MustWriteSync(columnsHeaderPath, mp.columnsHeader.B)
+ fs.MustWriteSync(timestampsPath, mp.timestamps.B)
+ fs.MustWriteSync(fieldValuesPath, mp.fieldValues.B)
+ fs.MustWriteSync(fieldBloomFilterPath, mp.fieldBloomFilter.B)
+ fs.MustWriteSync(messageValuesPath, mp.messageValues.B)
+ fs.MustWriteSync(messageBloomFilterPath, mp.messageBloomFilter.B)
+
+ mp.ph.mustWriteMetadata(path)
+
+ fs.MustSyncPath(path)
+ // Do not sync parent directory - it must be synced by the caller.
+}
+
+// tmpRows is used as a helper for inmemoryPart.mustInitFromRows()
+type tmpRows struct {
+ timestamps []int64
+
+ rows [][]Field
+}
+
+func (trs *tmpRows) reset() {
+ trs.timestamps = trs.timestamps[:0]
+
+ rows := trs.rows
+ for i := range rows {
+ rows[i] = nil
+ }
+ trs.rows = rows[:0]
+}
+
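+// getTmpRows returns tmpRows from the pool.
+//
+// Return it to the pool via putTmpRows() when it is no longer needed.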
+func getTmpRows() *tmpRows {
+ v := tmpRowsPool.Get()
+ if v == nil {
+ return &tmpRows{}
+ }
+ return v.(*tmpRows)
+}
+
+func putTmpRows(trs *tmpRows) {
+ trs.reset()
+ tmpRowsPool.Put(trs)
+}
+
+var tmpRowsPool sync.Pool
+
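+// getInmemoryPart returns inmemoryPart from the pool.
+//
+// Return it to the pool via putInmemoryPart() when it is no longer needed.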
+func getInmemoryPart() *inmemoryPart {
+ v := inmemoryPartPool.Get()
+ if v == nil {
+ return &inmemoryPart{}
+ }
+ return v.(*inmemoryPart)
+}
+
+func putInmemoryPart(mp *inmemoryPart) {
+ mp.reset()
+ inmemoryPartPool.Put(mp)
+}
+
+var inmemoryPartPool sync.Pool
diff --git a/lib/logstorage/inmemory_part_test.go b/lib/logstorage/inmemory_part_test.go
new file mode 100644
index 000000000..85b2c9f78
--- /dev/null
+++ b/lib/logstorage/inmemory_part_test.go
@@ -0,0 +1,343 @@
+package logstorage
+
+import (
+ "fmt"
+ "math"
+ "math/rand"
+ "reflect"
+ "sort"
+ "testing"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+func TestInmemoryPartMustInitFromRows(t *testing.T) {
+ f := func(lr *LogRows, blocksCountExpected int, compressionRateExpected float64) {
+ t.Helper()
+
+ uncompressedSizeBytesExpected := uncompressedRowsSizeBytes(lr.rows)
+ rowsCountExpected := len(lr.timestamps)
+ minTimestampExpected := int64(math.MaxInt64)
+ maxTimestampExpected := int64(math.MinInt64)
+
+ // make a copy of lr - it is used for comparing the results later,
+ // since lr may be modified by inmemoryPart.mustInitFromRows()
+ lrOrig := GetLogRows(nil, nil)
+ for i, timestamp := range lr.timestamps {
+ if timestamp < minTimestampExpected {
+ minTimestampExpected = timestamp
+ }
+ if timestamp > maxTimestampExpected {
+ maxTimestampExpected = timestamp
+ }
+ lrOrig.mustAddInternal(lr.streamIDs[i], timestamp, lr.rows[i], lr.streamTagsCanonicals[i])
+ }
+
+ // Create inmemory part from lr
+ mp := getInmemoryPart()
+ mp.mustInitFromRows(lr)
+
+ // Check mp.ph
+ ph := &mp.ph
+ checkCompressionRate(t, ph, compressionRateExpected)
+ if ph.UncompressedSizeBytes != uncompressedSizeBytesExpected {
+ t.Fatalf("unexpected UncompressedSizeBytes in partHeader; got %d; want %d", ph.UncompressedSizeBytes, uncompressedSizeBytesExpected)
+ }
+ if ph.RowsCount != uint64(rowsCountExpected) {
+ t.Fatalf("unexpected rowsCount in partHeader; got %d; want %d", ph.RowsCount, rowsCountExpected)
+ }
+ if ph.BlocksCount != uint64(blocksCountExpected) {
+ t.Fatalf("unexpected blocksCount in partHeader; got %d; want %d", ph.BlocksCount, blocksCountExpected)
+ }
+ if ph.RowsCount > 0 {
+ if ph.MinTimestamp != minTimestampExpected {
+ t.Fatalf("unexpected minTimestamp in partHeader; got %d; want %d", ph.MinTimestamp, minTimestampExpected)
+ }
+ if ph.MaxTimestamp != maxTimestampExpected {
+ t.Fatalf("unexpected maxTimestamp in partHeader; got %d; want %d", ph.MaxTimestamp, maxTimestampExpected)
+ }
+ }
+
+ // Read log entries from mp into lrResult
+ sbu := getStringsBlockUnmarshaler()
+ defer putStringsBlockUnmarshaler(sbu)
+ vd := getValuesDecoder()
+ defer putValuesDecoder(vd)
+ lrResult := mp.readLogRows(sbu, vd)
+ putInmemoryPart(mp)
+
+ // compare lrOrig to lrResult
+ if err := checkEqualRows(lrResult, lrOrig); err != nil {
+ t.Fatalf("unequal log entries: %s", err)
+ }
+ }
+
+ f(GetLogRows(nil, nil), 0, 0)
+
+ // Check how inmemoryPart works with a single stream
+ f(newTestLogRows(1, 1, 0), 1, 0.8)
+ f(newTestLogRows(1, 2, 0), 1, 0.9)
+ f(newTestLogRows(1, 10, 0), 1, 2.0)
+ f(newTestLogRows(1, 1000, 0), 1, 7.1)
+ f(newTestLogRows(1, 20000, 0), 2, 7.2)
+
+ // Check how inmemoryPart works with multiple streams
+ f(newTestLogRows(2, 1, 0), 2, 0.8)
+ f(newTestLogRows(10, 1, 0), 10, 0.9)
+ f(newTestLogRows(100, 1, 0), 100, 1.0)
+ f(newTestLogRows(10, 5, 0), 10, 1.4)
+ f(newTestLogRows(10, 1000, 0), 10, 7.2)
+ f(newTestLogRows(100, 100, 0), 100, 5.0)
+}
+
+func checkCompressionRate(t *testing.T, ph *partHeader, compressionRateExpected float64) {
+ t.Helper()
+ compressionRate := float64(ph.UncompressedSizeBytes) / float64(ph.CompressedSizeBytes)
+ if math.Abs(compressionRate-compressionRateExpected) > 0.1 {
+ t.Fatalf("unexpected compression rate; got %.1f; want %.1f", compressionRate, compressionRateExpected)
+ }
+}
+
+func TestInmemoryPartInitFromBlockStreamReaders(t *testing.T) {
+ f := func(lrs []*LogRows, blocksCountExpected int, compressionRateExpected float64) {
+ t.Helper()
+
+ uncompressedSizeBytesExpected := uint64(0)
+ rowsCountExpected := 0
+ minTimestampExpected := int64(math.MaxInt64)
+ maxTimestampExpected := int64(math.MinInt64)
+
+ // make a copy of lrs in order to compare the results after merge.
+ lrOrig := GetLogRows(nil, nil)
+ for _, lr := range lrs {
+ uncompressedSizeBytesExpected += uncompressedRowsSizeBytes(lr.rows)
+ rowsCountExpected += len(lr.timestamps)
+ for j, timestamp := range lr.timestamps {
+ if timestamp < minTimestampExpected {
+ minTimestampExpected = timestamp
+ }
+ if timestamp > maxTimestampExpected {
+ maxTimestampExpected = timestamp
+ }
+ lrOrig.mustAddInternal(lr.streamIDs[j], timestamp, lr.rows[j], lr.streamTagsCanonicals[j])
+ }
+ }
+
+ // Initialize readers from lrs
+ var mpsSrc []*inmemoryPart
+ var bsrs []*blockStreamReader
+ for _, lr := range lrs {
+ mp := getInmemoryPart()
+ mp.mustInitFromRows(lr)
+ mpsSrc = append(mpsSrc, mp)
+
+ bsr := getBlockStreamReader()
+ bsr.MustInitFromInmemoryPart(mp)
+ bsrs = append(bsrs, bsr)
+ }
+ defer func() {
+ for _, bsr := range bsrs {
+ putBlockStreamReader(bsr)
+ }
+ for _, mp := range mpsSrc {
+ putInmemoryPart(mp)
+ }
+ }()
+
+ // Merge data from bsrs into mpDst
+ mpDst := getInmemoryPart()
+ bsw := getBlockStreamWriter()
+ bsw.MustInitForInmemoryPart(mpDst)
+ mustMergeBlockStreams(&mpDst.ph, bsw, bsrs, nil)
+ putBlockStreamWriter(bsw)
+
+ // Check mpDst.ph stats
+ ph := &mpDst.ph
+ checkCompressionRate(t, ph, compressionRateExpected)
+ if ph.UncompressedSizeBytes != uncompressedSizeBytesExpected {
+ t.Fatalf("unexpected uncompressedSizeBytes in partHeader; got %d; want %d", ph.UncompressedSizeBytes, uncompressedSizeBytesExpected)
+ }
+ if ph.RowsCount != uint64(rowsCountExpected) {
+ t.Fatalf("unexpected number of entries in partHeader; got %d; want %d", ph.RowsCount, rowsCountExpected)
+ }
+ if ph.BlocksCount != uint64(blocksCountExpected) {
+ t.Fatalf("unexpected blocksCount in partHeader; got %d; want %d", ph.BlocksCount, blocksCountExpected)
+ }
+ if ph.RowsCount > 0 {
+ if ph.MinTimestamp != minTimestampExpected {
+ t.Fatalf("unexpected minTimestamp in partHeader; got %d; want %d", ph.MinTimestamp, minTimestampExpected)
+ }
+ if ph.MaxTimestamp != maxTimestampExpected {
+ t.Fatalf("unexpected maxTimestamp in partHeader; got %d; want %d", ph.MaxTimestamp, maxTimestampExpected)
+ }
+ }
+
+ // Read log entries from mpDst into lrResult
+ sbu := getStringsBlockUnmarshaler()
+ defer putStringsBlockUnmarshaler(sbu)
+ vd := getValuesDecoder()
+ defer putValuesDecoder(vd)
+ lrResult := mpDst.readLogRows(sbu, vd)
+ putInmemoryPart(mpDst)
+
+ // compare lrOrig to lrResult
+ if err := checkEqualRows(lrResult, lrOrig); err != nil {
+ t.Fatalf("unequal log entries: %s", err)
+ }
+ }
+
+ // Check empty readers
+ f(nil, 0, 0)
+ f([]*LogRows{GetLogRows(nil, nil)}, 0, 0)
+ f([]*LogRows{GetLogRows(nil, nil), GetLogRows(nil, nil)}, 0, 0)
+
+ // Check merge with a single reader
+ f([]*LogRows{newTestLogRows(1, 1, 0)}, 1, 0.8)
+ f([]*LogRows{newTestLogRows(1, 10, 0)}, 1, 2.0)
+ f([]*LogRows{newTestLogRows(1, 100, 0)}, 1, 4.9)
+ f([]*LogRows{newTestLogRows(1, 1000, 0)}, 1, 7.1)
+ f([]*LogRows{newTestLogRows(1, 10000, 0)}, 1, 7.4)
+ f([]*LogRows{newTestLogRows(10, 1, 0)}, 10, 0.9)
+ f([]*LogRows{newTestLogRows(100, 1, 0)}, 100, 1.0)
+ f([]*LogRows{newTestLogRows(1000, 1, 0)}, 1000, 1.0)
+ f([]*LogRows{newTestLogRows(10, 10, 0)}, 10, 2.1)
+ f([]*LogRows{newTestLogRows(10, 100, 0)}, 10, 4.9)
+
+ // Check merge with multiple readers
+ f([]*LogRows{
+ newTestLogRows(1, 1, 0),
+ newTestLogRows(1, 1, 1),
+ }, 2, 0.9)
+ f([]*LogRows{
+ newTestLogRows(2, 2, 0),
+ newTestLogRows(2, 2, 0),
+ }, 2, 1.8)
+ f([]*LogRows{
+ newTestLogRows(1, 20, 0),
+ newTestLogRows(1, 10, 1),
+ newTestLogRows(1, 5, 2),
+ }, 3, 2.2)
+ f([]*LogRows{
+ newTestLogRows(10, 20, 0),
+ newTestLogRows(20, 10, 1),
+ newTestLogRows(30, 5, 2),
+ }, 60, 2.0)
+ f([]*LogRows{
+ newTestLogRows(10, 20, 0),
+ newTestLogRows(20, 10, 1),
+ newTestLogRows(30, 5, 2),
+ newTestLogRows(20, 7, 3),
+ newTestLogRows(10, 9, 4),
+ }, 90, 1.9)
+}
+
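+// newTestLogRows returns LogRows with the given number of streams and rows per stream; seed makes the generated rows reproducible.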
+func newTestLogRows(streams, rowsPerStream int, seed int64) *LogRows {
+ streamTags := []string{
+ "some-stream-tag",
+ }
+ lr := GetLogRows(streamTags, nil)
+ rng := rand.New(rand.NewSource(seed))
+ var fields []Field
+ for i := 0; i < streams; i++ {
+ tenantID := TenantID{
+ AccountID: rng.Uint32(),
+ ProjectID: rng.Uint32(),
+ }
+ for j := 0; j < rowsPerStream; j++ {
+ // Add stream tags
+ fields = append(fields[:0], Field{
+ Name: "some-stream-tag",
+ Value: fmt.Sprintf("some-stream-value-%d", i),
+ })
+ // Add the remaining tags
+ for k := 0; k < 5; k++ {
+ if rng.Float64() < 0.5 {
+ fields = append(fields, Field{
+ Name: fmt.Sprintf("field_%d", k),
+ Value: fmt.Sprintf("value_%d_%d_%d", i, j, k),
+ })
+ }
+ }
+ // add a message field
+ fields = append(fields, Field{
+ Name: "",
+ Value: fmt.Sprintf("some row number %d at stream %d", j, i),
+ })
+ // add a field with constant value
+ fields = append(fields, Field{
+ Name: "job",
+ Value: "foobar",
+ })
+ // add a field with uint value
+ fields = append(fields, Field{
+ Name: "response_size_bytes",
+ Value: fmt.Sprintf("%d", rng.Intn(1234)),
+ })
+ // shuffle fields in order to check de-shuffling algorithm
+ rng.Shuffle(len(fields), func(i, j int) {
+ fields[i], fields[j] = fields[j], fields[i]
+ })
+ timestamp := rng.Int63()
+ lr.MustAdd(tenantID, timestamp, fields)
+ }
+ }
+ return lr
+}
+
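+// checkEqualRows returns an error if lrResult contains rows other than the rows in lrOrig.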
+func checkEqualRows(lrResult, lrOrig *LogRows) error {
+ if len(lrResult.timestamps) != len(lrOrig.timestamps) {
+ return fmt.Errorf("unexpected length LogRows; got %d; want %d", len(lrResult.timestamps), len(lrOrig.timestamps))
+ }
+
+ sort.Sort(lrResult)
+ sort.Sort(lrOrig)
+
+ sortFieldNames := func(fields []Field) {
+ sort.Slice(fields, func(i, j int) bool {
+ return fields[i].Name < fields[j].Name
+ })
+ }
+ for i := range lrOrig.timestamps {
+ if !lrOrig.streamIDs[i].equal(&lrResult.streamIDs[i]) {
+ return fmt.Errorf("unexpected streamID for log entry %d\ngot\n%s\nwant\n%s", i, &lrResult.streamIDs[i], &lrOrig.streamIDs[i])
+ }
+ if lrOrig.timestamps[i] != lrResult.timestamps[i] {
+ return fmt.Errorf("unexpected timestamp for log entry %d\ngot\n%d\nwant\n%d", i, lrResult.timestamps[i], lrOrig.timestamps[i])
+ }
+ fieldsOrig := lrOrig.rows[i]
+ fieldsResult := lrResult.rows[i]
+ if len(fieldsOrig) != len(fieldsResult) {
+ return fmt.Errorf("unexpected number of fields at log entry %d\ngot\n%s\nwant\n%s", i, fieldsResult, fieldsOrig)
+ }
+ sortFieldNames(fieldsOrig)
+ sortFieldNames(fieldsResult)
+ if !reflect.DeepEqual(fieldsOrig, fieldsResult) {
+ return fmt.Errorf("unexpected fields for log entry %d\ngot\n%s\nwant\n%s", i, fieldsResult, fieldsOrig)
+ }
+ }
+ return nil
+}
+
+// readLogRows reads log entries from mp.
+//
+// This function is for testing and debugging purposes only.
+func (mp *inmemoryPart) readLogRows(sbu *stringsBlockUnmarshaler, vd *valuesDecoder) *LogRows {
+ lr := GetLogRows(nil, nil)
+ bsr := getBlockStreamReader()
+ defer putBlockStreamReader(bsr)
+ bsr.MustInitFromInmemoryPart(mp)
+ var tmp rows
+ for bsr.NextBlock() {
+ bd := &bsr.blockData
+ streamID := bd.streamID
+ if err := bd.unmarshalRows(&tmp, sbu, vd); err != nil {
+ logger.Panicf("BUG: cannot unmarshal log entries from inmemoryPart: %s", err)
+ }
+ for i, timestamp := range tmp.timestamps {
+ lr.MustAdd(streamID.tenantID, timestamp, tmp.rows[i])
+ lr.streamIDs[len(lr.streamIDs)-1] = streamID
+ }
+ tmp.reset()
+ }
+ return lr
+}
diff --git a/lib/logstorage/inmemory_part_timing_test.go b/lib/logstorage/inmemory_part_timing_test.go
new file mode 100644
index 000000000..ccebe4f14
--- /dev/null
+++ b/lib/logstorage/inmemory_part_timing_test.go
@@ -0,0 +1,34 @@
+package logstorage
+
+import (
+ "fmt"
+ "testing"
+)
+
+func BenchmarkInmemoryPart_MustInitFromRows(b *testing.B) {
+ for _, streams := range []int{1, 10, 100} {
+ b.Run(fmt.Sprintf("streams_%d", streams), func(b *testing.B) {
+ for _, rowsPerStream := range []int{1, 10, 100, 1000} {
+ b.Run(fmt.Sprintf("rowsPerStream_%d", rowsPerStream), func(b *testing.B) {
+ benchmarkInmemoryPartMustInitFromRows(b, streams, rowsPerStream)
+ })
+ }
+ })
+ }
+}
+
+func benchmarkInmemoryPartMustInitFromRows(b *testing.B, streams, rowsPerStream int) {
+ b.ReportAllocs()
+ b.SetBytes(int64(streams * rowsPerStream))
+ b.RunParallel(func(pb *testing.PB) {
+ lr := newTestLogRows(streams, rowsPerStream, 0)
+ mp := getInmemoryPart()
+ for pb.Next() {
+ mp.mustInitFromRows(lr)
+ if mp.ph.RowsCount != uint64(len(lr.timestamps)) {
+ panic(fmt.Errorf("unexpecte number of entries in the output stream; got %d; want %d", mp.ph.RowsCount, len(lr.timestamps)))
+ }
+ }
+ putInmemoryPart(mp)
+ })
+}
diff --git a/lib/logstorage/log_rows.go b/lib/logstorage/log_rows.go
new file mode 100644
index 000000000..2192fa0d2
--- /dev/null
+++ b/lib/logstorage/log_rows.go
@@ -0,0 +1,277 @@
+package logstorage
+
+import (
+ "sort"
+ "sync"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+)
+
+// LogRows holds a set of rows needed for Storage.MustAddRows
+//
+// LogRows must be obtained via GetLogRows()
+type LogRows struct {
+ // buf holds all the bytes referred by items in LogRows
+ buf []byte
+
+ // fieldsBuf holds all the fields referred by items in LogRows
+ fieldsBuf []Field
+
+ // streamIDs holds streamIDs for rows added to LogRows
+ streamIDs []streamID
+
+ // streamTagsCanonicals holds streamTagsCanonical entries for rows added to LogRows
+ streamTagsCanonicals [][]byte
+
+ // timestamps holds timestamps for rows added to LogRows
+ timestamps []int64
+
+ // rows holds fields for rows added to LogRows.
+ rows [][]Field
+
+ // sf is a helper for sorting fields in every added row
+ sf sortedFields
+
+ // streamFields contains names for stream fields
+ streamFields map[string]struct{}
+
+ // ignoreFields contains names for log fields, which must be skipped during data ingestion
+ ignoreFields map[string]struct{}
+}
+
+type sortedFields []Field
+
+func (sf *sortedFields) Len() int {
+ return len(*sf)
+}
+
+func (sf *sortedFields) Less(i, j int) bool {
+ a := *sf
+ return a[i].Name < a[j].Name
+}
+
+func (sf *sortedFields) Swap(i, j int) {
+ a := *sf
+ a[i], a[j] = a[j], a[i]
+}
+
+// RowFormatter implements fmt.Stringer for []Field aka a single log row
+type RowFormatter []Field
+
+// String returns user-readable representation for rf
+func (rf *RowFormatter) String() string {
+ b := append([]byte{}, '{')
+
+ fields := *rf
+ if len(fields) > 0 {
+ b = append(b, fields[0].String()...)
+ fields = fields[1:]
+ for _, field := range fields {
+ b = append(b, ',')
+ b = append(b, field.String()...)
+ }
+ }
+
+ b = append(b, '}')
+ return string(b)
+}
+
+// Reset resets lr
+func (lr *LogRows) Reset() {
+ lr.buf = lr.buf[:0]
+
+ fb := lr.fieldsBuf
+ for i := range fb {
+ fb[i].Reset()
+ }
+ lr.fieldsBuf = fb[:0]
+
+ sids := lr.streamIDs
+ for i := range sids {
+ sids[i].reset()
+ }
+ lr.streamIDs = sids[:0]
+
+ sns := lr.streamTagsCanonicals
+ for i := range sns {
+ sns[i] = nil
+ }
+ lr.streamTagsCanonicals = sns[:0]
+
+ lr.timestamps = lr.timestamps[:0]
+
+ rows := lr.rows
+ for i := range rows {
+ rows[i] = nil
+ }
+ lr.rows = rows[:0]
+
+ lr.sf = nil
+
+ sfs := lr.streamFields
+ for k := range sfs {
+ delete(sfs, k)
+ }
+
+ ifs := lr.ignoreFields
+ for k := range ifs {
+ delete(ifs, k)
+ }
+}
+
+// NeedFlush returns true if lr contains too much data, so it must be flushed to the storage.
+func (lr *LogRows) NeedFlush() bool {
+ return len(lr.buf) > (maxUncompressedBlockSize/8)*7
+}
+
+// MustAdd adds a log entry with the given args to lr.
+//
+// It is OK to modify the args after returning from the function,
+// since lr copies all the args to internal data.
+func (lr *LogRows) MustAdd(tenantID TenantID, timestamp int64, fields []Field) {
+ // Compose StreamTags from fields according to lr.streamFields
+ sfs := lr.streamFields
+ st := GetStreamTags()
+ for i := range fields {
+ f := &fields[i]
+ if _, ok := sfs[f.Name]; ok {
+ st.Add(f.Name, f.Value)
+ }
+ }
+
+ // Marshal StreamTags
+ bb := bbPool.Get()
+ bb.B = st.MarshalCanonical(bb.B)
+ PutStreamTags(st)
+
+ // Calculate the id for the StreamTags
+ var sid streamID
+ sid.tenantID = tenantID
+ sid.id = hash128(bb.B)
+
+ // Store the row
+ lr.mustAddInternal(sid, timestamp, fields, bb.B)
+ bbPool.Put(bb)
+}
+
+func (lr *LogRows) mustAddInternal(sid streamID, timestamp int64, fields []Field, streamTagsCanonical []byte) {
+ buf := lr.buf
+ bufLen := len(buf)
+ buf = append(buf, streamTagsCanonical...)
+
+ lr.streamTagsCanonicals = append(lr.streamTagsCanonicals, buf[bufLen:])
+ lr.streamIDs = append(lr.streamIDs, sid)
+ lr.timestamps = append(lr.timestamps, timestamp)
+
+ // Store all the fields
+ ifs := lr.ignoreFields
+ fb := lr.fieldsBuf
+ fieldsLen := len(fb)
+ for i := range fields {
+ f := &fields[i]
+
+ if _, ok := ifs[f.Name]; ok {
+ // Skip fields from the ifs map
+ continue
+ }
+ if f.Value == "" {
+ // Skip fields without values
+ continue
+ }
+
+ fb = append(fb, Field{})
+ dstField := &fb[len(fb)-1]
+
+ bufLen = len(buf)
+ if f.Name != "_msg" {
+ buf = append(buf, f.Name...)
+ }
+ dstField.Name = bytesutil.ToUnsafeString(buf[bufLen:])
+
+ bufLen = len(buf)
+ buf = append(buf, f.Value...)
+ dstField.Value = bytesutil.ToUnsafeString(buf[bufLen:])
+ }
+ lr.sf = fb[fieldsLen:]
+ sort.Sort(&lr.sf)
+ lr.rows = append(lr.rows, lr.sf)
+
+ lr.fieldsBuf = fb
+ lr.buf = buf
+}
+
+// GetLogRows returns LogRows from the pool for the given streamFields.
+//
+// streamFields is a set of field names, which must be associated with the stream.
+//
+// ignoreFields is a set of field names, which must be skipped during data ingestion.
+//
+// Return it to the pool with PutLogRows() when it is no longer needed.
+func GetLogRows(streamFields, ignoreFields []string) *LogRows {
+ v := logRowsPool.Get()
+ if v == nil {
+ v = &LogRows{}
+ }
+ lr := v.(*LogRows)
+
+ // Initialize streamFields
+ sfs := lr.streamFields
+ if sfs == nil {
+ sfs = make(map[string]struct{}, len(streamFields))
+ lr.streamFields = sfs
+ }
+ for _, f := range streamFields {
+ sfs[f] = struct{}{}
+ }
+
+ // Initialize ignoreFields
+ ifs := lr.ignoreFields
+ if ifs == nil {
+ ifs = make(map[string]struct{}, len(ignoreFields))
+ lr.ignoreFields = ifs
+ }
+ for _, f := range ignoreFields {
+ if f != "" {
+ ifs[f] = struct{}{}
+ }
+ }
+
+ return lr
+}
+
+// PutLogRows returns lr to the pool.
+func PutLogRows(lr *LogRows) {
+ lr.Reset()
+ logRowsPool.Put(lr)
+}
+
+var logRowsPool sync.Pool
+
+// Len returns the number of items in lr.
+func (lr *LogRows) Len() int {
+ return len(lr.streamIDs)
+}
+
+// Less returns true if (streamID, timestamp) for row i is smaller than the (streamID, timestamp) for row j
+func (lr *LogRows) Less(i, j int) bool {
+ a := &lr.streamIDs[i]
+ b := &lr.streamIDs[j]
+ if !a.equal(b) {
+ return a.less(b)
+ }
+ return lr.timestamps[i] < lr.timestamps[j]
+}
+
+// Swap swaps rows i and j in lr.
+func (lr *LogRows) Swap(i, j int) {
+ a := &lr.streamIDs[i]
+ b := &lr.streamIDs[j]
+ *a, *b = *b, *a
+
+ tsA, tsB := &lr.timestamps[i], &lr.timestamps[j]
+ *tsA, *tsB = *tsB, *tsA
+
+ snA, snB := &lr.streamTagsCanonicals[i], &lr.streamTagsCanonicals[j]
+ *snA, *snB = *snB, *snA
+
+ fieldsA, fieldsB := &lr.rows[i], &lr.rows[j]
+ *fieldsA, *fieldsB = *fieldsB, *fieldsA
+}
diff --git a/lib/logstorage/log_rows_timing_test.go b/lib/logstorage/log_rows_timing_test.go
new file mode 100644
index 000000000..55a726e1b
--- /dev/null
+++ b/lib/logstorage/log_rows_timing_test.go
@@ -0,0 +1,83 @@
+package logstorage
+
+import (
+ "testing"
+)
+
+func BenchmarkLogRowsMustAdd(b *testing.B) {
+ rows := newBenchRows(map[string]string{
+ "input.type": "filestream",
+ "ecs.version": "8.0.0",
+ "host.hostname": "foobar-baz-abc",
+ "host.architecture": "x86_64",
+ "host.name": "foobar-baz-abc",
+ "host.os.codename": "bionic",
+ "host.os.type": "linux",
+ "host.os.platform": "ubuntu",
+ "host.os.version": "18.04.6 LTS (Bionic Beaver)",
+ "host.os.family": "debian",
+ "host.os.name": "Ubuntu",
+ "host.os.kernel": "4.15.0-211-generic",
+ "host.id": "a634d50249af449dbcb3ce724822568a",
+ "host.containerized": "false",
+ "host.ip": `["10.0.0.42","10.224.112.1","172.20.0.1","172.18.0.1","172.19.0.1","fc00:f853:ccd:e793::1","fe80::1","172.21.0.1","172.17.0.1"]`,
+ "host.mac": `["02-42-42-90-52-D9","02-42-C6-48-A6-84","02-42-FD-91-7E-17","52-54-00-F5-13-E7","54-E1-AD-89-1A-4C","F8-34-41-3C-C0-85"]`,
+ "agent.ephemeral_id": "6c251f67-7210-4cef-8f72-a9546cbb48cc",
+ "agent.id": "e97243c5-5ef3-4dc1-8828-504f68731e87",
+ "agent.name": "foobar-baz-abc",
+ "agent.type": "filebeat",
+ "agent.version": "8.8.0",
+ "log.file.path": "/var/log/auth.log",
+ "log.offset": "37908",
+ }, []string{
+ "Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=0)",
+ "Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=1)",
+ "Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=2)",
+ "Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=3)",
+ "Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=4)",
+ })
+ streamFields := []string{
+ "host.hostname",
+ "agent.name",
+ "log.file.path",
+ }
+
+ b.ReportAllocs()
+ b.SetBytes(int64(len(rows)))
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ benchmarkLogRowsMustAdd(rows, streamFields)
+ }
+ })
+}
+
+func benchmarkLogRowsMustAdd(rows [][]Field, streamFields []string) {
+ lr := GetLogRows(streamFields, nil)
+ var tid TenantID
+ for i, fields := range rows {
+ tid.AccountID = uint32(i)
+ tid.ProjectID = uint32(2 * i)
+ timestamp := int64(i) * 1000
+ lr.MustAdd(tid, timestamp, fields)
+ }
+ PutLogRows(lr)
+}
+
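+// newBenchRows creates a benchmark row per message; every row contains constFields plus the _msg field.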
+func newBenchRows(constFields map[string]string, messages []string) [][]Field {
+ rows := make([][]Field, 0, len(messages))
+ for _, msg := range messages {
+ row := make([]Field, 0, len(constFields)+1)
+ for k, v := range constFields {
+ row = append(row, Field{
+ Name: k,
+ Value: v,
+ })
+ }
+ row = append(row, Field{
+ Name: "_msg",
+ Value: msg,
+ })
+ rows = append(rows, row)
+ }
+ return rows
+}
diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go
new file mode 100644
index 000000000..9768939d2
--- /dev/null
+++ b/lib/logstorage/parser.go
@@ -0,0 +1,1100 @@
+package logstorage
+
+import (
+ "fmt"
+ "math"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+ "unicode"
+ "unicode/utf8"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
+)
+
+type lexer struct {
+ // s contains unparsed tail of sOrig
+ s string
+
+ // sOrig contains the original string
+ sOrig string
+
+ // token contains the current token
+ //
+ // an empty token means the end of s
+ token string
+
+ // rawToken contains raw token before unquoting
+ rawToken string
+
+ // prevToken contains the previously parsed token
+ prevToken string
+
+ // isSkippedSpace is set to true if there was whitespace before the token in s
+ isSkippedSpace bool
+
+ // currentTimestamp is the current timestamp in nanoseconds
+ currentTimestamp int64
+}
+
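+// newLexer returns a lexer for the given s.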
+func newLexer(s string) *lexer {
+ return &lexer{
+ s: s,
+ sOrig: s,
+ currentTimestamp: time.Now().UnixNano(),
+ }
+}
+
+func (lex *lexer) isEnd() bool {
+ return len(lex.s) == 0 && len(lex.token) == 0 && len(lex.rawToken) == 0
+}
+
+func (lex *lexer) isQuotedToken() bool {
+ return lex.token != lex.rawToken
+}
+
+func (lex *lexer) isPrevToken(tokens ...string) bool {
+ for _, token := range tokens {
+ if token == lex.prevToken {
+ return true
+ }
+ }
+ return false
+}
+
+func (lex *lexer) isKeyword(keywords ...string) bool {
+ if lex.isQuotedToken() {
+ return false
+ }
+ tokenLower := strings.ToLower(lex.token)
+ for _, kw := range keywords {
+ if kw == tokenLower {
+ return true
+ }
+ }
+ return false
+}
+
+func (lex *lexer) context() string {
+ tail := lex.sOrig
+ tail = tail[:len(tail)-len(lex.s)]
+ if len(tail) > 50 {
+ tail = tail[len(tail)-50:]
+ }
+ return tail
+}
+
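+// mustNextToken reads the next token and returns true if it isn't the end of the parsed string.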
+func (lex *lexer) mustNextToken() bool {
+ lex.nextToken()
+ return !lex.isEnd()
+}
+
+func (lex *lexer) nextCharToken(s string, size int) {
+ lex.token = s[:size]
+ lex.rawToken = lex.token
+ lex.s = s[size:]
+}
+
+// nextToken updates lex.token to the next token.
+func (lex *lexer) nextToken() {
+ s := lex.s
+ lex.prevToken = lex.token
+ lex.token = ""
+ lex.rawToken = ""
+ lex.isSkippedSpace = false
+ if len(s) == 0 {
+ return
+ }
+ r, size := utf8.DecodeRuneInString(s)
+ if r == utf8.RuneError {
+ lex.nextCharToken(s, size)
+ return
+ }
+
+ // Skip whitespace
+ for unicode.IsSpace(r) {
+ lex.isSkippedSpace = true
+ s = s[size:]
+ r, size = utf8.DecodeRuneInString(s)
+ }
+
+ // Try decoding simple token
+ tokenLen := 0
+ for isTokenRune(r) || r == '.' {
+ tokenLen += size
+ r, size = utf8.DecodeRuneInString(s[tokenLen:])
+ }
+ if tokenLen > 0 {
+ lex.nextCharToken(s, tokenLen)
+ return
+ }
+
+ switch r {
+ case '"', '`':
+ prefix, err := strconv.QuotedPrefix(s)
+ if err != nil {
+ lex.nextCharToken(s, 1)
+ return
+ }
+ token, err := strconv.Unquote(prefix)
+ if err != nil {
+ lex.nextCharToken(s, 1)
+ return
+ }
+ lex.token = token
+ lex.rawToken = prefix
+ lex.s = s[len(prefix):]
+ return
+ case '\'':
+ var b []byte
+ for !strings.HasPrefix(s[size:], "'") {
+ ch, _, newTail, err := strconv.UnquoteChar(s[size:], '\'')
+ if err != nil {
+ lex.nextCharToken(s, 1)
+ return
+ }
+ b = utf8.AppendRune(b, ch)
+ size += len(s[size:]) - len(newTail)
+ }
+ size++
+ lex.token = string(b)
+ lex.rawToken = string(s[:size])
+ lex.s = s[size:]
+ return
+ case '=':
+ if strings.HasPrefix(s[size:], "~") {
+ lex.nextCharToken(s, 2)
+ return
+ }
+ lex.nextCharToken(s, 1)
+ return
+ case '!':
+ if strings.HasPrefix(s[size:], "~") || strings.HasPrefix(s[size:], "=") {
+ lex.nextCharToken(s, 2)
+ return
+ }
+ lex.nextCharToken(s, 1)
+ return
+ default:
+ lex.nextCharToken(s, size)
+ return
+ }
+}
+
+// Query represents LogsQL query.
+type Query struct {
+ f filter
+}
+
+// String returns string representation for q.
+func (q *Query) String() string {
+ return q.f.String()
+}
+
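+// getResultColumnNames returns the sorted set of column names referenced by q; the _time, _stream and _msg columns are always included.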
+func (q *Query) getResultColumnNames() []string {
+ m := make(map[string]struct{})
+ q.f.updateReferencedColumnNames(m)
+
+ // Substitute an empty column name with _msg column
+ if _, ok := m[""]; ok {
+ delete(m, "")
+ m["_msg"] = struct{}{}
+ }
+
+ // unconditionally select _time, _stream and _msg columns
+ // TODO: add the ability to filter out these columns
+ m["_time"] = struct{}{}
+ m["_stream"] = struct{}{}
+ m["_msg"] = struct{}{}
+
+ columnNames := make([]string, 0, len(m))
+ for k := range m {
+ columnNames = append(columnNames, k)
+ }
+ sort.Strings(columnNames)
+ return columnNames
+}
+
+// ParseQuery parses s.
+func ParseQuery(s string) (*Query, error) {
+ lex := newLexer(s)
+
+ f, err := parseFilter(lex)
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse filter expression: %w; context: %s", err, lex.context())
+ }
+ if !lex.isEnd() {
+ return nil, fmt.Errorf("unexpected tail: %q", lex.s)
+ }
+
+ q := &Query{
+ f: f,
+ }
+ return q, nil
+}
+
+func parseFilter(lex *lexer) (filter, error) {
+ if !lex.mustNextToken() || lex.isKeyword("|") {
+ return nil, fmt.Errorf("missing query")
+ }
+ af, err := parseOrFilter(lex, "")
+ if err != nil {
+ return nil, err
+ }
+ return af, nil
+}
+
+func parseOrFilter(lex *lexer, fieldName string) (filter, error) {
+ var filters []filter
+ for {
+ f, err := parseAndFilter(lex, fieldName)
+ if err != nil {
+ return nil, err
+ }
+ filters = append(filters, f)
+ switch {
+ case lex.isKeyword("|", ")", ""):
+ if len(filters) == 1 {
+ return filters[0], nil
+ }
+ of := &orFilter{
+ filters: filters,
+ }
+ return of, nil
+ case lex.isKeyword("or"):
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing filter after 'or'")
+ }
+ }
+ }
+}
+
+func parseAndFilter(lex *lexer, fieldName string) (filter, error) {
+ var filters []filter
+ for {
+ f, err := parseGenericFilter(lex, fieldName)
+ if err != nil {
+ return nil, err
+ }
+ filters = append(filters, f)
+ switch {
+ case lex.isKeyword("or", "|", ")", ""):
+ if len(filters) == 1 {
+ return filters[0], nil
+ }
+ af := &andFilter{
+ filters: filters,
+ }
+ return af, nil
+ case lex.isKeyword("and"):
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing filter after 'and'")
+ }
+ }
+ }
+}
+
+func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
+ // Check for special keywords
+ switch {
+ case lex.isKeyword(":"):
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing filter after ':'")
+ }
+ return parseGenericFilter(lex, fieldName)
+ case lex.isKeyword("*"):
+ lex.nextToken()
+ f := &prefixFilter{
+ fieldName: fieldName,
+ prefix: "",
+ }
+ return f, nil
+ case lex.isKeyword("("):
+ if !lex.isSkippedSpace && !lex.isPrevToken("", ":", "(", "!", "not") {
+ return nil, fmt.Errorf("missing whitespace before the search word %q", lex.prevToken)
+ }
+ return parseParensFilter(lex, fieldName)
+ case lex.isKeyword("not", "!"):
+ return parseNotFilter(lex, fieldName)
+ case lex.isKeyword("exact"):
+ return parseExactFilter(lex, fieldName)
+ case lex.isKeyword("exact_prefix"):
+ return parseExactPrefixFilter(lex, fieldName)
+ case lex.isKeyword("i"):
+ return parseAnyCaseFilter(lex, fieldName)
+ case lex.isKeyword("in"):
+ return parseInFilter(lex, fieldName)
+ case lex.isKeyword("ipv4_range"):
+ return parseIPv4RangeFilter(lex, fieldName)
+ case lex.isKeyword("len_range"):
+ return parseLenRangeFilter(lex, fieldName)
+ case lex.isKeyword("range"):
+ return parseRangeFilter(lex, fieldName)
+ case lex.isKeyword("re"):
+ return parseRegexpFilter(lex, fieldName)
+ case lex.isKeyword("seq"):
+ return parseSequenceFilter(lex, fieldName)
+ case lex.isKeyword("string_range"):
+ return parseStringRangeFilter(lex, fieldName)
+ case lex.isKeyword(`"`, "'", "`"):
+ return nil, fmt.Errorf("improperly quoted string")
+ case lex.isKeyword(",", ")", "[", "]"):
+ return nil, fmt.Errorf("unexpected token %q", lex.token)
+ }
+ phrase := getCompoundPhrase(lex, fieldName)
+ return parseFilterForPhrase(lex, phrase, fieldName)
+}
+
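+// getCompoundPhrase returns the current token merged with the following tokens until whitespace or a stop token is reached.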
+func getCompoundPhrase(lex *lexer, fieldName string) string {
+ phrase := lex.token
+ rawPhrase := lex.rawToken
+ lex.nextToken()
+ suffix := getCompoundSuffix(lex, fieldName)
+ if suffix == "" {
+ return phrase
+ }
+ return rawPhrase + suffix
+}
+
+func getCompoundSuffix(lex *lexer, fieldName string) string {
+ s := ""
+ stopTokens := []string{"*", ",", "(", ")", "[", "]", "|", ""}
+ if fieldName == "" {
+ stopTokens = append(stopTokens, ":")
+ }
+ for !lex.isSkippedSpace && !lex.isKeyword(stopTokens...) {
+ s += lex.rawToken
+ lex.nextToken()
+ }
+ return s
+}
+
+func getCompoundToken(lex *lexer) string {
+ s := lex.token
+ rawS := lex.rawToken
+ lex.nextToken()
+ suffix := ""
+ for !lex.isSkippedSpace && !lex.isKeyword(",", "(", ")", "[", "]", "|", "") {
+ s += lex.token
+ lex.nextToken()
+ }
+ if suffix == "" {
+ return s
+ }
+ return rawS + suffix
+}
+
+func getCompoundFuncArg(lex *lexer) string {
+ if lex.isKeyword("*") {
+ return ""
+ }
+ arg := lex.token
+ rawArg := lex.rawToken
+ lex.nextToken()
+ suffix := ""
+ for !lex.isSkippedSpace && !lex.isKeyword("*", ",", ")", "") {
+ suffix += lex.rawToken
+ lex.nextToken()
+ }
+ if suffix == "" {
+ return arg
+ }
+ return rawArg + suffix
+}
+
+func parseFilterForPhrase(lex *lexer, phrase, fieldName string) (filter, error) {
+ if fieldName != "" || !lex.isKeyword(":") {
+ // The phrase is either a search phrase or a search prefix.
+ if lex.isKeyword("*") && !lex.isSkippedSpace {
+ // The phrase is a search prefix in the form `foo*`.
+ lex.nextToken()
+ f := &prefixFilter{
+ fieldName: fieldName,
+ prefix: phrase,
+ }
+ return f, nil
+ }
+ // The phrase is a search phrase.
+ f := &phraseFilter{
+ fieldName: fieldName,
+ phrase: phrase,
+ }
+ return f, nil
+ }
+
+ // The phrase contains the field name.
+ fieldName = phrase
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing filter after field name %s", quoteTokenIfNeeded(fieldName))
+ }
+ switch fieldName {
+ case "_time":
+ return parseTimeFilter(lex)
+ case "_stream":
+ return parseStreamFilter(lex)
+ default:
+ return parseGenericFilter(lex, fieldName)
+ }
+}
+
+func parseParensFilter(lex *lexer, fieldName string) (filter, error) {
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing filter after '('")
+ }
+ f, err := parseOrFilter(lex, fieldName)
+ if err != nil {
+ return nil, err
+ }
+ if !lex.isKeyword(")") {
+ return nil, fmt.Errorf("unexpected token %q instead of ')'", lex.token)
+ }
+ lex.nextToken()
+ return f, nil
+}
+
+func parseNotFilter(lex *lexer, fieldName string) (filter, error) {
+ notKeyword := lex.token
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing filters after '%s'", notKeyword)
+ }
+ f, err := parseGenericFilter(lex, fieldName)
+ if err != nil {
+ return nil, err
+ }
+ nf, ok := f.(*notFilter)
+ if ok {
+ return nf.f, nil
+ }
+ nf = &notFilter{
+ f: f,
+ }
+ return nf, nil
+}
+
+func parseAnyCaseFilter(lex *lexer, fieldName string) (filter, error) {
+ phrase := lex.token
+ lex.nextToken()
+ if !lex.isKeyword("(") {
+ phrase += getCompoundSuffix(lex, fieldName)
+ return parseFilterForPhrase(lex, phrase, fieldName)
+ }
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing arg for i()")
+ }
+ phrase = getCompoundFuncArg(lex)
+ isPrefixFilter := false
+ if lex.isKeyword("*") && !lex.isSkippedSpace {
+ isPrefixFilter = true
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing ')' after i()")
+ }
+ }
+ if !lex.isKeyword(")") {
+ return nil, fmt.Errorf("unexpected token %q instead of ')' in i()", lex.token)
+ }
+ lex.nextToken()
+
+ if isPrefixFilter {
+ f := &anyCasePrefixFilter{
+ fieldName: fieldName,
+ prefix: phrase,
+ }
+ return f, nil
+ }
+ f := &anyCasePhraseFilter{
+ fieldName: fieldName,
+ phrase: phrase,
+ }
+ return f, nil
+}
+
+func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) {
+ funcName := lex.token
+ return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+ if len(args) != 2 {
+ return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args))
+ }
+ minLen, err := strconv.ParseUint(args[0], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse minLen at %s(): %w", funcName, err)
+ }
+ maxLen, err := strconv.ParseUint(args[1], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse maxLen at %s(): %w", funcName, err)
+ }
+ rf := &lenRangeFilter{
+ fieldName: fieldName,
+ minLen: minLen,
+ maxLen: maxLen,
+ }
+ return rf, nil
+ })
+}
+
+func parseStringRangeFilter(lex *lexer, fieldName string) (filter, error) {
+ funcName := lex.token
+ return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+ if len(args) != 2 {
+ return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args))
+ }
+ rf := &stringRangeFilter{
+ fieldName: fieldName,
+ minValue: args[0],
+ maxValue: args[1],
+ }
+ return rf, nil
+ })
+}
+
+func parseIPv4RangeFilter(lex *lexer, fieldName string) (filter, error) {
+ funcName := lex.token
+ return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+ if len(args) == 1 {
+ minValue, maxValue, ok := tryParseIPv4CIDR(args[0])
+ if !ok {
+ return nil, fmt.Errorf("cannot parse IPv4 address or IPv4 CIDR %q at %s()", args[0], funcName)
+ }
+ rf := &ipv4RangeFilter{
+ fieldName: fieldName,
+ minValue: minValue,
+ maxValue: maxValue,
+ }
+ return rf, nil
+ }
+ if len(args) != 2 {
+ return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args))
+ }
+ minValue, ok := tryParseIPv4(args[0])
+ if !ok {
+ return nil, fmt.Errorf("cannot parse lower bound ip %q in %s()", funcName, args[0])
+ }
+ maxValue, ok := tryParseIPv4(args[1])
+ if !ok {
+ return nil, fmt.Errorf("cannot parse upper bound ip %q in %s()", funcName, args[1])
+ }
+ rf := &ipv4RangeFilter{
+ fieldName: fieldName,
+ minValue: minValue,
+ maxValue: maxValue,
+ }
+ return rf, nil
+ })
+}
+
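+// tryParseIPv4CIDR parses s as either a plain IPv4 address or an IPv4 CIDR and returns the [minValue, maxValue] range it covers.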
+func tryParseIPv4CIDR(s string) (uint32, uint32, bool) {
+ n := strings.IndexByte(s, '/')
+ if n < 0 {
+ n, ok := tryParseIPv4(s)
+ return n, n, ok
+ }
+ ip, ok := tryParseIPv4(s[:n])
+ if !ok {
+ return 0, 0, false
+ }
+ maskBits, ok := tryParseUint64(s[n+1:])
+ if !ok || maskBits > 32 {
+ return 0, 0, false
+ }
+ mask := uint32((1 << (32 - maskBits)) - 1)
+ minValue := ip &^ mask
+ maxValue := ip | mask
+ return minValue, maxValue, true
+}
+
+func parseInFilter(lex *lexer, fieldName string) (filter, error) {
+ return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+ f := &inFilter{
+ fieldName: fieldName,
+ values: args,
+ }
+ return f, nil
+ })
+}
+
+func parseSequenceFilter(lex *lexer, fieldName string) (filter, error) {
+ return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+ sf := &sequenceFilter{
+ fieldName: fieldName,
+ phrases: args,
+ }
+ return sf, nil
+ })
+}
+
+func parseExactFilter(lex *lexer, fieldName string) (filter, error) {
+ return parseFuncArg(lex, fieldName, func(arg string) (filter, error) {
+ ef := &exactFilter{
+ fieldName: fieldName,
+ value: arg,
+ }
+ return ef, nil
+ })
+}
+
+func parseExactPrefixFilter(lex *lexer, fieldName string) (filter, error) {
+ return parseFuncArg(lex, fieldName, func(arg string) (filter, error) {
+ ef := &exactPrefixFilter{
+ fieldName: fieldName,
+ prefix: arg,
+ }
+ return ef, nil
+ })
+}
+
+func parseRegexpFilter(lex *lexer, fieldName string) (filter, error) {
+ funcName := lex.token
+ return parseFuncArg(lex, fieldName, func(arg string) (filter, error) {
+ re, err := regexp.Compile(arg)
+ if err != nil {
+ return nil, fmt.Errorf("invalid regexp %q for %s(): %w", arg, funcName, err)
+ }
+ rf := &regexpFilter{
+ fieldName: fieldName,
+ re: re,
+ }
+ return rf, nil
+ })
+}
+
+func parseRangeFilter(lex *lexer, fieldName string) (filter, error) {
+ funcName := lex.token
+ lex.nextToken()
+
+ // Parse minValue
+ includeMinValue := false
+ switch {
+ case lex.isKeyword("("):
+ includeMinValue = false
+ case lex.isKeyword("["):
+ includeMinValue = true
+ default:
+ phrase := funcName + getCompoundSuffix(lex, fieldName)
+ return parseFilterForPhrase(lex, phrase, fieldName)
+ }
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing args for %s()", funcName)
+ }
+ minValue, minValueStr, err := parseFloat64(lex)
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse minValue in %s(): %w", funcName, err)
+ }
+
+ // Parse comma
+ if !lex.isKeyword(",") {
+ return nil, fmt.Errorf("unexpected token %q ater %q in %s(); want ','", lex.token, minValueStr, funcName)
+ }
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing maxValue in %s()", funcName)
+ }
+
+ // Parse maxValue
+ maxValue, maxValueStr, err := parseFloat64(lex)
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse maxValue in %s(): %w", funcName, err)
+ }
+ includeMaxValue := false
+ switch {
+ case lex.isKeyword(")"):
+ includeMaxValue = false
+ case lex.isKeyword("]"):
+ includeMaxValue = true
+ default:
+ return nil, fmt.Errorf("unexpected closing token %q in %s(); want ')' or ']'", lex.token, funcName)
+ }
+ lex.nextToken()
+
+ stringRepr := ""
+ if includeMinValue {
+ stringRepr += "["
+ } else {
+ stringRepr += "("
+ minValue = math.Nextafter(minValue, math.Inf(1))
+ }
+ stringRepr += minValueStr + "," + maxValueStr
+ if includeMaxValue {
+ stringRepr += "]"
+ } else {
+ stringRepr += ")"
+ maxValue = math.Nextafter(maxValue, math.Inf(-1))
+ }
+
+ rf := &rangeFilter{
+ fieldName: fieldName,
+ minValue: minValue,
+ maxValue: maxValue,
+
+ stringRepr: stringRepr,
+ }
+ return rf, nil
+}
+
+func parseFloat64(lex *lexer) (float64, string, error) {
+ s := getCompoundToken(lex)
+ f, err := strconv.ParseFloat(s, 64)
+ if err != nil {
+ return 0, "", fmt.Errorf("cannot parse %q as float64: %w", lex.token, err)
+ }
+ return f, s, nil
+}
+
+func parseFuncArg(lex *lexer, fieldName string, callback func(args string) (filter, error)) (filter, error) {
+ funcName := lex.token
+ return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+ if len(args) != 1 {
+ return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 1", funcName, len(args))
+ }
+ return callback(args[0])
+ })
+}
+
+func parseFuncArgs(lex *lexer, fieldName string, callback func(args []string) (filter, error)) (filter, error) {
+ funcName := lex.token
+ lex.nextToken()
+ if !lex.isKeyword("(") {
+ phrase := funcName + getCompoundSuffix(lex, fieldName)
+ return parseFilterForPhrase(lex, phrase, fieldName)
+ }
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing args for %s()", funcName)
+ }
+ var args []string
+ for !lex.isKeyword(")") {
+ if lex.isKeyword(",") {
+ return nil, fmt.Errorf("unexpected ',' - missing arg in %s()", funcName)
+ }
+ arg := getCompoundFuncArg(lex)
+ args = append(args, arg)
+ if lex.isKeyword(")") {
+ break
+ }
+ if !lex.isKeyword(",") {
+ return nil, fmt.Errorf("missing ',' after %q in %s()", arg, funcName)
+ }
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing the next arg after %q in %s()", arg, funcName)
+ }
+ }
+ lex.nextToken()
+
+ return callback(args)
+}
+
+func parseTimeFilter(lex *lexer) (*timeFilter, error) {
+ startTimeInclude := false
+ switch {
+ case lex.isKeyword("["):
+ startTimeInclude = true
+ case lex.isKeyword("("):
+ startTimeInclude = false
+ default:
+ // Try parsing '_time:YYYY-MM-DD', which transforms to '_time:[YYYY-MM-DD, YYYY-MM-DD+1)'
+ startTime, stringRepr, err := parseTime(lex)
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse _time filter: %w", err)
+ }
+ endTime := getMatchingEndTime(startTime, stringRepr)
+ tf := &timeFilter{
+ minTimestamp: startTime,
+ maxTimestamp: endTime,
+
+ stringRepr: stringRepr,
+ }
+ return tf, nil
+ }
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing start time in _time filter")
+ }
+
+ // Parse start time
+ startTime, startTimeString, err := parseTime(lex)
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse start time in _time filter: %w", err)
+ }
+ if !lex.isKeyword(",") {
+ return nil, fmt.Errorf("unexpected token after start time in _time filter: %q; want ','", lex.token)
+ }
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing end time in _time filter")
+ }
+
+ // Parse end time
+ endTime, endTimeString, err := parseTime(lex)
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse end time in _time filter: %w", err)
+ }
+
+ endTimeInclude := false
+ switch {
+ case lex.isKeyword("]"):
+ endTimeInclude = true
+ case lex.isKeyword(")"):
+ endTimeInclude = false
+ default:
+ return nil, fmt.Errorf("_time filter ends with unexpected token %q; it must end with ']' or ')'", lex.token)
+ }
+ lex.nextToken()
+
+ stringRepr := ""
+ if startTimeInclude {
+ stringRepr += "["
+ } else {
+ stringRepr += "("
+ startTime++
+ }
+ stringRepr += startTimeString + "," + endTimeString
+ if endTimeInclude {
+ stringRepr += "]"
+ endTime = getMatchingEndTime(endTime, endTimeString)
+ } else {
+ stringRepr += ")"
+ endTime--
+ }
+
+ tf := &timeFilter{
+ minTimestamp: startTime,
+ maxTimestamp: endTime,
+
+ stringRepr: stringRepr,
+ }
+ return tf, nil
+}
+
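+// getMatchingEndTime returns the inclusive end timestamp matching the precision of stringRepr, so a filter like '_time:YYYY-MM-DD' covers the whole day.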
+func getMatchingEndTime(startTime int64, stringRepr string) int64 {
+ tStart := time.Unix(0, startTime).UTC()
+ tEnd := tStart
+ timeStr := stripTimezoneSuffix(stringRepr)
+ switch {
+ case len(timeStr) == len("YYYY"):
+ y, m, d := tStart.Date()
+ nsec := startTime % (24 * 3600 * 1e9)
+ tEnd = time.Date(y+1, m, d, 0, 0, int(nsec/1e9), int(nsec%1e9), time.UTC)
+ case len(timeStr) == len("YYYY-MM") && timeStr[len("YYYY")] == '-':
+ y, m, d := tStart.Date()
+ nsec := startTime % (24 * 3600 * 1e9)
+ if d != 1 {
+ d = 0
+ m++
+ }
+ tEnd = time.Date(y, m+1, d, 0, 0, int(nsec/1e9), int(nsec%1e9), time.UTC)
+ case len(timeStr) == len("YYYY-MM-DD") && timeStr[len("YYYY")] == '-':
+ tEnd = tStart.Add(24 * time.Hour)
+ case len(timeStr) == len("YYYY-MM-DDThh") && timeStr[len("YYYY")] == '-':
+ tEnd = tStart.Add(time.Hour)
+ case len(timeStr) == len("YYYY-MM-DDThh:mm") && timeStr[len("YYYY")] == '-':
+ tEnd = tStart.Add(time.Minute)
+ case len(timeStr) == len("YYYY-MM-DDThh:mm:ss") && timeStr[len("YYYY")] == '-':
+ tEnd = tStart.Add(time.Second)
+ default:
+ tEnd = tStart.Add(time.Nanosecond)
+ }
+ return tEnd.UnixNano() - 1
+}
+
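+// stripTimezoneSuffix removes the trailing 'Z' or '±hh:mm' timezone offset from s.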
+func stripTimezoneSuffix(s string) string {
+ if strings.HasSuffix(s, "Z") {
+ return s[:len(s)-1]
+ }
+ if len(s) < 6 {
+ return s
+ }
+ tz := s[len(s)-6:]
+ if tz[0] != '-' && tz[0] != '+' {
+ return s
+ }
+ if tz[3] != ':' {
+ return s
+ }
+ return s[:len(s)-len(tz)]
+}
+
+func parseStreamFilter(lex *lexer) (*streamFilter, error) {
+ if !lex.isKeyword("{") {
+ return nil, fmt.Errorf("unexpected token %q instead of '{' in _stream filter", lex.token)
+ }
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("incomplete _stream filter after '{'")
+ }
+ var filters []*andStreamFilter
+ for {
+ f, err := parseAndStreamFilter(lex)
+ if err != nil {
+ return nil, err
+ }
+ filters = append(filters, f)
+ switch {
+ case lex.isKeyword("}"):
+ lex.nextToken()
+ sf := &streamFilter{
+ f: &StreamFilter{
+ orFilters: filters,
+ },
+ }
+ return sf, nil
+ case lex.isKeyword("or"):
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("incomplete _stream filter after 'or'")
+ }
+ if lex.isKeyword("}") {
+ return nil, fmt.Errorf("unexpected '}' after 'or' in _stream filter")
+ }
+ default:
+ return nil, fmt.Errorf("unexpected token in _stream filter: %q; want '}' or 'or'", lex.token)
+ }
+ }
+}
+
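+// newStreamFilter parses s and returns the resulting StreamFilter.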
+func newStreamFilter(s string) (*StreamFilter, error) {
+ lex := newLexer(s)
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing '{' in _stream filter")
+ }
+ sf, err := parseStreamFilter(lex)
+ if err != nil {
+ return nil, err
+ }
+ return sf.f, nil
+}
+
+func parseAndStreamFilter(lex *lexer) (*andStreamFilter, error) {
+ var filters []*streamTagFilter
+ for {
+ if lex.isKeyword("}") {
+ asf := &andStreamFilter{
+ tagFilters: filters,
+ }
+ return asf, nil
+ }
+ f, err := parseStreamTagFilter(lex)
+ if err != nil {
+ return nil, err
+ }
+ filters = append(filters, f)
+ switch {
+ case lex.isKeyword("or", "}"):
+ asf := &andStreamFilter{
+ tagFilters: filters,
+ }
+ return asf, nil
+ case lex.isKeyword(","):
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing stream filter after ','")
+ }
+ default:
+ return nil, fmt.Errorf("unexpected token %q in _stream filter; want 'or', 'and', '}' or ','", lex.token)
+ }
+ }
+}
+
+func parseStreamTagFilter(lex *lexer) (*streamTagFilter, error) {
+ tagName := lex.token
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing operation in _stream filter for %q field", tagName)
+ }
+ if !lex.isKeyword("=", "!=", "=~", "!~") {
+ return nil, fmt.Errorf("unsupported operation %q in _steam filter for %q field; supported operations: =, !=, =~, !~", lex.token, tagName)
+ }
+ op := lex.token
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing _stream filter value for %q field", tagName)
+ }
+ value := lex.token
+ if !lex.mustNextToken() {
+ return nil, fmt.Errorf("missing token after %q%s%q filter", tagName, op, value)
+ }
+ stf := &streamTagFilter{
+ tagName: tagName,
+ op: op,
+ value: value,
+ }
+ return stf, nil
+}
+
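+// parseTime parses a time token and returns its value in nanoseconds together with the original string representation.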
+func parseTime(lex *lexer) (int64, string, error) {
+ s := getCompoundToken(lex)
+ t, err := promutils.ParseTimeAt(s, float64(lex.currentTimestamp)/1e9)
+ if err != nil {
+ return 0, "", err
+ }
+ return int64(t * 1e9), s, nil
+}
+
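+// quoteTokenIfNeeded returns s quoted if it cannot be used as a plain unquoted token.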
+func quoteTokenIfNeeded(s string) string {
+ if !needQuoteToken(s) {
+ return s
+ }
+ return strconv.Quote(s)
+}
+
+func needQuoteToken(s string) bool {
+ sLower := strings.ToLower(s)
+ if _, ok := reservedKeywords[sLower]; ok {
+ return true
+ }
+ for _, r := range s {
+ if !isTokenRune(r) && r != '.' && r != '-' {
+ return true
+ }
+ }
+ return false
+}
+
+var reservedKeywords = func() map[string]struct{} {
+ kws := []string{
+ // An empty keyword means end of parsed string
+ "",
+
+ // boolean operator tokens for 'foo and bar or baz not xxx'
+ "and",
+ "or",
+ "not",
+ "!", // synonym for "not"
+
+ // parens for '(foo or bar) and baz'
+ "(",
+ ")",
+
+ // stream filter tokens for '_stream:{foo=~"bar", baz="a"}'
+ "{",
+ "}",
+ "=",
+ "!=",
+ "=~",
+ "!~",
+ ",",
+
+ // delimiter between query parts:
+ // 'foo and bar | extract "<*> foo