From 87b66db47da23cb9fe800ce16efa6bdac365f5a8 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 19 Jun 2023 22:55:12 -0700 Subject: [PATCH] app/victoria-logs: initial code release --- Makefile | 62 +- app/victoria-logs/Makefile | 103 + app/victoria-logs/deployment/Dockerfile | 8 + app/victoria-logs/main.go | 102 + app/victoria-logs/multiarch/Dockerfile | 12 + app/vlinsert/elasticsearch/bulk_response.qtpl | 20 + .../elasticsearch/bulk_response.qtpl.go | 69 + app/vlinsert/elasticsearch/elasticsearch.go | 410 + .../elasticsearch/elasticsearch_test.go | 97 + .../elasticsearch_timing_test.go | 50 + app/vlinsert/main.go | 34 + app/vlselect/logsql/logsql.go | 53 + app/vlselect/logsql/query_response.qtpl | 20 + app/vlselect/logsql/query_response.qtpl.go | 90 + app/vlselect/main.go | 140 + app/vlstorage/main.go | 149 + docs/VictoriaLogs/LogsQL.md | 1087 ++ docs/VictoriaLogs/README.md | 481 + docs/VictoriaLogs/Roadmap.md | 37 + docs/VictoriaLogs/keyConcepts.md | 219 + lib/logstorage/arena.go | 31 + lib/logstorage/block.go | 650 ++ lib/logstorage/block_data.go | 383 + lib/logstorage/block_data_test.go | 106 + lib/logstorage/block_header.go | 766 ++ lib/logstorage/block_header_test.go | 454 + lib/logstorage/block_search.go | 645 ++ lib/logstorage/block_stream_merger.go | 288 + lib/logstorage/block_stream_reader.go | 383 + lib/logstorage/block_stream_writer.go | 362 + lib/logstorage/block_test.go | 179 + lib/logstorage/block_timing_test.go | 46 + lib/logstorage/bloomfilter.go | 176 + lib/logstorage/bloomfilter_test.go | 84 + lib/logstorage/consts.go | 32 + lib/logstorage/datadb.go | 990 ++ lib/logstorage/datadb_test.go | 91 + lib/logstorage/encoding.go | 314 + lib/logstorage/encoding_test.go | 86 + lib/logstorage/encoding_timing_test.go | 73 + lib/logstorage/filenames.go | 22 + lib/logstorage/filters.go | 3053 ++++++ lib/logstorage/filters_test.go | 9296 +++++++++++++++++ lib/logstorage/hash128.go | 38 + lib/logstorage/hash128_test.go | 24 + lib/logstorage/hash128_timing_test.go | 29 + lib/logstorage/index_block_header.go | 164 + lib/logstorage/index_block_header_test.go | 138 + lib/logstorage/indexdb.go | 900 ++ lib/logstorage/indexdb_test.go | 253 + lib/logstorage/inmemory_part.go | 155 + lib/logstorage/inmemory_part_test.go | 343 + lib/logstorage/inmemory_part_timing_test.go | 34 + lib/logstorage/log_rows.go | 277 + lib/logstorage/log_rows_timing_test.go | 83 + lib/logstorage/parser.go | 1100 ++ lib/logstorage/parser_test.go | 966 ++ lib/logstorage/part.go | 102 + lib/logstorage/part_header.go | 84 + lib/logstorage/part_header_test.go | 21 + lib/logstorage/partition.go | 237 + lib/logstorage/partition_test.go | 187 + lib/logstorage/rows.go | 123 + lib/logstorage/rows_test.go | 287 + lib/logstorage/storage.go | 532 + lib/logstorage/storage_search.go | 602 ++ lib/logstorage/storage_search_test.go | 663 ++ lib/logstorage/storage_test.go | 102 + lib/logstorage/stream_filter.go | 90 + lib/logstorage/stream_id.go | 69 + lib/logstorage/stream_id_test.go | 172 + lib/logstorage/stream_tags.go | 298 + lib/logstorage/tenant_id.go | 91 + lib/logstorage/tenant_id_test.go | 124 + lib/logstorage/tokenizer.go | 153 + lib/logstorage/tokenizer_test.go | 29 + lib/logstorage/tokenizer_timing_test.go | 19 + lib/logstorage/u128.go | 50 + lib/logstorage/u128_test.go | 127 + lib/logstorage/values_encoder.go | 742 ++ lib/logstorage/values_encoder_test.go | 228 + lib/logstorage/values_encoder_timing_test.go | 98 + 82 files changed, 31486 insertions(+), 1 deletion(-) create mode 100644 app/victoria-logs/Makefile 
create mode 100644 app/victoria-logs/deployment/Dockerfile create mode 100644 app/victoria-logs/main.go create mode 100644 app/victoria-logs/multiarch/Dockerfile create mode 100644 app/vlinsert/elasticsearch/bulk_response.qtpl create mode 100644 app/vlinsert/elasticsearch/bulk_response.qtpl.go create mode 100644 app/vlinsert/elasticsearch/elasticsearch.go create mode 100644 app/vlinsert/elasticsearch/elasticsearch_test.go create mode 100644 app/vlinsert/elasticsearch/elasticsearch_timing_test.go create mode 100644 app/vlinsert/main.go create mode 100644 app/vlselect/logsql/logsql.go create mode 100644 app/vlselect/logsql/query_response.qtpl create mode 100644 app/vlselect/logsql/query_response.qtpl.go create mode 100644 app/vlselect/main.go create mode 100644 app/vlstorage/main.go create mode 100644 docs/VictoriaLogs/LogsQL.md create mode 100644 docs/VictoriaLogs/README.md create mode 100644 docs/VictoriaLogs/Roadmap.md create mode 100644 docs/VictoriaLogs/keyConcepts.md create mode 100644 lib/logstorage/arena.go create mode 100644 lib/logstorage/block.go create mode 100644 lib/logstorage/block_data.go create mode 100644 lib/logstorage/block_data_test.go create mode 100644 lib/logstorage/block_header.go create mode 100644 lib/logstorage/block_header_test.go create mode 100644 lib/logstorage/block_search.go create mode 100644 lib/logstorage/block_stream_merger.go create mode 100644 lib/logstorage/block_stream_reader.go create mode 100644 lib/logstorage/block_stream_writer.go create mode 100644 lib/logstorage/block_test.go create mode 100644 lib/logstorage/block_timing_test.go create mode 100644 lib/logstorage/bloomfilter.go create mode 100644 lib/logstorage/bloomfilter_test.go create mode 100644 lib/logstorage/consts.go create mode 100644 lib/logstorage/datadb.go create mode 100644 lib/logstorage/datadb_test.go create mode 100644 lib/logstorage/encoding.go create mode 100644 lib/logstorage/encoding_test.go create mode 100644 lib/logstorage/encoding_timing_test.go create mode 100644 lib/logstorage/filenames.go create mode 100644 lib/logstorage/filters.go create mode 100644 lib/logstorage/filters_test.go create mode 100644 lib/logstorage/hash128.go create mode 100644 lib/logstorage/hash128_test.go create mode 100644 lib/logstorage/hash128_timing_test.go create mode 100644 lib/logstorage/index_block_header.go create mode 100644 lib/logstorage/index_block_header_test.go create mode 100644 lib/logstorage/indexdb.go create mode 100644 lib/logstorage/indexdb_test.go create mode 100644 lib/logstorage/inmemory_part.go create mode 100644 lib/logstorage/inmemory_part_test.go create mode 100644 lib/logstorage/inmemory_part_timing_test.go create mode 100644 lib/logstorage/log_rows.go create mode 100644 lib/logstorage/log_rows_timing_test.go create mode 100644 lib/logstorage/parser.go create mode 100644 lib/logstorage/parser_test.go create mode 100644 lib/logstorage/part.go create mode 100644 lib/logstorage/part_header.go create mode 100644 lib/logstorage/part_header_test.go create mode 100644 lib/logstorage/partition.go create mode 100644 lib/logstorage/partition_test.go create mode 100644 lib/logstorage/rows.go create mode 100644 lib/logstorage/rows_test.go create mode 100644 lib/logstorage/storage.go create mode 100644 lib/logstorage/storage_search.go create mode 100644 lib/logstorage/storage_search_test.go create mode 100644 lib/logstorage/storage_test.go create mode 100644 lib/logstorage/stream_filter.go create mode 100644 lib/logstorage/stream_id.go create mode 100644 
lib/logstorage/stream_id_test.go create mode 100644 lib/logstorage/stream_tags.go create mode 100644 lib/logstorage/tenant_id.go create mode 100644 lib/logstorage/tenant_id_test.go create mode 100644 lib/logstorage/tokenizer.go create mode 100644 lib/logstorage/tokenizer_test.go create mode 100644 lib/logstorage/tokenizer_timing_test.go create mode 100644 lib/logstorage/u128.go create mode 100644 lib/logstorage/u128_test.go create mode 100644 lib/logstorage/values_encoder.go create mode 100644 lib/logstorage/values_encoder_test.go create mode 100644 lib/logstorage/values_encoder_timing_test.go diff --git a/Makefile b/Makefile index 551c587c7..a97c9f389 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,7 @@ include package/release/Makefile all: \ victoria-metrics-prod \ + victoria-logs-prod \ vmagent-prod \ vmalert-prod \ vmauth-prod \ @@ -33,6 +34,7 @@ clean: publish: docker-scan \ publish-victoria-metrics \ + publish-victoria-logs \ publish-vmagent \ publish-vmalert \ publish-vmauth \ @@ -42,6 +44,7 @@ publish: docker-scan \ package: \ package-victoria-metrics \ + package-victoria-logs \ package-vmagent \ package-vmalert \ package-vmauth \ @@ -178,6 +181,7 @@ publish-release: release: \ release-victoria-metrics \ + release-victoria-logs \ release-vmutils release-victoria-metrics: \ @@ -191,7 +195,6 @@ release-victoria-metrics: \ release-victoria-metrics-openbsd-amd64 \ release-victoria-metrics-windows-amd64 -# adds i386 arch release-victoria-metrics-linux-386: GOOS=linux GOARCH=386 $(MAKE) release-victoria-metrics-goos-goarch @@ -238,6 +241,63 @@ release-victoria-metrics-windows-goarch: victoria-metrics-windows-$(GOARCH)-prod cd bin && rm -rf \ victoria-metrics-windows-$(GOARCH)-prod.exe +release-victoria-logs: \ + release-victoria-logs-linux-386 \ + release-victoria-logs-linux-amd64 \ + release-victoria-logs-linux-arm \ + release-victoria-logs-linux-arm64 \ + release-victoria-logs-darwin-amd64 \ + release-victoria-logs-darwin-arm64 \ + release-victoria-logs-freebsd-amd64 \ + release-victoria-logs-openbsd-amd64 \ + release-victoria-logs-windows-amd64 + +release-victoria-logs-linux-386: + GOOS=linux GOARCH=386 $(MAKE) release-victoria-logs-goos-goarch + +release-victoria-logs-linux-amd64: + GOOS=linux GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch + +release-victoria-logs-linux-arm: + GOOS=linux GOARCH=arm $(MAKE) release-victoria-logs-goos-goarch + +release-victoria-logs-linux-arm64: + GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-logs-goos-goarch + +release-victoria-logs-darwin-amd64: + GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch + +release-victoria-logs-darwin-arm64: + GOOS=darwin GOARCH=arm64 $(MAKE) release-victoria-logs-goos-goarch + +release-victoria-logs-freebsd-amd64: + GOOS=freebsd GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch + +release-victoria-logs-openbsd-amd64: + GOOS=openbsd GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch + +release-victoria-logs-windows-amd64: + GOARCH=amd64 $(MAKE) release-victoria-logs-windows-goarch + +release-victoria-logs-goos-goarch: victoria-logs-$(GOOS)-$(GOARCH)-prod + cd bin && \ + tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf victoria-logs-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \ + victoria-logs-$(GOOS)-$(GOARCH)-prod \ + && sha256sum victoria-logs-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \ + victoria-logs-$(GOOS)-$(GOARCH)-prod \ + | sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > victoria-logs-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt + cd bin && rm -rf victoria-logs-$(GOOS)-$(GOARCH)-prod + 
+release-victoria-logs-windows-goarch: victoria-logs-windows-$(GOARCH)-prod + cd bin && \ + zip victoria-logs-windows-$(GOARCH)-$(PKG_TAG).zip \ + victoria-logs-windows-$(GOARCH)-prod.exe \ + && sha256sum victoria-logs-windows-$(GOARCH)-$(PKG_TAG).zip \ + victoria-logs-windows-$(GOARCH)-prod.exe \ + > victoria-logs-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt + cd bin && rm -rf \ + victoria-logs-windows-$(GOARCH)-prod.exe + release-vmutils: \ release-vmutils-linux-386 \ release-vmutils-linux-amd64 \ diff --git a/app/victoria-logs/Makefile b/app/victoria-logs/Makefile new file mode 100644 index 000000000..6b2170613 --- /dev/null +++ b/app/victoria-logs/Makefile @@ -0,0 +1,103 @@ +# All these commands must run from repository root. + +victoria-logs: + APP_NAME=victoria-logs $(MAKE) app-local + +victoria-logs-race: + APP_NAME=victoria-logs RACE=-race $(MAKE) app-local + +victoria-logs-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker + +victoria-logs-pure-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-pure + +victoria-logs-linux-amd64-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-amd64 + +victoria-logs-linux-arm-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-arm + +victoria-logs-linux-arm64-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-arm64 + +victoria-logs-linux-ppc64le-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-ppc64le + +victoria-logs-linux-386-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-386 + +victoria-logs-darwin-amd64-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-darwin-amd64 + +victoria-logs-darwin-arm64-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-darwin-arm64 + +victoria-logs-freebsd-amd64-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-freebsd-amd64 + +victoria-logs-openbsd-amd64-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-openbsd-amd64 + +victoria-logs-windows-amd64-prod: + APP_NAME=victoria-logs $(MAKE) app-via-docker-windows-amd64 + +package-victoria-logs: + APP_NAME=victoria-logs $(MAKE) package-via-docker + +package-victoria-logs-pure: + APP_NAME=victoria-logs $(MAKE) package-via-docker-pure + +package-victoria-logs-amd64: + APP_NAME=victoria-logs $(MAKE) package-via-docker-amd64 + +package-victoria-logs-arm: + APP_NAME=victoria-logs $(MAKE) package-via-docker-arm + +package-victoria-logs-arm64: + APP_NAME=victoria-logs $(MAKE) package-via-docker-arm64 + +package-victoria-logs-ppc64le: + APP_NAME=victoria-logs $(MAKE) package-via-docker-ppc64le + +package-victoria-logs-386: + APP_NAME=victoria-logs $(MAKE) package-via-docker-386 + +publish-victoria-logs: + APP_NAME=victoria-logs $(MAKE) publish-via-docker + +victoria-logs-linux-amd64: + APP_NAME=victoria-logs CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch + +victoria-logs-linux-arm: + APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch + +victoria-logs-linux-arm64: + APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch + +victoria-logs-linux-ppc64le: + APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch + +victoria-logs-linux-s390x: + APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch + +victoria-logs-linux-386: + APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch + +victoria-logs-darwin-amd64: + APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch + 
+victoria-logs-darwin-arm64: + APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch + +victoria-logs-freebsd-amd64: + APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch + +victoria-logs-openbsd-amd64: + APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch + +victoria-logs-windows-amd64: + GOARCH=amd64 APP_NAME=victoria-logs $(MAKE) app-local-windows-goarch + +victoria-logs-pure: + APP_NAME=victoria-logs $(MAKE) app-local-pure diff --git a/app/victoria-logs/deployment/Dockerfile b/app/victoria-logs/deployment/Dockerfile new file mode 100644 index 000000000..865964d71 --- /dev/null +++ b/app/victoria-logs/deployment/Dockerfile @@ -0,0 +1,8 @@ +ARG base_image +FROM $base_image + +EXPOSE 8428 + +ENTRYPOINT ["/victoria-logs-prod"] +ARG src_binary +COPY $src_binary ./victoria-logs-prod diff --git a/app/victoria-logs/main.go b/app/victoria-logs/main.go new file mode 100644 index 000000000..27150810f --- /dev/null +++ b/app/victoria-logs/main.go @@ -0,0 +1,102 @@ +package main + +import ( + "flag" + "fmt" + "net/http" + "os" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert" + "github.com/VictoriaMetrics/VictoriaMetrics/app/vlselect" + "github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics" +) + +var ( + httpListenAddr = flag.String("httpListenAddr", ":9428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol") + useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+ + "See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+ + "With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing") + gogc = flag.Int("gogc", 100, "GOGC to use. See https://tip.golang.org/doc/gc-guide") +) + +func main() { + // Write flags and help message to stdout, since it is easier to grep or pipe. 
+ flag.CommandLine.SetOutput(os.Stdout) + flag.Usage = usage + envflag.Parse() + cgroup.SetGOGC(*gogc) + buildinfo.Init() + logger.Init() + pushmetrics.Init() + + logger.Infof("starting VictoriaLogs at %q...", *httpListenAddr) + startTime := time.Now() + + vlstorage.Init() + vlselect.Init() + vlinsert.Init() + + go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler) + logger.Infof("started VictoriaLogs in %.3f seconds; see https://docs.victoriametrics.com/VictoriaLogs/", time.Since(startTime).Seconds()) + + sig := procutil.WaitForSigterm() + logger.Infof("received signal %s", sig) + + logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr) + startTime = time.Now() + if err := httpserver.Stop(*httpListenAddr); err != nil { + logger.Fatalf("cannot stop the webservice: %s", err) + } + logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds()) + + vlinsert.Stop() + vlselect.Stop() + vlstorage.Stop() + + fs.MustStopDirRemover() + + logger.Infof("the VictoriaLogs has been stopped in %.3f seconds", time.Since(startTime).Seconds()) +} + +func requestHandler(w http.ResponseWriter, r *http.Request) bool { + if r.URL.Path == "/" { + if r.Method != http.MethodGet { + return false + } + w.Header().Add("Content-Type", "text/html; charset=utf-8") + fmt.Fprintf(w, "

<h2>Single-node VictoriaLogs</h2></br>") + fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/VictoriaLogs/'>https://docs.victoriametrics.com/VictoriaLogs/</a></br>") + fmt.Fprintf(w, "Useful endpoints:</br>
") + httpserver.WriteAPIHelp(w, [][2]string{ + {"metrics", "available service metrics"}, + {"flags", "command-line flags"}, + }) + return true + } + if vlinsert.RequestHandler(w, r) { + return true + } + if vlselect.RequestHandler(w, r) { + return true + } + return false +} + +func usage() { + const s = ` +victoria-logs is a log management and analytics service. + +See the docs at https://docs.victoriametrics.com/VictoriaLogs/ +` + flagutil.Usage(s) +} diff --git a/app/victoria-logs/multiarch/Dockerfile b/app/victoria-logs/multiarch/Dockerfile new file mode 100644 index 000000000..220add3a4 --- /dev/null +++ b/app/victoria-logs/multiarch/Dockerfile @@ -0,0 +1,12 @@ +# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b +ARG certs_image +ARG root_image +FROM $certs_image as certs +RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates + +FROM $root_image +COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt +EXPOSE 8428 +ENTRYPOINT ["/victoria-logs-prod"] +ARG TARGETARCH +COPY victoria-logs-linux-${TARGETARCH}-prod ./victoria-logs-prod diff --git a/app/vlinsert/elasticsearch/bulk_response.qtpl b/app/vlinsert/elasticsearch/bulk_response.qtpl new file mode 100644 index 000000000..f2b499d0e --- /dev/null +++ b/app/vlinsert/elasticsearch/bulk_response.qtpl @@ -0,0 +1,20 @@ +{% stripspace %} + +{% func BulkResponse(n int, tookMs int64) %} +{ + "took":{%dl tookMs %}, + "errors":false, + "items":[ + {% for i := 0; i < n; i++ %} + { + "create":{ + "status":201 + } + } + {% if i+1 < n %},{% endif %} + {% endfor %} + ] +} +{% endfunc %} + +{% endstripspace %} diff --git a/app/vlinsert/elasticsearch/bulk_response.qtpl.go b/app/vlinsert/elasticsearch/bulk_response.qtpl.go new file mode 100644 index 000000000..5bd6c5a58 --- /dev/null +++ b/app/vlinsert/elasticsearch/bulk_response.qtpl.go @@ -0,0 +1,69 @@ +// Code generated by qtc from "bulk_response.qtpl". DO NOT EDIT. +// See https://github.com/valyala/quicktemplate for details. 
+ +//line app/vlinsert/elasticsearch/bulk_response.qtpl:3 +package elasticsearch + +//line app/vlinsert/elasticsearch/bulk_response.qtpl:3 +import ( + qtio422016 "io" + + qt422016 "github.com/valyala/quicktemplate" +) + +//line app/vlinsert/elasticsearch/bulk_response.qtpl:3 +var ( + _ = qtio422016.Copy + _ = qt422016.AcquireByteBuffer +) + +//line app/vlinsert/elasticsearch/bulk_response.qtpl:3 +func StreamBulkResponse(qw422016 *qt422016.Writer, n int, tookMs int64) { +//line app/vlinsert/elasticsearch/bulk_response.qtpl:3 + qw422016.N().S(`{"took":`) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:5 + qw422016.N().DL(tookMs) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:5 + qw422016.N().S(`,"errors":false,"items":[`) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:8 + for i := 0; i < n; i++ { +//line app/vlinsert/elasticsearch/bulk_response.qtpl:8 + qw422016.N().S(`{"create":{"status":201}}`) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:14 + if i+1 < n { +//line app/vlinsert/elasticsearch/bulk_response.qtpl:14 + qw422016.N().S(`,`) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:14 + } +//line app/vlinsert/elasticsearch/bulk_response.qtpl:15 + } +//line app/vlinsert/elasticsearch/bulk_response.qtpl:15 + qw422016.N().S(`]}`) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 +} + +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 +func WriteBulkResponse(qq422016 qtio422016.Writer, n int, tookMs int64) { +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 + qw422016 := qt422016.AcquireWriter(qq422016) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 + StreamBulkResponse(qw422016, n, tookMs) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 + qt422016.ReleaseWriter(qw422016) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 +} + +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 +func BulkResponse(n int, tookMs int64) string { +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 + qb422016 := qt422016.AcquireByteBuffer() +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 + WriteBulkResponse(qb422016, n, tookMs) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 + qs422016 := string(qb422016.B) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 + qt422016.ReleaseByteBuffer(qb422016) +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 + return qs422016 +//line app/vlinsert/elasticsearch/bulk_response.qtpl:18 +} diff --git a/app/vlinsert/elasticsearch/elasticsearch.go b/app/vlinsert/elasticsearch/elasticsearch.go new file mode 100644 index 000000000..de4cf945c --- /dev/null +++ b/app/vlinsert/elasticsearch/elasticsearch.go @@ -0,0 +1,410 @@ +package elasticsearch + +import ( + "bufio" + "errors" + "fmt" + "io" + "math" + "net/http" + "strconv" + "strings" + "sync" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter" + "github.com/VictoriaMetrics/metrics" + "github.com/valyala/fastjson" +) + +var ( + maxLineSizeBytes = flagutil.NewBytes("insert.maxLineSizeBytes", 
256*1024, "The maximum size of a single line, which can be read by /insert/* handlers") +) + +// RequestHandler processes ElasticSearch insert requests +func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool { + w.Header().Add("Content-Type", "application/json") + // This header is needed for Logstash + w.Header().Set("X-Elastic-Product", "Elasticsearch") + + if strings.HasPrefix(path, "/_ilm/policy") { + // Return fake response for ElasticSearch ilm request. + fmt.Fprintf(w, `{}`) + return true + } + if strings.HasPrefix(path, "/_index_template") { + // Return fake response for ElasticSearch index template request. + fmt.Fprintf(w, `{}`) + return true + } + if strings.HasPrefix(path, "/_ingest") { + // Return fake response for ElasticSearch ingest pipeline request. + // See: https://www.elastic.co/guide/en/elasticsearch/reference/8.8/put-pipeline-api.html + fmt.Fprintf(w, `{}`) + return true + } + if strings.HasPrefix(path, "/_nodes") { + // Return fake response for ElasticSearch nodes discovery request. + // See: https://www.elastic.co/guide/en/elasticsearch/reference/8.8/cluster.html + fmt.Fprintf(w, `{}`) + return true + } + switch path { + case "/": + switch r.Method { + case http.MethodGet: + // Return fake response for ElasticSearch ping request. + // See the latest available version for ElasticSearch at https://github.com/elastic/elasticsearch/releases + fmt.Fprintf(w, `{ + "version": { + "number": "8.8.0" + } + }`) + case http.MethodHead: + // Return empty response for Logstash ping request. + } + + return true + case "/_license": + // Return fake response for ElasticSearch license request. + fmt.Fprintf(w, `{ + "license": { + "uid": "cbff45e7-c553-41f7-ae4f-9205eabd80xx", + "type": "oss", + "status": "active", + "expiry_date_in_millis" : 4000000000000 + } + }`) + return true + case "/_bulk": + startTime := time.Now() + bulkRequestsTotal.Inc() + + // Extract tenantID + tenantID, err := logstorage.GetTenantIDFromRequest(r) + if err != nil { + httpserver.Errorf(w, r, "%s", err) + return true + } + + // Extract time field name from _time_field query arg + var timeField = "_time" + if tf := r.FormValue("_time_field"); tf != "" { + timeField = tf + } + + // Extract message field name from _msg_field query arg + var msgField = "" + if msgf := r.FormValue("_msg_field"); msgf != "" { + msgField = msgf + } + + // Extract stream field names from _stream_fields query arg + var streamFields []string + if sfs := r.FormValue("_stream_fields"); sfs != "" { + streamFields = strings.Split(sfs, ",") + } + + // Extract field names, which must be ignored + var ignoreFields []string + if ifs := r.FormValue("ignore_fields"); ifs != "" { + ignoreFields = strings.Split(ifs, ",") + } + + lr := logstorage.GetLogRows(streamFields, ignoreFields) + processLogMessage := func(timestamp int64, fields []logstorage.Field) { + lr.MustAdd(tenantID, timestamp, fields) + if lr.NeedFlush() { + vlstorage.MustAddRows(lr) + lr.Reset() + } + } + + isGzip := r.Header.Get("Content-Encoding") == "gzip" + n, err := readBulkRequest(r.Body, isGzip, timeField, msgField, processLogMessage) + if err != nil { + logger.Warnf("cannot decode log message #%d in /_bulk request: %s", n, err) + return true + } + vlstorage.MustAddRows(lr) + logstorage.PutLogRows(lr) + + tookMs := time.Since(startTime).Milliseconds() + bw := bufferedwriter.Get(w) + defer bufferedwriter.Put(bw) + WriteBulkResponse(bw, n, tookMs) + _ = bw.Flush() + return true + default: + return false + } +} + +var bulkRequestsTotal = 
metrics.NewCounter(`vl_http_requests_total{path="/insert/elasticsearch/_bulk"}`) + +func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string, + processLogMessage func(timestamp int64, fields []logstorage.Field), +) (int, error) { + // See https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html + + if isGzip { + zr, err := common.GetGzipReader(r) + if err != nil { + return 0, fmt.Errorf("cannot read gzipped _bulk request: %w", err) + } + defer common.PutGzipReader(zr) + r = zr + } + + wcr := writeconcurrencylimiter.GetReader(r) + defer writeconcurrencylimiter.PutReader(wcr) + + lb := lineBufferPool.Get() + defer lineBufferPool.Put(lb) + + lb.B = bytesutil.ResizeNoCopyNoOverallocate(lb.B, maxLineSizeBytes.IntN()) + sc := bufio.NewScanner(wcr) + sc.Buffer(lb.B, len(lb.B)) + + n := 0 + nCheckpoint := 0 + for { + ok, err := readBulkLine(sc, timeField, msgField, processLogMessage) + wcr.DecConcurrency() + if err != nil || !ok { + rowsIngestedTotal.Add(n - nCheckpoint) + return n, err + } + n++ + if batchSize := n - nCheckpoint; n >= 1000 { + rowsIngestedTotal.Add(batchSize) + nCheckpoint = n + } + } +} + +var lineBufferPool bytesutil.ByteBufferPool + +var rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="elasticsearch_bulk"}`) + +func readBulkLine(sc *bufio.Scanner, timeField, msgField string, + processLogMessage func(timestamp int64, fields []logstorage.Field), +) (bool, error) { + // Decode command, must be "create" or "index" + if !sc.Scan() { + if err := sc.Err(); err != nil { + if errors.Is(err, bufio.ErrTooLong) { + return false, fmt.Errorf(`cannot read "create" or "index" command, since its size exceeds -insert.maxLineSizeBytes=%d`, maxLineSizeBytes.IntN()) + } + return false, err + } + return false, nil + } + line := sc.Bytes() + p := parserPool.Get() + v, err := p.ParseBytes(line) + if err != nil { + return false, fmt.Errorf(`cannot parse "create" or "index" command: %w`, err) + } + if v.GetObject("create") == nil && v.GetObject("index") == nil { + return false, fmt.Errorf(`unexpected command %q; expected "create" or "index"`, v) + } + parserPool.Put(p) + + // Decode log message + if !sc.Scan() { + if err := sc.Err(); err != nil { + if errors.Is(err, bufio.ErrTooLong) { + return false, fmt.Errorf("cannot read log message, since its size exceeds -insert.maxLineSizeBytes=%d", maxLineSizeBytes.IntN()) + } + return false, err + } + return false, fmt.Errorf(`missing log message after the "create" or "index" command`) + } + line = sc.Bytes() + pctx := getParserCtx() + if err := pctx.parseLogMessage(line); err != nil { + invalidJSONLineLogger.Warnf("cannot parse json-encoded log entry: %s", err) + return true, nil + } + + timestamp, err := extractTimestampFromFields(timeField, pctx.fields) + if err != nil { + invalidTimestampLogger.Warnf("skipping the log entry because cannot parse timestamp: %s", err) + return true, nil + } + updateMessageFieldName(msgField, pctx.fields) + processLogMessage(timestamp, pctx.fields) + putParserCtx(pctx) + return true, nil +} + +var parserPool fastjson.ParserPool + +var ( + invalidTimestampLogger = logger.WithThrottler("invalidTimestampLogger", 5*time.Second) + invalidJSONLineLogger = logger.WithThrottler("invalidJSONLineLogger", 5*time.Second) +) + +func extractTimestampFromFields(timeField string, fields []logstorage.Field) (int64, error) { + for i := range fields { + f := &fields[i] + if f.Name != timeField { + continue + } + timestamp, err := parseElasticsearchTimestamp(f.Value) + if err != nil { + 
return 0, err + } + f.Value = "" + return timestamp, nil + } + return time.Now().UnixNano(), nil +} + +func updateMessageFieldName(msgField string, fields []logstorage.Field) { + if msgField == "" { + return + } + for i := range fields { + f := &fields[i] + if f.Name == msgField { + f.Name = "_msg" + return + } + } +} + +type parserCtx struct { + p fastjson.Parser + buf []byte + prefixBuf []byte + fields []logstorage.Field +} + +func (pctx *parserCtx) reset() { + pctx.buf = pctx.buf[:0] + pctx.prefixBuf = pctx.prefixBuf[:0] + + fields := pctx.fields + for i := range fields { + lf := &fields[i] + lf.Name = "" + lf.Value = "" + } + pctx.fields = fields[:0] +} + +func getParserCtx() *parserCtx { + v := parserCtxPool.Get() + if v == nil { + return &parserCtx{} + } + return v.(*parserCtx) +} + +func putParserCtx(pctx *parserCtx) { + pctx.reset() + parserCtxPool.Put(pctx) +} + +var parserCtxPool sync.Pool + +func (pctx *parserCtx) parseLogMessage(msg []byte) error { + s := bytesutil.ToUnsafeString(msg) + v, err := pctx.p.Parse(s) + if err != nil { + return fmt.Errorf("cannot parse json: %w", err) + } + if t := v.Type(); t != fastjson.TypeObject { + return fmt.Errorf("expecting json dictionary; got %s", t) + } + pctx.reset() + pctx.fields, pctx.buf, pctx.prefixBuf = appendLogFields(pctx.fields, pctx.buf, pctx.prefixBuf, v) + return nil +} + +func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]logstorage.Field, []byte, []byte) { + o := v.GetObject() + o.Visit(func(k []byte, v *fastjson.Value) { + t := v.Type() + switch t { + case fastjson.TypeNull: + // Skip nulls + case fastjson.TypeObject: + // Flatten nested JSON objects. + // For example, {"foo":{"bar":"baz"}} is converted to {"foo.bar":"baz"} + prefixLen := len(prefixBuf) + prefixBuf = append(prefixBuf, k...) + prefixBuf = append(prefixBuf, '.') + dst, dstBuf, prefixBuf = appendLogFields(dst, dstBuf, prefixBuf, v) + prefixBuf = prefixBuf[:prefixLen] + case fastjson.TypeArray, fastjson.TypeNumber, fastjson.TypeTrue, fastjson.TypeFalse: + // Convert JSON arrays, numbers, true and false values to their string representation + dstBufLen := len(dstBuf) + dstBuf = v.MarshalTo(dstBuf) + value := dstBuf[dstBufLen:] + dst, dstBuf = appendLogField(dst, dstBuf, prefixBuf, k, value) + case fastjson.TypeString: + // Decode JSON strings + dstBufLen := len(dstBuf) + dstBuf = append(dstBuf, v.GetStringBytes()...) + value := dstBuf[dstBufLen:] + dst, dstBuf = appendLogField(dst, dstBuf, prefixBuf, k, value) + default: + logger.Panicf("BUG: unexpected JSON type: %s", t) + } + }) + return dst, dstBuf, prefixBuf +} + +func appendLogField(dst []logstorage.Field, dstBuf, prefixBuf, k, value []byte) ([]logstorage.Field, []byte) { + dstBufLen := len(dstBuf) + dstBuf = append(dstBuf, prefixBuf...) + dstBuf = append(dstBuf, k...) 
+ name := dstBuf[dstBufLen:] + + dst = append(dst, logstorage.Field{ + Name: bytesutil.ToUnsafeString(name), + Value: bytesutil.ToUnsafeString(value), + }) + return dst, dstBuf +} + +func parseElasticsearchTimestamp(s string) (int64, error) { + if len(s) < len("YYYY-MM-DD") || s[len("YYYY")] != '-' { + // Try parsing timestamp in milliseconds + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return 0, fmt.Errorf("cannot parse timestamp in milliseconds from %q: %w", s, err) + } + if n > int64(math.MaxInt64)/1e6 { + return 0, fmt.Errorf("too big timestamp in milliseconds: %d; mustn't exceed %d", n, int64(math.MaxInt64)/1e6) + } + if n < int64(math.MinInt64)/1e6 { + return 0, fmt.Errorf("too small timestamp in milliseconds: %d; must be bigger than %d", n, int64(math.MinInt64)/1e6) + } + n *= 1e6 + return n, nil + } + if len(s) == len("YYYY-MM-DD") { + t, err := time.Parse("2006-01-02", s) + if err != nil { + return 0, fmt.Errorf("cannot parse date %q: %w", s, err) + } + return t.UnixNano(), nil + } + t, err := time.Parse(time.RFC3339, s) + if err != nil { + return 0, fmt.Errorf("cannot parse timestamp %q: %w", s, err) + } + return t.UnixNano(), nil +} diff --git a/app/vlinsert/elasticsearch/elasticsearch_test.go b/app/vlinsert/elasticsearch/elasticsearch_test.go new file mode 100644 index 000000000..ff9a5a110 --- /dev/null +++ b/app/vlinsert/elasticsearch/elasticsearch_test.go @@ -0,0 +1,97 @@ +package elasticsearch + +import ( + "bytes" + "compress/gzip" + "fmt" + "reflect" + "strings" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" +) + +func TestReadBulkRequest(t *testing.T) { + f := func(data, timeField, msgField string, rowsExpected int, timestampsExpected []int64, resultExpected string) { + t.Helper() + + var timestamps []int64 + var result string + processLogMessage := func(timestamp int64, fields []logstorage.Field) { + timestamps = append(timestamps, timestamp) + + a := make([]string, len(fields)) + for i, f := range fields { + a[i] = fmt.Sprintf("%q:%q", f.Name, f.Value) + } + s := "{" + strings.Join(a, ",") + "}\n" + result += s + } + + // Read the request without compression + r := bytes.NewBufferString(data) + rows, err := readBulkRequest(r, false, timeField, msgField, processLogMessage) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + if rows != rowsExpected { + t.Fatalf("unexpected rows read; got %d; want %d", rows, rowsExpected) + } + + if !reflect.DeepEqual(timestamps, timestampsExpected) { + t.Fatalf("unexpected timestamps;\ngot\n%d\nwant\n%d", timestamps, timestampsExpected) + } + if result != resultExpected { + t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected) + } + + // Read the request with compression + timestamps = nil + result = "" + compressedData := compressData(data) + r = bytes.NewBufferString(compressedData) + rows, err = readBulkRequest(r, true, timeField, msgField, processLogMessage) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + if rows != rowsExpected { + t.Fatalf("unexpected rows read; got %d; want %d", rows, rowsExpected) + } + + if !reflect.DeepEqual(timestamps, timestampsExpected) { + t.Fatalf("unexpected timestamps;\ngot\n%d\nwant\n%d", timestamps, timestampsExpected) + } + if result != resultExpected { + t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected) + } + } + + data := `{"create":{"_index":"filebeat-8.8.0"}} +{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"} 
+{"create":{"_index":"filebeat-8.8.0"}} +{"@timestamp":"2023-06-06T04:48:12.735Z","message":"baz"} +{"create":{"_index":"filebeat-8.8.0"}} +{"message":"xyz","@timestamp":"2023-06-06T04:48:13.735Z","x":"y"} +` + timeField := "@timestamp" + msgField := "message" + rowsExpected := 3 + timestampsExpected := []int64{1686026891735000000, 1686026892735000000, 1686026893735000000} + resultExpected := `{"@timestamp":"","log.offset":"71770","log.file.path":"/var/log/auth.log","_msg":"foobar"} +{"@timestamp":"","_msg":"baz"} +{"_msg":"xyz","@timestamp":"","x":"y"} +` + f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected) +} + +func compressData(s string) string { + var bb bytes.Buffer + zw := gzip.NewWriter(&bb) + if _, err := zw.Write([]byte(s)); err != nil { + panic(fmt.Errorf("unexpected error when compressing data: %s", err)) + } + if err := zw.Close(); err != nil { + panic(fmt.Errorf("unexpected error when closing gzip writer: %s", err)) + } + return bb.String() +} diff --git a/app/vlinsert/elasticsearch/elasticsearch_timing_test.go b/app/vlinsert/elasticsearch/elasticsearch_timing_test.go new file mode 100644 index 000000000..9a50fe0eb --- /dev/null +++ b/app/vlinsert/elasticsearch/elasticsearch_timing_test.go @@ -0,0 +1,50 @@ +package elasticsearch + +import ( + "bytes" + "fmt" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" +) + +func BenchmarkReadBulkRequest(b *testing.B) { + b.Run("gzip:off", func(b *testing.B) { + benchmarkReadBulkRequest(b, false) + }) + b.Run("gzip:on", func(b *testing.B) { + benchmarkReadBulkRequest(b, true) + }) +} + +func benchmarkReadBulkRequest(b *testing.B, isGzip bool) { + data := `{"create":{"_index":"filebeat-8.8.0"}} +{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"} +{"create":{"_index":"filebeat-8.8.0"}} +{"@timestamp":"2023-06-06T04:48:12.735Z","message":"baz"} +{"create":{"_index":"filebeat-8.8.0"}} +{"message":"xyz","@timestamp":"2023-06-06T04:48:13.735Z","x":"y"} +` + if isGzip { + data = compressData(data) + } + dataBytes := bytesutil.ToUnsafeBytes(data) + + timeField := "@timestamp" + msgField := "message" + processLogMessage := func(timestmap int64, fields []logstorage.Field) {} + + b.ReportAllocs() + b.SetBytes(int64(len(data))) + b.RunParallel(func(pb *testing.PB) { + r := &bytes.Reader{} + for pb.Next() { + r.Reset(dataBytes) + _, err := readBulkRequest(r, isGzip, timeField, msgField, processLogMessage) + if err != nil { + panic(fmt.Errorf("unexpected error: %s", err)) + } + } + }) +} diff --git a/app/vlinsert/main.go b/app/vlinsert/main.go new file mode 100644 index 000000000..64157229f --- /dev/null +++ b/app/vlinsert/main.go @@ -0,0 +1,34 @@ +package vlinsert + +import ( + "net/http" + "strings" + + "github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/elasticsearch" +) + +// Init initializes vlinsert +func Init() { +} + +// Stop stops vlinsert +func Stop() { +} + +// RequestHandler handles insert requests for VictoriaLogs +func RequestHandler(w http.ResponseWriter, r *http.Request) bool { + path := r.URL.Path + if !strings.HasPrefix(path, "/insert/") { + return false + } + path = strings.TrimPrefix(path, "/insert") + path = strings.ReplaceAll(path, "//", "/") + + switch { + case strings.HasPrefix(path, "/elasticsearch/"): + path = strings.TrimPrefix(path, "/elasticsearch") + return elasticsearch.RequestHandler(path, w, r) + default: + return false + } +} diff 
--git a/app/vlselect/logsql/logsql.go b/app/vlselect/logsql/logsql.go new file mode 100644 index 000000000..05d6f8830 --- /dev/null +++ b/app/vlselect/logsql/logsql.go @@ -0,0 +1,53 @@ +package logsql + +import ( + "net/http" + + "github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" +) + +// ProcessQueryRequest handles /select/logsql/query request +func ProcessQueryRequest(w http.ResponseWriter, r *http.Request, stopCh <-chan struct{}) { + // Extract tenantID + tenantID, err := logstorage.GetTenantIDFromRequest(r) + if err != nil { + httpserver.Errorf(w, r, "%s", err) + return + } + + qStr := r.FormValue("query") + q, err := logstorage.ParseQuery(qStr) + if err != nil { + httpserver.Errorf(w, r, "cannot parse query [%s]: %s", qStr, err) + return + } + w.Header().Set("Content-Type", "application/stream+json; charset=utf-8") + + bw := bufferedwriter.Get(w) + defer bufferedwriter.Put(bw) + + tenantIDs := []logstorage.TenantID{tenantID} + vlstorage.RunQuery(tenantIDs, q, stopCh, func(columns []logstorage.BlockColumn) { + if len(columns) == 0 { + return + } + rowsCount := len(columns[0].Values) + + bb := blockResultPool.Get() + for rowIdx := 0; rowIdx < rowsCount; rowIdx++ { + WriteJSONRow(bb, columns, rowIdx) + } + // Do not check for error here, since the only valid error is when the client + // closes the connection during Write() call. There is no need in logging this error, + // since it may be too verbose and it doesn't give any actionable info. + _, _ = bw.Write(bb.B) + blockResultPool.Put(bb) + }) + _ = bw.Flush() +} + +var blockResultPool bytesutil.ByteBufferPool diff --git a/app/vlselect/logsql/query_response.qtpl b/app/vlselect/logsql/query_response.qtpl new file mode 100644 index 000000000..c98b0c9bd --- /dev/null +++ b/app/vlselect/logsql/query_response.qtpl @@ -0,0 +1,20 @@ +{% import ( + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" +) %} + +{% stripspace %} + +// JSONRow creates JSON row from the given fields. +{% func JSONRow(columns []logstorage.BlockColumn, rowIdx int) %} +{ + {% code c := &columns[0] %} + {%q= c.Name %}:{%q= c.Values[rowIdx] %} + {% code columns = columns[1:] %} + {% for colIdx := range columns %} + {% code c := &columns[colIdx] %} + ,{%q= c.Name %}:{%q= c.Values[rowIdx] %} + {% endfor %} +}{% newline %} +{% endfunc %} + +{% endstripspace %} diff --git a/app/vlselect/logsql/query_response.qtpl.go b/app/vlselect/logsql/query_response.qtpl.go new file mode 100644 index 000000000..d3d6cf1c1 --- /dev/null +++ b/app/vlselect/logsql/query_response.qtpl.go @@ -0,0 +1,90 @@ +// Code generated by qtc from "query_response.qtpl". DO NOT EDIT. +// See https://github.com/valyala/quicktemplate for details. + +//line app/vlselect/logsql/query_response.qtpl:1 +package logsql + +//line app/vlselect/logsql/query_response.qtpl:1 +import ( + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" +) + +// JSONRow creates JSON row from the given fields. 
+ +//line app/vlselect/logsql/query_response.qtpl:8 +import ( + qtio422016 "io" + + qt422016 "github.com/valyala/quicktemplate" +) + +//line app/vlselect/logsql/query_response.qtpl:8 +var ( + _ = qtio422016.Copy + _ = qt422016.AcquireByteBuffer +) + +//line app/vlselect/logsql/query_response.qtpl:8 +func StreamJSONRow(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) { +//line app/vlselect/logsql/query_response.qtpl:8 + qw422016.N().S(`{`) +//line app/vlselect/logsql/query_response.qtpl:10 + c := &columns[0] + +//line app/vlselect/logsql/query_response.qtpl:11 + qw422016.N().Q(c.Name) +//line app/vlselect/logsql/query_response.qtpl:11 + qw422016.N().S(`:`) +//line app/vlselect/logsql/query_response.qtpl:11 + qw422016.N().Q(c.Values[rowIdx]) +//line app/vlselect/logsql/query_response.qtpl:12 + columns = columns[1:] + +//line app/vlselect/logsql/query_response.qtpl:13 + for colIdx := range columns { +//line app/vlselect/logsql/query_response.qtpl:14 + c := &columns[colIdx] + +//line app/vlselect/logsql/query_response.qtpl:14 + qw422016.N().S(`,`) +//line app/vlselect/logsql/query_response.qtpl:15 + qw422016.N().Q(c.Name) +//line app/vlselect/logsql/query_response.qtpl:15 + qw422016.N().S(`:`) +//line app/vlselect/logsql/query_response.qtpl:15 + qw422016.N().Q(c.Values[rowIdx]) +//line app/vlselect/logsql/query_response.qtpl:16 + } +//line app/vlselect/logsql/query_response.qtpl:16 + qw422016.N().S(`}`) +//line app/vlselect/logsql/query_response.qtpl:17 + qw422016.N().S(` +`) +//line app/vlselect/logsql/query_response.qtpl:18 +} + +//line app/vlselect/logsql/query_response.qtpl:18 +func WriteJSONRow(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) { +//line app/vlselect/logsql/query_response.qtpl:18 + qw422016 := qt422016.AcquireWriter(qq422016) +//line app/vlselect/logsql/query_response.qtpl:18 + StreamJSONRow(qw422016, columns, rowIdx) +//line app/vlselect/logsql/query_response.qtpl:18 + qt422016.ReleaseWriter(qw422016) +//line app/vlselect/logsql/query_response.qtpl:18 +} + +//line app/vlselect/logsql/query_response.qtpl:18 +func JSONRow(columns []logstorage.BlockColumn, rowIdx int) string { +//line app/vlselect/logsql/query_response.qtpl:18 + qb422016 := qt422016.AcquireByteBuffer() +//line app/vlselect/logsql/query_response.qtpl:18 + WriteJSONRow(qb422016, columns, rowIdx) +//line app/vlselect/logsql/query_response.qtpl:18 + qs422016 := string(qb422016.B) +//line app/vlselect/logsql/query_response.qtpl:18 + qt422016.ReleaseByteBuffer(qb422016) +//line app/vlselect/logsql/query_response.qtpl:18 + return qs422016 +//line app/vlselect/logsql/query_response.qtpl:18 +} diff --git a/app/vlselect/main.go b/app/vlselect/main.go new file mode 100644 index 000000000..022740496 --- /dev/null +++ b/app/vlselect/main.go @@ -0,0 +1,140 @@ +package vlselect + +import ( + "flag" + "fmt" + "net/http" + "strings" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/app/vlselect/logsql" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool" + "github.com/VictoriaMetrics/metrics" +) + +var ( + maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. 
"+ + "It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. "+ + "See also -search.maxQueueDuration") + maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the search request waits for execution when -search.maxConcurrentRequests "+ + "limit is reached; see also -search.maxQueryDuration") + maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution") +) + +func getDefaultMaxConcurrentRequests() int { + n := cgroup.AvailableCPUs() + if n <= 4 { + n *= 2 + } + if n > 16 { + // A single request can saturate all the CPU cores, so there is no sense + // in allowing higher number of concurrent requests - they will just contend + // for unavailable CPU time. + n = 16 + } + return n +} + +// Init initializes vlselect +func Init() { + concurrencyLimitCh = make(chan struct{}, *maxConcurrentRequests) +} + +// Stop stops vlselect +func Stop() { +} + +var concurrencyLimitCh chan struct{} + +var ( + concurrencyLimitReached = metrics.NewCounter(`vl_concurrent_select_limit_reached_total`) + concurrencyLimitTimeout = metrics.NewCounter(`vl_concurrent_select_limit_timeout_total`) + + _ = metrics.NewGauge(`vl_concurrent_select_capacity`, func() float64 { + return float64(cap(concurrencyLimitCh)) + }) + _ = metrics.NewGauge(`vl_concurrent_select_current`, func() float64 { + return float64(len(concurrencyLimitCh)) + }) +) + +// RequestHandler handles select requests for VictoriaLogs +func RequestHandler(w http.ResponseWriter, r *http.Request) bool { + path := r.URL.Path + if !strings.HasPrefix(path, "/select/") { + return false + } + path = strings.TrimPrefix(path, "/select") + path = strings.ReplaceAll(path, "//", "/") + + // Limit the number of concurrent queries. + startTime := time.Now() + stopCh := r.Context().Done() + select { + case concurrencyLimitCh <- struct{}{}: + defer func() { <-concurrencyLimitCh }() + default: + // Sleep for a while until giving up. This should resolve short bursts in requests. + concurrencyLimitReached.Inc() + d := getMaxQueryDuration(r) + if d > *maxQueueDuration { + d = *maxQueueDuration + } + t := timerpool.Get(d) + select { + case concurrencyLimitCh <- struct{}{}: + timerpool.Put(t) + defer func() { <-concurrencyLimitCh }() + case <-stopCh: + timerpool.Put(t) + remoteAddr := httpserver.GetQuotedRemoteAddr(r) + requestURI := httpserver.GetRequestURI(r) + logger.Infof("client has cancelled the request after %.3f seconds: remoteAddr=%s, requestURI: %q", + time.Since(startTime).Seconds(), remoteAddr, requestURI) + return true + case <-t.C: + timerpool.Put(t) + concurrencyLimitTimeout.Inc() + err := &httpserver.ErrorWithStatusCode{ + Err: fmt.Errorf("couldn't start executing the request in %.3f seconds, since -search.maxConcurrentRequests=%d concurrent requests "+ + "are executed. 
Possible solutions: to reduce query load; to add more compute resources to the server; "+ + "to increase -search.maxQueueDuration=%s; to increase -search.maxQueryDuration; to increase -search.maxConcurrentRequests", + d.Seconds(), *maxConcurrentRequests, maxQueueDuration), + StatusCode: http.StatusServiceUnavailable, + } + httpserver.Errorf(w, r, "%s", err) + return true + } + } + + switch { + case path == "/logsql/query": + logsqlQueryRequests.Inc() + httpserver.EnableCORS(w, r) + logsql.ProcessQueryRequest(w, r, stopCh) + return true + default: + return false + } +} + +// getMaxQueryDuration returns the maximum duration for query from r. +func getMaxQueryDuration(r *http.Request) time.Duration { + dms, err := httputils.GetDuration(r, "timeout", 0) + if err != nil { + dms = 0 + } + d := time.Duration(dms) * time.Millisecond + if d <= 0 || d > *maxQueryDuration { + d = *maxQueryDuration + } + return d +} + +var ( + logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`) +) diff --git a/app/vlstorage/main.go b/app/vlstorage/main.go new file mode 100644 index 000000000..afaa78f1d --- /dev/null +++ b/app/vlstorage/main.go @@ -0,0 +1,149 @@ +package vlstorage + +import ( + "flag" + "fmt" + "sync" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" + "github.com/VictoriaMetrics/metrics" +) + +var ( + retentionPeriod = flagutil.NewDuration("retentionPeriod", "7d", "Log entries with timestamps older than now-retentionPeriod are automatically deleted; "+ + "log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); "+ + "see https://docs.victoriametrics.com/VictoriaLogs/#retention") + futureRetention = flagutil.NewDuration("futureRetention", "2d", "Log entries with timestamps bigger than now+futureRetention are rejected during data ingestion; "+ + "see https://docs.victoriametrics.com/VictoriaLogs/#retention") + storageDataPath = flag.String("storageDataPath", "victoria-logs-data", "Path to directory with the VictoriaLogs data; "+ + "see https://docs.victoriametrics.com/VictoriaLogs/#storage") + inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+ + "The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. "+ + "Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+ + "Smaller intervals increase disk IO load. Minimum supported value is 1s") + logNewStreams = flag.Bool("logNewStreams", false, "Whether to log creation of new streams; this can be useful for debugging of high cardinality issues with log streams; "+ + "see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields ; see also -logIngestedRows") + logIngestedRows = flag.Bool("logIngestedRows", false, "Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; "+ + "see https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion ; see also -logNewStreams") +) + +// Init initializes vlstorage. 
+// +// Stop must be called when vlstorage is no longer needed +func Init() { + if strg != nil { + logger.Panicf("BUG: Init() has been already called") + } + + if retentionPeriod.Msecs < 24*3600*1000 { + logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s", retentionPeriod) + } + cfg := &logstorage.StorageConfig{ + Retention: time.Millisecond * time.Duration(retentionPeriod.Msecs), + FlushInterval: *inmemoryDataFlushInterval, + FutureRetention: time.Millisecond * time.Duration(futureRetention.Msecs), + LogNewStreams: *logNewStreams, + LogIngestedRows: *logIngestedRows, + } + strg = logstorage.MustOpenStorage(*storageDataPath, cfg) + storageMetrics = initStorageMetrics(strg) + metrics.RegisterSet(storageMetrics) +} + +// Stop stops vlstorage. +func Stop() { + metrics.UnregisterSet(storageMetrics) + storageMetrics = nil + + strg.MustClose() + strg = nil +} + +var strg *logstorage.Storage +var storageMetrics *metrics.Set + +// MustAddRows adds lr to vlstorage +func MustAddRows(lr *logstorage.LogRows) { + strg.MustAddRows(lr) +} + +// RunQuery runs the given q and calls processBlock for the returned data blocks +func RunQuery(tenantIDs []logstorage.TenantID, q *logstorage.Query, stopCh <-chan struct{}, processBlock func(columns []logstorage.BlockColumn)) { + strg.RunQuery(tenantIDs, q, stopCh, processBlock) +} + +func initStorageMetrics(strg *logstorage.Storage) *metrics.Set { + ssCache := &logstorage.StorageStats{} + var ssCacheLock sync.Mutex + var lastUpdateTime time.Time + + m := func() *logstorage.StorageStats { + ssCacheLock.Lock() + defer ssCacheLock.Unlock() + if time.Since(lastUpdateTime) < time.Second { + return ssCache + } + var ss logstorage.StorageStats + strg.UpdateStats(&ss) + ssCache = &ss + lastUpdateTime = time.Now() + return ssCache + } + + ms := metrics.NewSet() + + ms.NewGauge(fmt.Sprintf(`vl_free_disk_space_bytes{path=%q}`, *storageDataPath), func() float64 { + return float64(fs.MustGetFreeSpace(*storageDataPath)) + }) + + ms.NewGauge(`vl_rows{type="inmemory"}`, func() float64 { + return float64(m().InmemoryRowsCount) + }) + ms.NewGauge(`vl_rows{type="file"}`, func() float64 { + return float64(m().FileRowsCount) + }) + ms.NewGauge(`vl_parts{type="inmemory"}`, func() float64 { + return float64(m().InmemoryParts) + }) + ms.NewGauge(`vl_parts{type="file"}`, func() float64 { + return float64(m().FileParts) + }) + ms.NewGauge(`vl_blocks{type="inmemory"}`, func() float64 { + return float64(m().InmemoryBlocks) + }) + ms.NewGauge(`vl_blocks{type="file"}`, func() float64 { + return float64(m().FileBlocks) + }) + ms.NewGauge(`vl_partitions`, func() float64 { + return float64(m().PartitionsCount) + }) + ms.NewGauge(`vl_streams_created_total`, func() float64 { + return float64(m().StreamsCreatedTotal) + }) + + ms.NewGauge(`vl_compressed_data_size_bytes{type="inmemory"}`, func() float64 { + return float64(m().CompressedInmemorySize) + }) + ms.NewGauge(`vl_compressed_data_size_bytes{type="file"}`, func() float64 { + return float64(m().CompressedFileSize) + }) + ms.NewGauge(`vl_uncompressed_data_size_bytes{type="inmemory"}`, func() float64 { + return float64(m().UncompressedInmemorySize) + }) + ms.NewGauge(`vl_uncompressed_data_size_bytes{type="file"}`, func() float64 { + return float64(m().UncompressedFileSize) + }) + + ms.NewGauge(`vlinsert_rows_dropped_total{reason="too_big_timestamp"}`, func() float64 { + return float64(m().RowsDroppedTooBigTimestamp) + }) + ms.NewGauge(`vlinsert_rows_dropped_total{reason="too_small_timestamp"}`, func() float64 { + return 
float64(m().RowsDroppedTooSmallTimestamp) + }) + + return ms +} diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md new file mode 100644 index 000000000..edb7e2b2b --- /dev/null +++ b/docs/VictoriaLogs/LogsQL.md @@ -0,0 +1,1087 @@ +# LogsQL + +LogsQL is a simple yet powerful query language for VictoriaLogs. It provides the following features: + +- Full-text search across [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). + See [word filter](#word-filter), [phrase filter](#phrase-filter) and [prefix filter](#prefix-filter). +- Ability to combine filters into arbitrary complex [logical filters](#logical-filter). +- Ability to extract structured fields from unstructured logs at query time. See [these docs](#transformations). +- Ability to calculate various stats over the selected log entries. See [these docs](#stats). + +## LogsQL tutorial + +If you aren't familiar with VictoriaLogs, then start with [key concepts docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html). + +Then follow these docs: +- [How to run VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/#how-to-run-victorialogs). +- [how to ingest data into VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion). +- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/#querying). + +The simplest LogsQL query is just a [word](#word), which must be found in the [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +For example, the following query finds all the logs with `error` word: + +```logsql +error +``` + +This query matches logs with any [timestamp](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field), +e.g. it may return logs from the previous year alongside recently ingested logs. + +If the queried [word](#word) clashes with LogsQL keywords, then just wrap it into quotes. +For example, the following query finds all the log messages with `and` [word](#word): + +```logsql +"and" +``` + +It is OK to wrap any word into quotes. For example: + +```logsql +"error" +``` + +Moreover, it is possible to wrap phrases containing multiple words in quotes. For example, the following query +finds log messages with the `error: cannot find file` phrase: + +```logsql +"error: cannot find file" +``` + +Usually logs from the previous year aren't so interesting comparing to the recently ingested logs. +So it is recommended adding [time filter](#time-filter) to the query. +For example, the following query returns logs with the `error` [word](#word), +which were ingested into VictoriaLogs during the last 5 minutes: + +```logsql +error AND _time:[now-5m,now] +``` + +This query consists of two [filters](#filters) joined with `AND` [operator](#logical-filter): + +- The filter on the `error` [word](#word). +- The filter on the [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field). + +The `AND` operator means that the [log entry](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must match both filters in order to be selected. + +Typical LogsQL query constists of multiple [filters](#filters) joined with `AND` operator. It may be tiresome typing and then reading all these `AND` words. +So LogsQL allows omitting `AND` words. 
For example, the following query is equivalent to the query above: + +```logsql +error _time:[now-5m,now] +``` + +The query returns the following [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) by default: + +- [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) +- [`_stream` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) +- [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) + +Logs may contain an arbitrary number of other fields. If you need to obtain some of these fields in query results, +then just refer to them in the query with `field_name:*` [filter](#any-value-filter). +For example, the following query returns the `host.hostname` field in addition to `_msg`, `_stream` and `_time` fields: + +```logsql +error _time:[now-5m,now] host.hostname:* +``` + +Suppose the query above selects too many rows because some buggy app pushes invalid error logs to VictoriaLogs. Suppose the app adds `buggy_app` [word](#word) to every log line. +Then the following query removes all the logs from the buggy app, allowing us to pay attention to the real errors: + +```logsql +_time:[now-5m,now] error NOT buggy_app +``` + +This query uses `NOT` [operator](#logical-filter) for removing log lines from the buggy app. The `NOT` operator is used frequently, so it can be substituted with `!` char. +So the following query is equivalent to the previous one: + +```logsql +_time:[now-5m,now] error !buggy_app +``` + +Suppose another buggy app starts pushing invalid error logs to VictoriaLogs - it adds `foobar` [word](#word) to every emitted log line. +No problems - just add `!foobar` to the query in order to remove these buggy logs: + +```logsql +_time:[now-5m,now] error !buggy_app !foobar +``` + +This query can be rewritten to a clearer query with the `OR` [operator](#logical-filter) inside parentheses: + +```logsql +_time:[now-5m,now] error !(buggy_app OR foobar) +``` + +Note that the parentheses are required here, since otherwise the query won't return the expected results. +The query `error !buggy_app OR foobar` is interpreted as `(error AND NOT buggy_app) OR foobar`. This query may return error logs +from the buggy app if they contain `foobar` [word](#word). This query also continues returning all the error logs from the second buggy app. +This is because of different priorities for `NOT`, `AND` and `OR` operators. +Read [these docs](#logical-filter) for more details. There is no need to remember all these priority rules - +just wrap the needed query parts into explicit parentheses if you aren't sure about them. +As an additional bonus, explicit parentheses make queries easier to read and maintain. + +Queries above assume that the `error` [word](#word) is stored in the [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +This word can be stored in another [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) such as `log.level`. +How to select error logs in this case? Just add the `log.level:` prefix in front of the `error` word: + +```logsql +_time:[now-5m,now] log.level:error !(buggy_app OR foobar) +``` + +The field name can be wrapped into quotes if it contains special chars or keywords, which may clash with LogsQL syntax. +Any [word](#word) can also be wrapped into quotes.
So the following query is equivalent to the previous one: + +```logsql +"_time":[now-5m,now] "log.level":"error" !("buggy_app" OR "foobar") +``` + +What if the application identifier - such as `buggy_app` and `foobar` - is stored in the `app` field? Correct - just add `app:` prefix in front of `buggy_app` and `foobar`: + +```logsql +_time:[now-5m,now] log.level:error !(app:buggy_app OR app:foobar) +``` + +The query can be simplified by moving the `app:` prefix outside the parentheses: + +```logsql +_time:[now-5m,now] log.level:error !app:(buggy_app OR foobar) +``` + +The `app` field uniquely identifies the application instance if a single instance runs per each unique `app`. +In this case it is recommended associating the `app` field with [log stream fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) +during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion). This usually improves both compression rate +and query performance when querying the needed streams via [`_stream` filter](#stream-filter). +If the `app` field is associated with the log stream, then the query above can be rewritten to more performant one: + +```logsql +_time:[now-5m,now] log.level:error _stream:{app!~"buggy_app|foobar"} +``` + +This query completely skips scanning for logs from `buggy_app` and `foobar` apps, thus significantly reducing disk read IO and CPU time +needed for performing the query. + +Finally, it is recommended reading [performance tips](#performance-tips). + +Now you are familiar with LogsQL basics. Read [query syntax](#query-syntax) if you want to continue learning LogsQL. + +### Key concepts + +#### Word + +LogsQL splits all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) into words +delimited by non-word chars such as whitespace, parens, punctuation chars, etc. For example, the `foo: (bar,"тест")!` string +is split into `foo`, `bar` and `тест` words. Words can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8) chars. +These words are taken into account by full-text search filters such as +[word filter](#word-filter), [phrase filter](#phrase-filter) and [prefix filter](#prefix-filter). + +#### Query syntax + +LogsQL query consists of the following parts delimited by `|`: + +- [Filters](#filters), which select log entries for further processing. This part is required in LogsQL. Other parts are optional. +- Optional [stream context](#stream-context), which allows selecting surrounding log lines for the matching log lines. +- Optional [transformations](#transformations) for the selected log fields. + For example, an additional fields can be extracted or constructed from existing fields. +- Optional [post-filters](#post-filters) for post-filtering of the selected results. For example, post-filtering can filter + results based on the fields constructed by [transformations](#transformations). +- Optional [stats](#stats) transformations, which can calculate various stats across selected results. +- Optional [sorting](#sorting), which can sort the results by the sepcified fields. +- Optional [limiters](#limiters), which can apply various limits on the selected results. + +## Filters + +LogsQL supports various filters for searching for log messages (see below). +They can be combined into arbitrary complex queries via [logical filters](#logical-filter). + +Filters are applied to [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) by default. 
+If the filter must be applied to other [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model), +then its' name followed by the colon must be put in front of the filter. For example, if `error` [word filter](#word-filter) must be applied +to the `log.level` field, then use `log.level:error` query. + +Field names and filter args can be put into quotes if they contain special chars, which may clash with LogsQL syntax. LogsQL supports quoting via double quotes `"`, +single quotes `'` and backticks: + +```logsql +"some 'field':123":i('some("value")') AND `other"value'` +``` + +If doubt, it is recommended quoting field names and filter args. + + +The list of LogsQL filters: + +- [Time filter](#time-filter) - matches logs with [`_time` field](https://docs.victoriametrics.com/keyConcepts.html#time-field) in the given time range +- [Stream filter](#stream-filter) - matches logs, which belong to the given [streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) +- [Word filter](#word-filter) - matches logs with the given [word](#word) +- [Phrase filter](#phrase-filter) - matches logs with the given phrase +- [Prefix filter](#prefix-filter) - matches logs with the given word prefix or phrase prefix +- [Empty value filter](#empty-value-filter) - matches logs without the given [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) +- [Any value filter](#any-value-filter) - matches logs with the given non-empty [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) +- [Exact filter](#exact-filter) - matches logs with the exact value +- [Exact prefix filter](#exact-prefix-filter) - matches logs starting with the given prefix +- [Multi-exact filter](#multi-exact-filter) - matches logs with at least one of the specified exact values +- [Case-insensitive filter](#case-insensitive-filter) - matches logs with the given case-insensitive word, phrase or prefix +- [Sequence filter](#sequence-filter) - matches logs with the given sequence of words or phrases +- [Regexp filter](#regexp-filter) - matches logs for the given regexp +- [Range filter](#range-filter) - matches logs with numeric [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in the given range +- [IPv4 range filter](#ipv4-range-filter) - matches logs with ip address [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in the given range +- [String range filter](#string-range-filter) - matches logs with [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in the given string range +- [Length range filter](#length-range-filter) - matches logs with [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) of the given length range +- [Logical filter](#logical-filter) - allows combining other filters + + +### Time filter + +VictoriaLogs scans all the logs per each query if it doesn't contain the filter on [`_time` field](https://docs.victoriametrics.com/keyConcepts.html#time-field). +It uses various optimizations in order to speed up full scan queries without the `_time` filter, +but such queries can be slow if the storage contains large number of logs over long time range. The easiest way to optimize queries +is to narrow down the search with the filter on [`_time` field](https://docs.victoriametrics.com/keyConcepts.html#time-field). 
+ +For example, the following query returns [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) +ingested into VictoriaLogs during the last hour, which contain the `error` [word](#word): + +```logsql +_time:(now-1h, now) AND error +``` + +The following formats are supported for `_time` filter: + +- Fixed time: + - `_time:YYYY-MM-DD` - matches all the log messages for the particular day. For example, `_time:2023-04-25` matches all the log messages for April 25, 2023 by UTC. + - `_time:YYYY-MM` - matches all the log messages for the particular month. For example, `_time:2023-02` matches all the log messages for February, 2023 by UTC. + - `_time:YYYY` - matches all the log messages for the particular year. For example, `_time:2023` matches all the log messages for 2023 by UTC. + - `_time:YYYY-MM-DDTHH` - matches all the log messages for the particular hour. For example, `_time:2023-04-25T22` matches all the log messages from `22:00` to `23:00` + on April 25, 2023 by UTC. + - `_time:YYYY-MM-DDTHH:MM` - matches all the log messages for the particular minute. For example, `_time:2023-04-25T22:45` matches all the log messages from `22:45` to `22:46` + on April 25, 2023 by UTC. + - `_time:YYYY-MM-DDTHH:MM:SS` - matches all the log messages for the particular second. For example, `_time:2023-04-25T22:45:59` matches all the log messages + from `22:45:59` to `22:46:00` on April 25, 2023 by UTC. + +- Time range: + - `_time:[min_time, max_time]` - matches log messages on the time range `[min_time, max_time]`, including both `min_time` and `max_time`. + The `min_time` and `max_time` can contain any format specified [here](https://docs.victoriametrics.com/#timestamp-formats). + For example, `_time:[2023-04-01, 2023-04-30]` matches log messages for the whole April, 2023 by UTC, e.g. it is equivalent to `_time:2023-04`. + - `_time:[min_time, max_time)` - matches log messages on the time range `[min_time, max_time)`, not including `max_time`. + The `min_time` and `max_time` can contain any format specified [here](https://docs.victoriametrics.com/#timestamp-formats). + For example, `_time:[2023-02-01, 2023-03-01)` matches log messages for the whole February, 2023 by UTC, e.g. it is equivalent to `_time:2023-02`. + +It is possible to specify a time zone offset for all the absolute time formats by appending `+hh:mm` or `-hh:mm` suffix. +For example, `_time:2023-04-25+05:30` matches all the log messages on April 25, 2023 by India time zone, +while `_time:2023-02-07:00` matches all the log messages from February, 2023 by California time zone. + +Performance tips: + +- It is recommended specifying the smallest possible time range during the search, since it reduces the amount of log entries, which need to be scanned during the query. + For example, `_time:[now-1h, now]` is usually faster than `_time:[now-5h, now]`. + +- While LogsQL supports arbitrary number of `_time:...` filters at any level of [logical filters](#logical-filter), + it is recommended specifying a single `_time` filter at the top level of the query. + +- See [other performance tips](#performance-tips). + +See also: + +- [Stream filter](#stream-filter) +- [Word filter](#word-filter) + +### Stream filter + +VictoriaLogs provides an optimized way to select log entries, which belong to particular [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields). +This can be done via `_stream:{...}` filter.
The `{...}` may contain arbitrary [Prometheus-compatible label selector](https://docs.victoriametrics.com/keyConcepts.html#filtering) +over fields associated with [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields). +For example, the following query selects [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) +with `app` field equal to `nginx`: + +```logsql +_stream:{app="nginx"} +``` + +This query is equivalent to the following [exact()](#exact-filter) query, but the former query usually works much faster: + +```logsql +app:exact("nginx") +``` + +Performance tips: + +- It is recommended using the most specific `_stream:{...}` filter matching the smallest number of log streams, + which needs to be scanned by the rest of filters in the query. + +- While LogsQL supports arbitrary number of `_stream:{...}` filters at any level of [logical filters](#logical-filter), + it is recommended specifying a single `_stream:...` filter at the top level of the query. + +- See [other performance tips](#performance-tips). + +See also: + +- [Time filter](#time-filter) +- [Exact filter](#exact-filter) + +### Word filter + +The simplest LogsQL query consists of a single [word](#word) to search in log messages. For example, the following query matches +[log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with `error` [word](#word) inside them: + +```logsql +error +``` + +This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field): + +- `error` +- `an error happened` +- `error: cannot open file` + +This query doesn't match the following log messages: + +- `ERROR`, since the filter is case-sensitive by default. Use `i(error)` for this case. See [these docs](#case-insensitive-filter) for details. +- `multiple errors occurred`, since the `errors` word doesn't match the `error` word. Use `error*` for this case. See [these docs](#prefix-filter) for details. + +By default the given [word](#word) is searched in the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the word and put a colon after it +if it must be searched in the given field. For example, the following query returns log entries containing the `error` [word](#word) in the `log.level` field: + +```logsql +log.level:error +``` + +Both the field name and the word in the query can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example: + +```logsql +поле:значение +``` + +Both the field name and the word in the query can be put inside quotes if they contain special chars, which may clash with the query syntax. +For example, the following query searches for the IP `1.2.3.45` in the field `ip:remote`: + +```logsql +"ip:remote":"1.2.3.45" +``` + +See also: + +- [Phrase filter](#phrase-filter) +- [Exact filter](#exact-filter) +- [Prefix filter](#prefix-filter) +- [Logical filter](#logical-filter) + + +### Phrase filter + +If you need to search for log messages with the specific phrase inside them, then just wrap the phrase in quotes. +The phrase can contain any chars, including whitespace, punctuation, parens, etc. They are taken into account during the search.
+For example, the following query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with `cannot open file` phrase inside them: + +```logsql +"cannot open file" +``` + +This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field): + +- `ERROR: cannot open file /foo/bar/baz` +- `cannot open file: permission denied` + +This query doesn't match the following log messages: + +- `cannot  open  file`, since the number of whitespace chars between words doesn't match the number of whitespace chars in the search phrase. + Use `seq("cannot", "open", "file")` query instead. See [these docs](#sequence-filter) for details. +- `open file: cannot do this`, since the message doesn't contain the full phrase requested in the query. If you need matching a message + with all the [words](#word) listed in the query, then use `cannot AND open AND file` query. See [these docs](#logical-filter) for details. +- `cannot open files`, since the message ends with `files` [word](#word) instead of `file` word. Use `"cannot open file"*` query for this case. + See [these docs](#prefix-filter) for details. +- `Cannot open file: failure`, since the `Cannot` word starts with a capital letter. Use `i("cannot open file")` for this case. + See [these docs](#case-insensitive-filter) for details. + +By default the given phrase is searched in the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the phrase and put a colon after it +if it must be searched in the given field. For example, the following query returns log entries containing the `cannot open file` phrase in the `event.original` field: + +```logsql +event.original:"cannot open file" +``` + +Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example: + +```logsql +сообщение:"невозможно открыть файл" +``` + +The field name can be put inside quotes if it contains special chars, which may clash with the query syntax. +For example, the following query searches for the `cannot open file` phrase in the field `some:message`: + +```logsql +"some:message":"cannot open file" +``` + +See also: + +- [Exact filter](#exact-filter) +- [Word filter](#word-filter) +- [Prefix filter](#prefix-filter) +- [Logical filter](#logical-filter) + + +### Prefix filter + +If you need to search for log messages with [words](#word) / phrases containing some prefix, then just add `*` char to the end of the [word](#word) / phrase in the query. +For example, the following query returns [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), which contain [words](#word) with `err` prefix: + +```logsql +err* +``` + +This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field): + +- `err: foobar` +- `cannot open file: error occurred` + +This query doesn't match the following log messages: + +- `Error: foobar`, since the `Error` [word](#word) starts with a capital letter. Use `i(err*)` for this case. See [these docs](#case-insensitive-filter) for details. +- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `re("err")` for this case. See [these docs](#regexp-filter) for details. + +Prefix filter can be applied to [phrases](#phrase-filter).
For example, the following query matches +[log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing phrases with `unexpected fail` prefix: + +```logsql +"unexpected fail"* +``` + +This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field): + +- `unexpected fail: IO error` +- `error:unexpected failure` + +This query doesn't match the following log messages: + +- `unexpectedly failed`, since the `unexpectedly` doesn't match `unexpected` [word](#word). Use `unexpected* AND fail*` for this case. + See [these docs](#logical-filter) for details. +- `failed to open file: unexpected EOF`, since `failed` [word](#word) occurs before the `unexpected` word. Use `unexpected AND fail*` for this case. + See [these docs](#logical-filter) for details. + +By default the prefix filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the prefix filter +in order to apply it to the given field. For example, the following query matches `log.level` field containing any word with the `err` prefix: + +```logsql +log.level:err* +``` + +If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query. +For example, the following query matches `log:level` field containing any word with the `err` prefix. + +```logsql +"log:level":err* +``` + +Performance tips: + +- Prefer using [word filters](#word-filter) and [phrase filters](#phrase-filter) combined via [logical filter](#logical-filter) + instead of prefix filter. +- Prefer moving [word filters](#word-filter) and [phrase filters](#phrase-filter) in front of prefix filter when using [logical filter](#logical-filter). +- See [other performance tips](#performance-tips). + +See also: + +- [Exact prefix filter](#exact-prefix-filter) +- [Word filter](#word-filter) +- [Phrase filter](#phrase-filter) +- [Exact-filter](#exact-filter) +- [Logical filter](#logical-filter) + + +### Empty value filter + +Sometimes it is needed to find log entries without the given [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). +This can be performed with `log_field:""` syntax. For example, the following query matches log entries without `host.hostname` field: + +```logsql +host.hostname:"" +``` + +See also: + +- [Any value filter](#any-value-filter) +- [Word filter](#word-filter) +- [Logical filter](#logical-filter) + + +### Any value filter + +Sometimes it is needed to find log entries containing any non-empty value for the given [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). +This can be performed with `log_field:*` syntax. For example, the following query matches log entries with non-empty `host.hostname` field: + +```logsql +host.hostname:* +``` + +See also: + +- [Empty value filter](#empty-value-filter) +- [Prefix filter](#prefix-filter) +- [Logical filter](#logical-filter) + + +### Exact filter + +The [word filter](#word-filter) and [phrase filter](#phrase-filter) return [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), +which contain the given word or phrase inside them. The message may contain additional text other than the requested word or phrase. 
If you need searching for log messages +or [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) with the exact value, then use the `exact(...)` filter. +For example, the following query returns log messages with the exact value `fatal error: cannot find /foo/bar`: + +```logsql +exact("fatal error: cannot find /foo/bar") +``` + +The query doesn't match the following log messages: + +- `fatal error: cannot find /foo/bar/baz` or `some-text fatal error: cannot find /foo/bar`, since they contain additional text + other than that specified in the `exact()` filter. Use `"fatal error: cannot find /foo/bar"` query in this case. See [these docs](#phrase-filter) for details. + +- `FATAL ERROR: cannot find /foo/bar`, since the `exact()` filter is case-sensitive. Use `i("fatal error: cannot find /foo/bar")` in this case. + See [these docs](#case-insensitive-filter) for details. + +By default the `exact()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact()` filter and put a colon after it +if it must be searched in the given field. For example, the following query returns log entries with the exact `error` value at `log.level` field: + +```logsql +log.level:exact("error") +``` + +Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example: + +```logsql +log.уровень:exact("ошибка") +``` + +The field name can be put inside quotes if it contains special chars, which may clash with the query syntax. +For example, the following query matches the `error` value in the field `log:level`: + +```logsql +"log:level":exact("error") +``` + +See also: + +- [Exact prefix filter](#exact-prefix-filter) +- [Multi-exact filter](#multi-exact-filter) +- [Word filter](#word-filter) +- [Phrase filter](#phrase-filter) +- [Prefix filter](#prefix-filter) +- [Logical filter](#logical-filter) + + +### Exact prefix filter + +Sometimes it is needed to find log messages starting with some prefix. This can be done with the `exact_prefix(...)` filter. +For example, the following query matches log messages, which start with the `Processing request` prefix: + +```logsql +exact_prefix("Processing request") +``` + +This filter matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field): + +- `Processing request foobar` +- `Processing requests from ...` + +It doesn't match the following log messages: + +- `processing request foobar`, since the log message starts with lowercase `p`. Use `exact_prefix("processing request") OR exact_prefix("Processing request")` + query in this case. See [these docs](#logical-filter) for details. +- `start: Processing request`, since the log message doesn't start with `Processing request`. Use `"Processing request"` query in this case. + See [these docs](#phrase-filter) for details. + +By default the `exact_prefix()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact_prefix()` filter and put a colon after it +if it must be searched in the given field.
For example, the following query returns log entries with the `log.level` field, which starts with the `err` prefix: + +```logsql +log.level:exact_prefix("err") +``` + +Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example: + +```logsql +log.уровень:exact_prefix("ошиб") +``` + +The field name can be put inside quotes if it contains special chars, which may clash with the query syntax. +For example, the following query matches `log:level` values starting with the `err` prefix: + +```logsql +"log:level":exact_prefix("err") +``` + +See also: + +- [Exact filter](#exact-filter) +- [Prefix filter](#prefix-filter) +- [Word filter](#word-filter) +- [Phrase filter](#phrase-filter) +- [Logical filter](#logical-filter) + + +### Multi-exact filter + +Sometimes it is needed to locate log messages with a field containing at least one of the given values. This can be done with multiple [exact filters](#exact-filter) +combined into a single [logical filter](#logical-filter). For example, the following query matches log messages with `log.level` field +containing either `error` or `fatal` exact values: + +```logsql +log.level:(exact("error") OR exact("fatal")) +``` + +While this solution works OK, LogsQL provides a simpler and faster solution for this case - the `in()` filter: + +```logsql +log.level:in("error", "fatal") +``` + +It works very fast for long lists passed to `in()`. + +Future VictoriaLogs versions will allow passing arbitrary [queries](#query-syntax) into the `in()` filter. +For example, the following query selects all the logs for the last hour for users, who visited pages with `admin` [word](#word) in the `path` +during the last day: + +```logsql +_time:[now-1h,now] AND user_id:in(_time:[now-1d,now] AND path:admin | fields user_id) +``` + +See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details. + +See also: + +- [Exact filter](#exact-filter) +- [Word filter](#word-filter) +- [Phrase filter](#phrase-filter) +- [Prefix filter](#prefix-filter) +- [Logical filter](#logical-filter) + + +### Case-insensitive filter + +Case-insensitive filter can be applied to any word, phrase or prefix by wrapping the corresponding [word filter](#word-filter), +[phrase filter](#phrase-filter) or [prefix filter](#prefix-filter) into `i()`. For example, the following query returns +log messages with `error` word in any case: + +```logsql +i(error) +``` + +The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field): + +- `unknown error happened` +- `ERROR: cannot read file` +- `Error: unknown arg` +- `An ErRoR occurred` + +The query doesn't match the following log messages: + +- `FooError`, since the `FooError` [word](#word) has superfluous prefix `Foo`. Use `re("(?i)error")` for this case. See [these docs](#regexp-filter) for details. +- `too many Errors`, since the `Errors` [word](#word) has superfluous suffix `s`. Use `i(error*)` for this case. + +By default the `i()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter +in order to apply it to the given field.
For example, the following query matches `log.level` field containing `error` [word](#word) in any case: + +```logsql +log.level:i(error) +``` + +If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query. +For example, the following query matches `log:level` field containing `error` [word](#word) in any case. + +```logsql +"log:level":i("error") +``` + +Performance tips: + +- Prefer using case-sensitive filter over case-insensitive filter. +- Prefer moving [word filter](#word-filter), [phrase filter](#phrase-filter) and [prefix filter](#prefix-filter) in front of case-sensitive filter + when using [logical filter](#logical-filter). +- See [other performance tips](#performance-tips). + + +See also: + +- [Word filter](#word-filter) +- [Phrase filter](#phrase-filter) +- [Exact-filter](#exact-filter) +- [Logical filter](#logical-filter) + + +### Sequence filter + +Sometimes it is needed to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) +with [words](#word) or phrases in a particular order. For example, if log messages with `error` word followed by `open file` phrase +must be found, then the following LogsQL query can be used: + +```logsql +seq("error", "open file") +``` + +This query matches `some error: cannot open file /foo/bar` message, since the `open file` phrase goes after the `error` [word](#word). +The query doesn't match the `cannot open file: error` message, since the `open file` phrase is located in front of the `error` [word](#word). +If you need matching log messages with both `error` word and `open file` phrase, then use `error AND "open file"` query. See [these docs](#logical-filter) +for details. + +By default the `seq()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter +in order to apply it to the given field. For example, the following query matches `event.original` field containing `(error, "open file")` sequence: + +```logsql +event.original:seq(error, "open file") +``` + +If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query. +For example, the following query matches `event:original` field containing `(error, "open file")` sequence: + +```logsql +"event:original":seq(error, "open file") +``` + +See also: + +- [Word filter](#word-filter) +- [Phrase filter](#phrase-filter) +- [Exact-filter](#exact-filter) +- [Logical filter](#logical-filter) + + +### Regexp filter + +LogsQL supports a regular expression filter with [re2 syntax](https://github.com/google/re2/wiki/Syntax) via `re(...)` expression. +For example, the following query returns all the log messages containing `error` or `warn` substrings: + +```logsql +re("error|warn") +``` + +The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field): + +- `error: cannot read data` +- `A warning has been raised` + +By default the `re()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter +in order to apply it to the given field.
For example, the following query matches `event.original` field containing either `error` or `warn` substrings: + +```logsql +event.original:re("error|warn") +``` + +If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query. +For example, the following query matches `event:original` field containing either `error` or `warn` substrings: + +```logsql +"event:original":re("error|warn") +``` + +Performance tips: + +- Prefer combining simple [word filter](#word-filter) with [logical filter](#logical-filter) instead of using regexp filter. + For example, the `re("error|warning")` query can be substituted with `error OR warning` query, which usually works much faster. + See also [multi-exact filter](#multi-exact-filter). +- Prefer moving the regexp filter to the end of the [logical filter](#logical-filter), so lighter filters are executed first. +- Prefer using `exact_prefix("some prefix")` instead of `re("^some prefix")`, since the [exact_prefix()](#exact-prefix-filter) works much faster than the `re()` filter. +- See [other performance tips](#performance-tips). + +See also: + +- [Case-insensitive filter](#case-insensitive-filter) +- [Logical filter](#logical-filter) + + +### Range filter + +If you need to filter log messages by some field containing only numeric values, then the `range()` filter can be used. +For example, if the `request.duration` field contains the request duration in seconds, then the following LogsQL query can be used +for searching for log entries with request durations exceeding 4.2 seconds: + +```logsql +request.duration:range(4.2, Inf) +``` + +The lower and the upper bounds of the range are excluded by default. If they must be included, then substitute the corresponding +parentheses with square brackets. For example: + +- `range[1, 10)` includes `1` in the matching range + - `range(1, 10]` includes `10` in the matching range + - `range[1, 10]` includes `1` and `10` in the matching range + +Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) +with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds` +[log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text. +Extract the numeric value from the message with `parse(_msg, "the request took <request_duration> seconds")` [transformation](#transformations) +and then apply the `range()` [post-filter](#post-filters) to the extracted `request_duration` field. + +Performance tips: + +- It is better to query a pure numeric [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) + instead of extracting a numeric value from a text field via [transformations](#transformations) at query time. +- See [other performance tips](#performance-tips). + +See also: + +- [IPv4 range filter](#ipv4-range-filter) +- [String range filter](#string-range-filter) +- [Length range filter](#length-range-filter) +- [Logical filter](#logical-filter) + + +### IPv4 range filter + +If you need to filter log messages by some field containing only [IPv4](https://en.wikipedia.org/wiki/Internet_Protocol_version_4) addresses such as `1.2.3.4`, +then the `ipv4_range()` filter can be used.
For example, the following query matches log entries with `user.ip` address in the range `[127.0.0.0 - 127.255.255.255]`: + +```logsql +user.ip:ipv4_range(127.0.0.0, 127.255.255.255) +``` + +The `ipv4_range()` also accepts IPv4 subnetworks in [CIDR notation](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation). +For example, the following query is equivalent to the query above: + +```logsql +user.ip:ipv4_range("127.0.0.0/8") +``` + +If you need matching a single IPv4 address, then just put it inside `ipv4_range()`. For example, the following query matches `1.2.3.4` IP +at `user.ip` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model): + +```logsql +user.ip:ipv4_range("1.2.3.4") +``` + +Note that the `ipv4_range()` doesn't match a string with an IPv4 address if this string contains other text. For example, `ipv4_range("127.0.0.0/24")` +doesn't match `request from 127.0.0.1: done` [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), +since the `127.0.0.1` IP is surrounded by other text. Extract the IP from the message with `parse(_msg, "request from <ip>: done")` [transformation](#transformations) +and then apply the `ipv4_range()` [post-filter](#post-filters) to the extracted `ip` field. + +Hints: + +- If you need searching for [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the given `X.Y.Z.Q` IPv4 address, + then `"X.Y.Z.Q"` query can be used. See [these docs](#phrase-filter) for details. +- If you need searching for [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing + at least a single IPv4 address out of the given list, then `"ip1" OR "ip2" OR ... "ipN"` query can be used. See [these docs](#logical-filter) for details. +- If you need finding log entries with `ip` field in multiple ranges, then use `ip:(ipv4_range(range1) OR ipv4_range(range2) ... OR ipv4_range(rangeN))` query. + See [these docs](#logical-filter) for details. + +Performance tips: + +- It is better to query a pure IPv4 [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) + instead of extracting IPv4 addresses from a text field via [transformations](#transformations) at query time. +- See [other performance tips](#performance-tips). + +See also: + +- [Range filter](#range-filter) +- [String range filter](#string-range-filter) +- [Length range filter](#length-range-filter) +- [Logical filter](#logical-filter) + + +### String range filter + +If you need to filter log messages by some field with string values in some range, then `string_range()` filter can be used. +For example, the following LogsQL query matches log entries with `user.name` field starting with `A` and `B` chars: + +```logsql +user.name:string_range(A, C) +``` + +The `string_range()` includes the lower bound, while excluding the upper bound. This simplifies querying distinct sets of logs. +For example, the `user.name:string_range(C, E)` would match `user.name` fields, which start with `C` and `D` chars. + +See also: + +- [Range filter](#range-filter) +- [IPv4 range filter](#ipv4-range-filter) +- [Length range filter](#length-range-filter) +- [Logical filter](#logical-filter) + + +### Length range filter + +If you need to filter log messages by their length, then `len_range()` filter can be used.
+ +For example, the following LogsQL query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) +with lengths in the range `[5, 10]` chars: + +```logsql +len_range(5, 10) +``` + +This query matches the following log messages, since their length is in the requested range: + +- `foobar` +- `foo bar` + +This query doesn't match the following log messages: + +- `foo`, since it is too short +- `foo bar baz abc`, since it is too long + +By default the `len_range()` is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Put the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `len_range()` in order to apply +the filter to the needed field. For example, the following query matches log entries with the `foo` field length in the range `[10, 20]` chars: + +```logsql +foo:len_range(10, 20) +``` + +See also: + +- [Range filter](#range-filter) +- [Logical filter](#logical-filter) + + +### Logical filter + +Simpler LogsQL [filters](#filters) can be combined into more complex filters with the following logical operations: + +- `q1 AND q2` - matches common log entries returned by both `q1` and `q2`. Arbitrary number of [filters](#filters) can be combined with `AND` operation. + For example, `error AND file AND app` matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), + which simultaneously contain `error`, `file` and `app` [words](#word). + The `AND` operation is frequently used in LogsQL queries, so it is allowed to skip the `AND` word. + For example, `error file app` is equivalent to `error AND file AND app`. + +- `q1 OR q2` - merges log entries returned by both `q1` and `q2`. Arbitrary number of [filters](#filters) can be combined with `OR` operation. + For example, `error OR warning OR info` matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), + which contain at least one of `error`, `warning` or `info` [words](#word). + +- `NOT q` - returns all the log entries except those which match `q`. For example, `NOT info` returns all the + [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), + which do not contain `info` [word](#word). The `NOT` operation is frequently used in LogsQL queries, so it is allowed to substitute `NOT` with `!` in queries. + For example, `!info` is equivalent to `NOT info`. + +The `NOT` operation has the highest priority, `AND` has the middle priority and `OR` has the lowest priority. +The priority order can be changed with parentheses. For example, `NOT info OR debug` is interpreted as `(NOT info) OR debug`, +so it matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), +which do not contain `info` [word](#word), while it also matches messages with `debug` word (which may contain the `info` word). +This is not what most users expect. In this case the query can be rewritten to `NOT (info OR debug)`, +which correctly returns log messages without `info` and `debug` [words](#word). + +LogsQL supports arbitrarily complex logical queries with an arbitrary mix of `AND`, `OR` and `NOT` operations and parentheses.
+ +By default logical filters apply to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) +unless the inner filters explicitly specify the needed [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) via `field_name:filter` syntax. +For example, `(error OR warn) AND host.hostname:host123` is interpreted as `(_msg:error OR _msg:warn) AND host.hostname:host123`. + +It is possible to specify a single [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) for multiple filters +with the following syntax: + +```logsql +field_name:(q1 OR q2 OR ... qN) +``` + +For example, `log.level:error OR log.level:warning OR log.level:info` can be substituted with the shorter query: `log.level:(error OR warning OR info)`. + +Performance tips: + +- VictoriaLogs executes logical operations from the left to the right, so it is recommended moving the most specific + and the fastest filters (such as [word filter](#word-filter) and [phrase filter](#phrase-filter)) to the left, + while moving less specific and the slowest filters (such as [regexp filter](#regexp-filter) and [case-insensitive filter](#case-insensitive-filter)) + to the right. For example, if you need to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) + with the `error` word, which match some `/foo/(bar|baz)` regexp, + it is better from performance PoV to use the query `error re("/foo/(bar|baz)")` instead of `re("/foo/(bar|baz)") error`. + + The most specific filter means that it matches the lowest number of log entries comparing to other filters. + +- See [other performance tips](#performance-tips). + +## Stream context + +LogsQL will support the ability to select the given number of surrounding log lines for the selected log lines +on a [per-stream](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) basis. + +See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details. + +## Transformations + +It is possible to perform various transformations on the [selected log entries](#filters) at client side +with `jq`, `awk`, `cut`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line). + +LogsQL will support the following transformations for the [selected](#filters) log entries: + +- Extracting the specified fields from text [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided pattern. +- Extracting the specified fields from JSON strings stored inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). +- Extracting the specified fields from [logfmt](https://brandur.org/logfmt) strings stored + inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). +- Creating a new field from existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) + according to the provided format. +- Creating a new field according to math calculations over existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). +- Copying of the existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). +- Parsing duration strings into floating-point seconds for further [stats calculations](#stats). 
+- Creating a boolean field with the result of arbitrary [post-filters](#post-filters) applied to the current fields. + Boolean fields may be useful for [conditional stats calculation](#stats). +- Creating an integer field with the length of the given field value. This can be useful for [stats calculations](#stats). + +See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details. + +## Post-filters + +It is possible to perform post-filtering on the [selected log entries](#filters) at client side with `grep` or similar Unix commands +according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line). + +LogsQL will support post-filtering on the original [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) +and fields created by various [transformations](#transformations). The following post-filters will be supported: + +- Full-text [filtering](#filters). +- [Logical filtering](#logical-filter). + +See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details. + +## Stats + +It is possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands +according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line). + +LogsQL will support calculating the following stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) +and fields created by [transformations](#transformations): + +- The number of selected logs. +- The number of non-empty values for the given field. +- The number of unique values for the given field. +- The min, max, avg, and sum for the given field. +- The median and [percentile](https://en.wikipedia.org/wiki/Percentile) for the given field. + +It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats. +For example, `sumIf(response_size, is_admin:true)` calculates the total response size for admins only. + +It will be possible to group stats by the specified [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) +and by the specified time buckets. + +See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details. + +## Sorting + +By default VictoriaLogs doesn't sort the returned results because of performance and efficiency concerns +described [here](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line). + +It is possible to sort the [selected log entries](#filters) at client side with `sort` Unix command +according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line). + +LogsQL will support results' sorting by the given set of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). + +See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details. + +## Limiters + +It is possible to limit the returned results with `head`, `tail`, `less`, etc. Unix commands +according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line). + +LogsQL will support the ability to limit the number of returned results alongside the ability to page the returned results. +Additionally, LogsQL will provide the ability to select fields, which must be returned in the response. + +See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details. 
+ +## Performance tips + +- It is highly recommended specifying [time filter](#time-filter) in order to narrow down the search to a specific time range. +- It is highly recommended specifying [stream filter](#stream-filter) in order to narrow down the search + to specific [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields). +- Move faster filters such as [word filter](#word-filter) and [phrase filter](#phrase-filter) to the beginning of the query. + This rule doesn't apply to [time filter](#time-filter) and [stream filter](#stream-filter), which can be put at any place of the query. +- Move more specific filters, which match a lower number of log entries, to the beginning of the query. + This rule doesn't apply to [time filter](#time-filter) and [stream filter](#stream-filter), which can be put at any place of the query. diff --git a/docs/VictoriaLogs/README.md b/docs/VictoriaLogs/README.md new file mode 100644 index 000000000..8cc1d660e --- /dev/null +++ b/docs/VictoriaLogs/README.md @@ -0,0 +1,481 @@ +# VictoriaLogs + +VictoriaLogs is a log management and log analytics system from [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/). + +It provides the following key features: + +- VictoriaLogs can accept logs from popular log collectors, which support + [ElasticSearch data ingestion format](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html). See [these docs](#data-ingestion). + [Grafana Loki data ingestion format](https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki) will be supported in the near future - + see [the Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html). +- VictoriaLogs is much easier to set up and operate compared to ElasticSearch and Grafana Loki. See [these docs](#operation). +- VictoriaLogs provides an easy yet powerful query language with full-text search capabilities across + all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) - + see [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html). +- VictoriaLogs can be seamlessly combined with good old Unix tools for log analysis such as `grep`, `less`, `sort`, `jq`, etc. + See [these docs](#querying-via-command-line) for details. +- VictoriaLogs capacity and performance scale linearly with the available resources (CPU, RAM, disk IO, disk space). + It runs smoothly on both Raspberry Pi and a beefy server with hundreds of CPU cores and terabytes of RAM. +- VictoriaLogs can handle much bigger data volumes than ElasticSearch and Grafana Loki when running on comparable hardware. + A single-node VictoriaLogs instance can substitute a large ElasticSearch cluster. + +## Operation + +### How to run VictoriaLogs + +Check out the VictoriaLogs source code. It is located in the VictoriaMetrics repository: + +```bash +git clone https://github.com/VictoriaMetrics/VictoriaMetrics +cd VictoriaMetrics +``` + +Then build VictoriaLogs. The build command requires [Go 1.20](https://golang.org/doc/install). + +```bash +make victoria-logs +``` + +Then run the built binary: + +```bash +bin/victoria-logs +``` + +VictoriaLogs is ready to [receive logs](#data-ingestion) and [query logs](#querying) at the TCP port `9428` now! +It has no external dependencies, so it may run in various environments without additional setup and configuration. +VictoriaLogs automatically adapts to the available CPU and RAM resources. It also automatically sets up and creates
+It also automatically creates the needed indexes during [data ingestion](#data-ingestion).
+
+It is possible to change the TCP port via the `-httpListenAddr` command-line flag. For example, the following command
+starts VictoriaLogs, which accepts incoming requests at port `9200` (aka the ElasticSearch HTTP API port):
+
+```bash
+/path/to/victoria-logs -httpListenAddr=:9200
+```
+
+VictoriaLogs stores the ingested data in the `victoria-logs-data` directory by default. The directory can be changed
+via the `-storageDataPath` command-line flag. See [these docs](#storage) for details.
+
+By default VictoriaLogs stores log entries with timestamps in the time range `[now-7d, now]`, while dropping logs outside the given time range.
+I.e. it uses a retention of 7 days. Read [these docs](#retention) on how to control the retention for the [ingested](#data-ingestion) logs.
+
+It is recommended to set up monitoring of VictoriaLogs according to [these docs](#monitoring).
+
+### Data ingestion
+
+VictoriaLogs supports the following data ingestion techniques:
+
+- Via [Filebeat](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-overview.html). See [these docs](#filebeat-setup).
+- Via [Logstash](https://www.elastic.co/guide/en/logstash/current/introduction.html). See [these docs](#logstash-setup).
+
+The ingested log entries can be queried according to [these docs](#querying).
+
+#### Data ingestion troubleshooting
+
+VictoriaLogs provides the following command-line flags, which can help debug data ingestion issues:
+
+- `-logNewStreams` - if this flag is passed to VictoriaLogs, then it logs all the newly
+  registered [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
+  This may help debug [high cardinality issues](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality).
+- `-logIngestedRows` - if this flag is passed to VictoriaLogs, then it logs all the ingested
+  [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
+
+VictoriaLogs exposes various [metrics](#monitoring), which may help debug data ingestion issues:
+
+- `vl_rows_ingested_total` - the number of ingested [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
+  since the last VictoriaLogs restart. If this number increases over time, then logs are successfully ingested into VictoriaLogs.
+  The ingested logs can be inspected in the VictoriaLogs logs by passing the `-logIngestedRows` command-line flag to it.
+- `vl_streams_created_total` - the number of created [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
+  since the last VictoriaLogs restart. If this metric grows rapidly during extended periods of time, then this may lead
+  to [high cardinality issues](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality).
+  The newly created log streams can be inspected in the VictoriaLogs logs by passing the `-logNewStreams` command-line flag to it.
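+
+These metrics can be inspected manually at the `/metrics` page described in the [monitoring docs](#monitoring).
+For example, the following commands are a rough sketch (assuming a local VictoriaLogs instance at the default `9428` port)
+for checking the current values of the ingestion counters:
+
+```bash
+# The total number of ingested log entries since the last VictoriaLogs restart.
+curl -s http://localhost:9428/metrics | grep vl_rows_ingested_total
+
+# The number of created log streams since the last VictoriaLogs restart.
+curl -s http://localhost:9428/metrics | grep vl_streams_created_total
+```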
+
+#### Filebeat setup
+
+Specify the [`output.elasticsearch`](https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html) section in the `filebeat.yml`
+for sending the collected logs to VictoriaLogs:
+
+```yml
+output.elasticsearch:
+  hosts: ["http://localhost:9428/insert/elasticsearch/"]
+  parameters:
+    _msg_field: "message"
+    _time_field: "@timestamp"
+    _stream_fields: "host.hostname,log.file.path"
+```
+
+Substitute the `localhost:9428` address inside the `hosts` section with the real TCP address of VictoriaLogs.
+
+The `_msg_field` parameter must contain the field name with the log message generated by Filebeat. This is usually the `message` field.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) for details.
+
+The `_time_field` parameter must contain the field name with the log timestamp generated by Filebeat. This is usually the `@timestamp` field.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) for details.
+
+It is recommended to specify a comma-separated list of field names, which uniquely identify every log stream collected by Filebeat, in the `_stream_fields` parameter.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) for details.
+
+If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) aren't needed,
+then VictoriaLogs can be instructed to ignore them during data ingestion - just pass the `ignore_fields` parameter with a comma-separated list of fields to ignore.
+For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
+
+```yml
+output.elasticsearch:
+  hosts: ["http://localhost:9428/insert/elasticsearch/"]
+  parameters:
+    _msg_field: "message"
+    _time_field: "@timestamp"
+    _stream_fields: "host.name,log.file.path"
+    ignore_fields: "log.offset,event.original"
+```
+
+If Filebeat ingests logs into VictoriaLogs at a high rate, then it may be necessary to tune the `worker` and `bulk_max_size` options.
+For example, the following config is optimized for higher than usual ingestion rate:
+
+```yml
+output.elasticsearch:
+  hosts: ["http://localhost:9428/insert/elasticsearch/"]
+  parameters:
+    _msg_field: "message"
+    _time_field: "@timestamp"
+    _stream_fields: "host.name,log.file.path"
+  worker: 8
+  bulk_max_size: 1000
+```
+
+If Filebeat sends logs to VictoriaLogs in another datacenter, then it may be useful to enable data compression via the `compression_level` option.
+This usually allows saving network bandwidth and costs by up to 5 times:
+
+```yml
+output.elasticsearch:
+  hosts: ["http://localhost:9428/insert/elasticsearch/"]
+  parameters:
+    _msg_field: "message"
+    _time_field: "@timestamp"
+    _stream_fields: "host.name,log.file.path"
+  compression_level: 1
+```
+
+By default the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy).
+If you need to store logs in another tenant, then specify the needed tenant via `headers` at the `output.elasticsearch` section.
+For example, the following `filebeat.yml` config instructs Filebeat to store the data to the `(AccountID=12, ProjectID=34)` tenant:
+
+```yml
+output.elasticsearch:
+  hosts: ["http://localhost:9428/insert/elasticsearch/"]
+  headers:
+    AccountID: 12
+    ProjectID: 34
+  parameters:
+    _msg_field: "message"
+    _time_field: "@timestamp"
+    _stream_fields: "host.name,log.file.path"
+```
+
+The ingested log entries can be queried according to [these docs](#querying).
+
+See also [data ingestion troubleshooting](#data-ingestion-troubleshooting) docs.
+
+#### Logstash setup
+
+Specify the [`output.elasticsearch`](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html) section in the `logstash.conf` file
+for sending the collected logs to VictoriaLogs:
+
+```conf
+output {
+  elasticsearch {
+    hosts => ["http://localhost:9428/insert/elasticsearch/"]
+    parameters => {
+      "_msg_field" => "message"
+      "_time_field" => "@timestamp"
+      "_stream_fields" => "host.name,process.name"
+    }
+  }
+}
+```
+
+Substitute the `localhost:9428` address inside `hosts` with the real TCP address of VictoriaLogs.
+
+The `_msg_field` parameter must contain the field name with the log message generated by Logstash. This is usually the `message` field.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) for details.
+
+The `_time_field` parameter must contain the field name with the log timestamp generated by Logstash. This is usually the `@timestamp` field.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) for details.
+
+It is recommended to specify a comma-separated list of field names, which uniquely identify every log stream collected by Logstash, in the `_stream_fields` parameter.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) for details.
+
+If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) aren't needed,
+then VictoriaLogs can be instructed to ignore them during data ingestion - just pass the `ignore_fields` parameter with a comma-separated list of fields to ignore.
+For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
+
+```conf
+output {
+  elasticsearch {
+    hosts => ["http://localhost:9428/insert/elasticsearch/"]
+    parameters => {
+      "_msg_field" => "message"
+      "_time_field" => "@timestamp"
+      "_stream_fields" => "host.hostname,process.name"
+      "ignore_fields" => "log.offset,event.original"
+    }
+  }
+}
+```
+
+If Logstash sends logs to VictoriaLogs in another datacenter, then it may be useful to enable data compression via the `http_compression => true` option.
+This usually allows saving network bandwidth and costs by up to 5 times:
+
+```conf
+output {
+  elasticsearch {
+    hosts => ["http://localhost:9428/insert/elasticsearch/"]
+    parameters => {
+      "_msg_field" => "message"
+      "_time_field" => "@timestamp"
+      "_stream_fields" => "host.hostname,process.name"
+    }
+    http_compression => true
+  }
+}
+```
+
+By default the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy).
+If you need to store logs in another tenant, then specify the needed tenant via `custom_headers` at the `output.elasticsearch` section.
+For example, the following `logstash.conf` config instructs Logstash to store the data to the `(AccountID=12, ProjectID=34)` tenant:
+
+```conf
+output {
+  elasticsearch {
+    hosts => ["http://localhost:9428/insert/elasticsearch/"]
+    custom_headers => {
+      "AccountID" => "12"
+      "ProjectID" => "34"
+    }
+    parameters => {
+      "_msg_field" => "message"
+      "_time_field" => "@timestamp"
+      "_stream_fields" => "host.hostname,process.name"
+    }
+  }
+}
+```
+
+The ingested log entries can be queried according to [these docs](#querying).
+
+See also [data ingestion troubleshooting](#data-ingestion-troubleshooting) docs.
+
+### Querying
+
+VictoriaLogs can be queried at the `/select/logsql/query` endpoint. The [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html)
+query must be passed via the `query` argument. For example, the following query returns all the log entries with the `error` word:
+
+```bash
+curl http://localhost:9428/select/logsql/query -d 'query=error'
+```
+
+The `query` argument can be passed either in the request url itself (aka HTTP GET request) or via the request body
+with the `x-www-form-urlencoded` encoding (aka HTTP POST request). The HTTP POST is useful for sending long queries
+when they exceed the maximum url length supported by the used clients and proxies.
+
+See [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) for details on what can be passed to the `query` arg.
+The `query` arg must be properly encoded with [percent encoding](https://en.wikipedia.org/wiki/URL_encoding) when passing it to `curl`
+or similar tools.
+
+The `/select/logsql/query` endpoint returns [a stream of JSON lines](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON),
+where each line contains a JSON-encoded log entry in the form `{field1="value1",...,fieldN="valueN"}`.
+Example response:
+
+```
+{"_msg":"error: disconnect from 19.54.37.22: Auth fail [preauth]","_stream":"{}","_time":"2023-01-01T13:32:13Z"}
+{"_msg":"some other error","_stream":"{}","_time":"2023-01-01T13:32:15Z"}
+```
+
+The matching lines are sent to the response stream as soon as they are found in the VictoriaLogs storage.
+This means that the returned response may contain billions of lines for queries matching too many log entries.
+The response can be interrupted at any time by closing the connection to the VictoriaLogs server.
+This allows post-processing the returned lines at the client side with the usual Unix commands such as `grep`, `jq`, `less`, `head`, etc.
+See [these docs](#querying-via-command-line) for more details.
+
+The returned lines aren't sorted by default, since sorting disables the ability to send matching log entries to the response stream as soon as they are found.
+Query results can be sorted either at the VictoriaLogs side according [to these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#sorting)
+or at the client side with the usual `sort` command according to [these docs](#querying-via-command-line).
+
+By default the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy) is queried.
+If you need to query another tenant, then specify the needed tenant via HTTP request headers.
For example, the following query searches +for log messages at `(AccountID=12, ProjectID=34)` tenant: + +```bash +curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID: 34' -d 'query=error' +``` + +The number of requests to `/select/logsql/query` can be [monitored](#monitoring) with `vl_http_requests_total{path="/select/logsql/query"}` metric. + +#### Querying via command-line + +VictoriaLogs provides good integration with `curl` and other command-line tools because of the following features: + +- VictoriaLogs sends the matching log entries to the response stream as soon as they are found. + This allows forwarding the response stream to arbitrary [Unix pipes](https://en.wikipedia.org/wiki/Pipeline_(Unix)). +- VictoriaLogs automatically adjusts query execution speed to the speed of the client, which reads the response stream. + For example, if the response stream is piped to `less` command, then the query is suspended + until the `less` command reads the next block from the response stream. +- VictoriaLogs automatically cancels query execution when the client closes the response stream. + For example, if the query response is piped to `head` command, then VictoriaLogs stops executing the query + when the `head` command closes the response stream. + +These features allow executing queries at command-line interface, which potentially select billions of rows, +without the risk of high resource usage (CPU, RAM, disk IO) at VictoriaLogs server. + +For example, the following query can return very big number of matching log entries (e.g. billions) if VictoriaLogs contains +many log messages with the `error` [word](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#word): + +```bash +curl http://localhost:9428/select/logsql/query -d 'query=error' +``` + +If the command returns "never-ending" response, then just press `ctrl+C` at any time in order to cancel the query. +VictoriaLogs notices that the response stream is closed, so it cancels the query and instantly stops consuming CPU, RAM and disk IO for this query. + +Then just use `head` command for investigating the returned log messages and narrowing down the query: + +```bash +curl http://localhost:9428/select/logsql/query -d 'query=error' | head -10 +``` + +The `head -10` command reads only the first 10 log messages from the response and then closes the response stream. +This automatically cancels the query at VictoriaLogs side, so it stops consuming CPU, RAM and disk IO resources. + +Sometimes it may be more convenient to use `less` command instead of `head` during the investigation of the returned response: + +```bash +curl http://localhost:9428/select/logsql/query -d 'query=error' | less +``` + +The `less` command reads the response stream on demand, when the user scrolls down the output. +VictoriaLogs suspends query execution when `less` stops reading the response stream. +It doesn't consume CPU and disk IO resources during this time. It resumes query execution +when the `less` continues reading the response stream. + +Suppose that the initial investigation of the returned query results helped determining that the needed log messages contain +`cannot open file` [phrase](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#phrase-filter). +Then the query can be narrowed down to `error AND "cannot open file"` +(see [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#logical-filter) about `AND` operator). 
+Then run the updated command in order to continue the investigation: + +```bash +curl http://localhost:9428/select/logsql/query -d 'query=error AND "cannot open file"' | head +``` + +Note that the `query` arg must be properly encoded with [percent encoding](https://en.wikipedia.org/wiki/URL_encoding) when passing it to `curl` +or similar tools. + +The `pipe the query to "head" or "less" -> investigate the results -> refine the query` iteration +can be repeated multiple times until the needed log messages are found. + +The returned VictoriaLogs query response can be post-processed with any combination of Unix commands, +which are usually used for log analysis - `grep`, `jq`, `awk`, `sort`, `uniq`, `wc`, etc. + +For example, the following command uses `wc -l` Unix command for counting the number of log messages +with the `error` [word](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#word) +received from [streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) with `app="nginx"` field +during the last 5 minutes: + +```bash +curl http://localhost:9428/select/logsql/query -d 'query=_stream:{app="nginx"} AND _time:[now-5m,now] AND error' | wc -l +``` + +See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter) about `_stream` filter, +[these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) about `_time` filter +and [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#logical-filter) about `AND` operator. + +The following example shows how to sort query results by the [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field): + +```bash +curl http://localhost:9428/select/logsql/query -d 'query=error' | jq -r '._time + " " + ._msg' | sort | less +``` + +This command uses `jq` for extracting [`_time`](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) +and [`_msg`](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) fields from the returned results, +and piping them to `sort` command. + +Note that the `sort` command needs to read all the response stream before returning the sorted results. So the command above +can take non-trivial amounts of time if the `query` returns too many results. The solution is to narrow down the `query` +before sorting the results. See [these tips](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#performance-tips) +on how to narrow down query results. + +The following example calculates stats on the number of log messages received during the last 5 minutes +grouped by `log.level` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model): + +```bash +curl http://localhost:9428/select/logsql/query -d 'query=_time:[now-5m,now] log.level:*' | jq -r '."log.level"' | sort | uniq -c +``` + +The query selects all the log messages with non-empty `log.level` field via ["any value" filter](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#any-value-filter), +then pipes them to `jq` command, which extracts the `log.level` field value from the returned JSON stream, then the extracted `log.level` values +are sorted with `sort` command and, finally, they are passed to `uniq -c` command for calculating the needed stats. + +See also: + +- [Key concepts](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html). +- [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html). 
+ + +### Monitoring + +VictoriaLogs exposes internal metrics in Prometheus exposition format at `http://localhost:9428/metrics` page. +It is recommended to set up monitoring of these metrics via VictoriaMetrics +(see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter)), +vmagent (see [these docs](https://docs.victoriametrics.com/vmagent.html#how-to-collect-metrics-in-prometheus-format)) or via Prometheus. + +VictoriaLogs emits own logs to stdout. It is recommended investigating these logs during troubleshooting. + + +### Retention + +By default VictoriaLogs stores log entries with timestamps in the time range `[now-7d, now]`, while dropping logs outside the given time range. +E.g. it uses the retention of 7 days. The retention can be configured with `-retentionPeriod` command-line flag. +This flag accepts values starting from `1d` (one day) up to `100y` (100 years). See [these docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-durations) +for the supported duration formats. + +For example, the following command starts VictoriaLogs with the retention of 8 weeks: + +```bash +/path/to/victoria-logs -retentionPeriod=8w +``` + +VictoriaLogs stores the [ingested](#data-ingestion) logs in per-day partition directories. It automatically drops partition directories +outside the configured retention. + +VictoriaLogs automatically drops logs at [data ingestion](#data-ingestion) stage if they have timestamps outside the configured retention. +A sample of dropped logs is logged with `WARN` message in order to simplify troubleshooting. +The `vlinsert_rows_dropped_total` [metric](#monitoring) is incremented each time an ingested log entry is dropped because of timestamp outside the retention. +It is recommended setting up the following alerting rule at [vmalert](https://docs.victoriametrics.com/vmalert.html) in order to be notified +when logs with wrong timestamps are ingested into VictoriaLogs: + +```metricsql +rate(vlinsert_rows_dropped_total[5m]) > 0 +``` + +By default VictoriaLogs doesn't accept log entries with timestamps bigger than `now+2d`, e.g. 2 days in the future. +If you need accepting logs with bigger timestamps, then specify the desired "future retention" via `-futureRetention` command-line flag. +This flag accepts values starting from `1d`. See [these docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-durations) +for the supported duration formats. + +For example, the following command starts VictoriaLogs, which accepts logs with timestamps up to a year in the future: + +```bash +/path/to/victoria-logs -futureRetention=1y +``` + +### Storage + +VictoriaLogs stores all its data in a single directory - `victoria-logs-data`. The path to the directory can be changed via `-storageDataPath` command-line flag. +For example, the following command starts VictoriaLogs, which stores the data at `/var/lib/victoria-logs`: + +```bash +/path/to/victoria-logs -storageDataPath=/var/lib/victoria-logs +``` + +VictoriaLogs automatically creates the `-storageDataPath` directory on the first run if it is missing. diff --git a/docs/VictoriaLogs/Roadmap.md b/docs/VictoriaLogs/Roadmap.md new file mode 100644 index 000000000..1fb685399 --- /dev/null +++ b/docs/VictoriaLogs/Roadmap.md @@ -0,0 +1,37 @@ +# VictoriaLogs roadmap + +The VictoriaLogs Preview is ready for evaluation in production. 
It is recommended running it alongside the existing solutions +such as ElasticSearch and Grafana Loki and comparing their resource usage and usability. +It isn't recommended migrating from existing solutions to VictoriaLogs Preview yet. + +The following functionality is available in VictoriaLogs Preview: + +- [Data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion). +- [Querying](https://docs.victoriametrics.com/VictoriaLogs/#querying). +- [Querying via command-line](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line). + +See [operation docs](https://docs.victoriametrics.com/VictoriaLogs/#operation) for details. + +The following functionality is planned in the future versions of VictoriaLogs: + +- Support for [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) from popular log collectors and formats: + - Promtail (aka Grafana Loki) + - Vector.dev + - Fluentbit + - Fluentd + - Syslog +- Add missing functionality to [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html): + - [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context). + - [Transformation functions](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#transformations). + - [Post-filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#post-filters). + - [Stats calculations](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stats). + - [Sorting](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#sorting). + - [Limiters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#limiters). + - The ability to use subqueries inside [in()](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#multi-exact-filter) function. +- Live tailing for [LogsQL filters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#filters) aka `tail -f`. +- Web UI with the following abilities: + - Explore the ingested logs. + - Build graphs over time for the ingested logs. +- Ability to make instant snapshots and backups in the way [similar to VictoriaMetrics](https://docs.victoriametrics.com/#how-to-work-with-snapshots). +- Cluster version of VictoriaLogs. +- Ability to store data to object storage (such as S3, GCS, Minio). diff --git a/docs/VictoriaLogs/keyConcepts.md b/docs/VictoriaLogs/keyConcepts.md new file mode 100644 index 000000000..685fe7749 --- /dev/null +++ b/docs/VictoriaLogs/keyConcepts.md @@ -0,0 +1,219 @@ +# VictoriaLogs key concepts + +## Data model + +VictoriaLogs works with structured logs. Every log entry may contain arbitrary number of `key=value` pairs (aka fields). +A single log entry can be expressed as a single-level [JSON](https://www.json.org/json-en.html) object with string keys and values. +For example: + +```json +{ + "job": "my-app", + "instance": "host123:4567", + "level": "error", + "client_ip": "1.2.3.4", + "trace_id": "1234-56789-abcdef", + "_msg": "failed to serve the client request" +} +``` + +VictoriaLogs automatically transforms multi-level JSON (aka nested JSON) into single-level JSON +during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) according to the following rules: + +- Nested dictionaries are flattened by concatenating dictionary keys with `.` char. 
+  For example, the following multi-level JSON
+  is transformed into the following single-level JSON:
+
+  ```json
+  {
+    "host": {
+      "name": "foobar",
+      "os": {
+        "version": "1.2.3"
+      }
+    }
+  }
+  ```
+
+  ```json
+  {
+    "host.name": "foobar",
+    "host.os.version": "1.2.3"
+  }
+  ```
+
+- Arrays, numbers and boolean values are converted into strings. This simplifies [full-text search](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) over such values.
+  For example, the following JSON with an array, a number and a boolean value is converted into the following JSON with string values:
+
+  ```json
+  {
+    "tags": ["foo", "bar"],
+    "offset": 12345,
+    "is_error": false
+  }
+  ```
+
+  ```json
+  {
+    "tags": "[\"foo\", \"bar\"]",
+    "offset": "12345",
+    "is_error": "false"
+  }
+  ```
+
+Both the field name and the field value may contain arbitrary chars. Such chars must be encoded
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion)
+according to [JSON string encoding](https://www.rfc-editor.org/rfc/rfc7159.html#section-7).
+Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding:
+
+```json
+{
+  "field with whitespace": "value\nwith\nnewlines",
+  "Поле": "价值"
+}
+```
+
+VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) logs.
+This enables [full-text search](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) across all the fields.
+
+VictoriaLogs supports the following field types:
+
+* [`_msg` field](#message-field)
+* [`_time` field](#time-field)
+* [`_stream` fields](#stream-fields)
+* [other fields](#other-fields)
+
+
+### Message field
+
+Every ingested [log entry](#data-model) must contain at least a `_msg` field with the actual log message. For example, this is the minimal
+log entry, which can be ingested into VictoriaLogs:
+
+```json
+{
+  "_msg": "some log message"
+}
+```
+
+If the log message is located in a field other than `_msg`, then the actual field name can be specified
+via the `_msg_field` query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+For example, if the log message is located in the `event.original` field, then specify the `_msg_field=event.original` query arg
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+
+### Time field
+
+The ingested [log entries](#data-model) may contain a `_time` field with the timestamp of the ingested log entry.
+For example:
+
+```json
+{
+  "_msg": "some log message",
+  "_time": "2023-04-12T06:38:11.095Z"
+}
+```
+
+If the timestamp is located in a field other than `_time`, then the actual field name can be specified
+via the `_time_field` query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+For example, if the timestamp is located in the `event.created` field, then specify the `_time_field=event.created` query arg
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+
+If the `_time` field is missing, then the data ingestion time is used as the log entry timestamp.
+
+The log entry timestamp allows quickly narrowing down the search to a particular time range.
+See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) for details.
+
+### Stream fields
+
+Some [structured logging](#data-model) fields may uniquely identify the application instance that generates log entries.
+This may be either a single field such as `instance=host123:456` or a set of fields such as
+`(datacenter=..., env=..., job=..., instance=...)` or
+`(kubernetes.namespace=..., kubernetes.node.name=..., kubernetes.pod.name=..., kubernetes.container.name=...)`.
+
+Log entries received from a single application instance form a log stream in VictoriaLogs.
+VictoriaLogs optimizes storing and querying of individual log streams. This provides the following benefits:
+
+- Reduced disk space usage, since a log stream from a single application instance is usually compressed better
+  than a mixed log stream from multiple distinct applications.
+
+- Increased query performance, since VictoriaLogs needs to scan smaller amounts of data
+  when [searching by stream labels](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter).
+
+VictoriaLogs cannot automatically determine which fields uniquely identify every log stream,
+so it stores all the received log entries in a single default stream - `{}`.
+This may lead to not-so-optimal resource usage and query performance.
+
+Therefore it is recommended to specify stream-level fields via the `_stream_fields` query arg
+during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
+For example, if logs from Kubernetes containers have the following fields:
+
+```json
+{
+  "kubernetes.namespace": "some-namespace",
+  "kubernetes.node.name": "some-node",
+  "kubernetes.pod.name": "some-pod",
+  "kubernetes.container.name": "some-container",
+  "_msg": "some log message"
+}
+```
+
+then specify the `_stream_fields=kubernetes.namespace,kubernetes.node.name,kubernetes.pod.name,kubernetes.container.name`
+query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) in order to properly store
+per-container logs into distinct streams.
+
+#### How to determine which fields must be associated with log streams?
+
+[Log streams](#stream-fields) can be associated with fields, which simultaneously meet the following conditions:
+
+- Fields, which remain constant across log entries received from a single application instance.
+- Fields, which uniquely identify the application instance. For example, `instance`, `host`, `container`, etc.
+
+Sometimes a single application instance may generate multiple log streams and store them into distinct log files.
+In this case it is OK to associate the log stream with filepath fields such as `log.file.path` in addition to instance-specific fields.
+
+Structured logs may contain a big number of fields, which do not change across log entries received from a single application instance.
+There is no need to associate all these fields with a log stream - it is enough to associate only those fields, which uniquely identify
+the application instance across all the ingested logs. Additionally, some fields such as `datacenter`, `environment`, `namespace`, `job` or `app`
+can be associated with a log stream in order to optimize searching by these fields with [stream filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter).
+
+Never associate log streams with fields, which may change across log entries of the same application instance. See [these docs](#high-cardinality) for details.
+
+#### High cardinality
+
+Some fields in the [ingested logs](#data-model) may contain a big number of unique values across log entries.
+For example, fields with names such as `ip`, `user_id` or `trace_id` tend to contain a big number of unique values.
+VictoriaLogs works perfectly with such fields unless they are associated with [log streams](#stream-fields).
+
+Never associate high-cardinality fields with [log streams](#stream-fields), since this may result
+in the following issues:
+
+- Performance degradation during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion)
+  and [querying](https://docs.victoriametrics.com/VictoriaLogs/#querying)
+- Increased memory usage
+- Increased CPU usage
+- Increased disk space usage
+- Increased disk read / write IO
+
+VictoriaLogs exposes the `vl_streams_created_total` [metric](https://docs.victoriametrics.com/VictoriaLogs/#monitoring),
+which shows the number of created streams since the last VictoriaLogs restart. If this metric grows at a rapid rate
+during a long period of time, then there is a high chance of the high cardinality issues mentioned above.
+VictoriaLogs can log all the newly registered streams when the `-logNewStreams` command-line flag is passed to it.
+This can help narrow down and eliminate high-cardinality fields from [log streams](#stream-fields).
+
+### Other fields
+
+The rest of the [structured logging](#data-model) fields are optional. They can be used for simplifying and optimizing search queries.
+For example, it is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside a long log message.
+E.g. the `trace_id:XXXX-YYYY-ZZZZ` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query.
+
+See [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) for more details.
+
+## Multitenancy
+
+VictoriaLogs supports multitenancy. A tenant is identified by an `(AccountID, ProjectID)` pair, where `AccountID` and `ProjectID` are arbitrary 32-bit unsigned integers.
+The `AccountID` and `ProjectID` fields can be set during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion)
+and [querying](https://docs.victoriametrics.com/VictoriaLogs/#querying) via `AccountID` and `ProjectID` request headers.
+
+If `AccountID` and/or `ProjectID` request headers aren't set, then the default `0` value is used.
+
+VictoriaLogs has very low overhead for per-tenant management, so it is OK to have thousands of tenants in a single VictoriaLogs instance.
+
+VictoriaLogs doesn't perform per-tenant authorization. Use [vmauth](https://docs.victoriametrics.com/vmauth.html) or similar tools for per-tenant authorization.
diff --git a/lib/logstorage/arena.go b/lib/logstorage/arena.go
new file mode 100644
index 000000000..ab68b747b
--- /dev/null
+++ b/lib/logstorage/arena.go
@@ -0,0 +1,31 @@
+package logstorage
+
+import (
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+)
+
+type arena struct {
+	b []byte
+}
+
+func (a *arena) reset() {
+	a.b = a.b[:0]
+}
+
+func (a *arena) copyBytes(b []byte) []byte {
+	ab := a.b
+	abLen := len(ab)
+	ab = append(ab, b...)
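+	// the returned slice references the bytes just appended to a.b, so it remains valid until the arena is reset and re-used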
+ result := ab[abLen:] + a.b = ab + return result +} + +func (a *arena) newBytes(size int) []byte { + ab := a.b + abLen := len(ab) + ab = bytesutil.ResizeWithCopyMayOverallocate(ab, abLen+size) + result := ab[abLen:] + a.b = ab + return result +} diff --git a/lib/logstorage/block.go b/lib/logstorage/block.go new file mode 100644 index 000000000..83834c895 --- /dev/null +++ b/lib/logstorage/block.go @@ -0,0 +1,650 @@ +package logstorage + +import ( + "fmt" + "sort" + "sync" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// block represents a block of log entries. +type block struct { + // timestamps contains timestamps for log entries. + timestamps []int64 + + // columns contains values for fields seen in log entries. + columns []column + + // constColumns contains fields with constant values across all the block entries. + constColumns []Field +} + +func (b *block) reset() { + b.timestamps = b.timestamps[:0] + + cs := b.columns + for i := range cs { + cs[i].reset() + } + b.columns = cs[:0] + + ccs := b.constColumns + for i := range ccs { + ccs[i].Reset() + } + b.constColumns = ccs[:0] +} + +// uncompressedSizeBytes returns the total size of the origianl log entries stored in b. +// +// It is supposed that every log entry has the following format: +// +// 2006-01-02T15:04:05.999999999Z07:00 field1=value1 ... fieldN=valueN +func (b *block) uncompressedSizeBytes() uint64 { + rowsCount := uint64(b.Len()) + + // Take into account timestamps + n := rowsCount * uint64(len(time.RFC3339Nano)) + + // Take into account columns + cs := b.columns + for i := range cs { + c := &cs[i] + nameLen := uint64(len(c.name)) + if nameLen == 0 { + nameLen = uint64(len("_msg")) + } + for _, v := range c.values { + if len(v) > 0 { + n += nameLen + 2 + uint64(len(v)) + } + } + } + + // Take into account constColumns + ccs := b.constColumns + for i := range ccs { + cc := &ccs[i] + nameLen := uint64(len(cc.Name)) + if nameLen == 0 { + nameLen = uint64(len("_msg")) + } + n += rowsCount * (2 + nameLen + uint64(len(cc.Value))) + } + + return n +} + +// uncompressedRowsSizeBytes returns the size of the uncompressed rows. +// +// It is supposed that every row has the following format: +// +// 2006-01-02T15:04:05.999999999Z07:00 field1=value1 ... fieldN=valueN +func uncompressedRowsSizeBytes(rows [][]Field) uint64 { + n := uint64(0) + for _, fields := range rows { + n += uncompressedRowSizeBytes(fields) + } + return n +} + +// uncompressedRowSizeBytes returns the size of uncompressed row. +// +// It is supposed that the row has the following format: +// +// 2006-01-02T15:04:05.999999999Z07:00 field1=value1 ... fieldN=valueN +func uncompressedRowSizeBytes(fields []Field) uint64 { + n := uint64(len(time.RFC3339Nano)) // log timestamp + for _, f := range fields { + nameLen := len(f.Name) + if nameLen == 0 { + nameLen = len("_msg") + } + n += uint64(2 + nameLen + len(f.Value)) + } + return n +} + +// column contains values for the given field name seen in log entries. +type column struct { + // name is the field name + name string + + // values is the values seen for the given log entries. 
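+	// len(values) must match the number of timestamps in the parent block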
+ values []string +} + +func (c *column) reset() { + c.name = "" + + values := c.values + for i := range values { + values[i] = "" + } + c.values = values[:0] +} + +func (c *column) areSameValues() bool { + values := c.values + if len(values) < 2 { + return true + } + value := values[0] + for _, v := range values[1:] { + if value != v { + return false + } + } + return true +} + +func (c *column) resizeValues(valuesLen int) []string { + values := c.values + if n := valuesLen - cap(values); n > 0 { + values = append(values[:cap(values)], make([]string, n)...) + } + values = values[:valuesLen] + c.values = values + return values +} + +// mustWriteTo writes c to sw and updates ch accordingly. +func (c *column) mustWriteTo(ch *columnHeader, sw *streamWriters) { + ch.reset() + + valuesWriter := &sw.fieldValuesWriter + bloomFilterWriter := &sw.fieldBloomFilterWriter + if c.name == "" { + valuesWriter = &sw.messageValuesWriter + bloomFilterWriter = &sw.messageBloomFilterWriter + } + + ch.name = c.name + + // encode values + ve := getValuesEncoder() + ch.valueType, ch.minValue, ch.maxValue = ve.encode(c.values, &ch.valuesDict) + + bb := longTermBufPool.Get() + defer longTermBufPool.Put(bb) + + // marshal values + bb.B = marshalStringsBlock(bb.B[:0], ve.values) + putValuesEncoder(ve) + ch.valuesSize = uint64(len(bb.B)) + if ch.valuesSize > maxValuesBlockSize { + logger.Panicf("BUG: too valuesSize: %d bytes; mustn't exceed %d bytes", ch.valuesSize, maxValuesBlockSize) + } + ch.valuesOffset = valuesWriter.bytesWritten + valuesWriter.MustWrite(bb.B) + + // create and marshal bloom filter for c.values + if ch.valueType != valueTypeDict { + tokensBuf := getTokensBuf() + tokensBuf.A = tokenizeStrings(tokensBuf.A[:0], c.values) + bb.B = bloomFilterMarshal(bb.B[:0], tokensBuf.A) + putTokensBuf(tokensBuf) + } else { + // there is no need in ecoding bloom filter for dictiory type, + // since it isn't used during querying - all the dictionary values are available in ch.valuesDict + bb.B = bb.B[:0] + } + ch.bloomFilterSize = uint64(len(bb.B)) + if ch.bloomFilterSize > maxBloomFilterBlockSize { + logger.Panicf("BUG: too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", ch.bloomFilterSize, maxBloomFilterBlockSize) + } + ch.bloomFilterOffset = bloomFilterWriter.bytesWritten + bloomFilterWriter.MustWrite(bb.B) +} + +func (b *block) assertValid() { + // Check that timestamps are in ascending order + timestamps := b.timestamps + for i := 1; i < len(timestamps); i++ { + if timestamps[i-1] > timestamps[i] { + logger.Panicf("BUG: log entries must be sorted by timestamp; got the previous entry with bigger timestamp %d than the current entry with timestamp %d", + timestamps[i-1], timestamps[i]) + } + } + + // Check that the number of items in each column matches the number of items in the block. + itemsCount := len(timestamps) + columns := b.columns + for _, c := range columns { + if len(c.values) != itemsCount { + logger.Panicf("BUG: unexpected number of values for column %q: got %d; want %d", c.name, len(c.values), itemsCount) + } + } +} + +// MustInitFromRows initializes b from the given timestamps and rows. +// +// It is expected that timestamps are sorted. +func (b *block) MustInitFromRows(timestamps []int64, rows [][]Field) { + b.reset() + + assertTimestampsSorted(timestamps) + b.timestamps = append(b.timestamps, timestamps...) 
+ b.mustInitFromRows(rows) + b.sortColumnsByName() +} + +func (b *block) mustInitFromRows(rows [][]Field) { + rowsLen := len(rows) + if rowsLen == 0 { + // Nothing to do + return + } + + if areSameFieldsInRows(rows) { + // Fast path - all the log entries have the same fields + fields := rows[0] + for i := range fields { + f := &fields[i] + if areSameValuesForColumn(rows, i) { + cc := b.extendConstColumns() + cc.Name = f.Name + cc.Value = f.Value + } else { + c := b.extendColumns() + c.name = f.Name + values := c.resizeValues(rowsLen) + for j := range rows { + values[j] = rows[j][i].Value + } + } + } + return + } + + // Slow path - log entries contain different set of fields + + // Determine indexes for columns + columnIdxs := getColumnIdxs() + for i := range rows { + fields := rows[i] + for j := range fields { + name := fields[j].Name + if _, ok := columnIdxs[name]; !ok { + columnIdxs[name] = len(columnIdxs) + } + } + } + + // Initialize columns + cs := b.resizeColumns(len(columnIdxs)) + for name, idx := range columnIdxs { + c := &cs[idx] + c.name = name + c.resizeValues(rowsLen) + } + + // Write rows to block + for i := range rows { + for _, f := range rows[i] { + idx := columnIdxs[f.Name] + cs[idx].values[i] = f.Value + } + } + putColumnIdxs(columnIdxs) + + // Detect const columns + for i := len(cs) - 1; i >= 0; i-- { + c := &cs[i] + if !c.areSameValues() { + continue + } + cc := b.extendConstColumns() + cc.Name = c.name + cc.Value = c.values[0] + + c.reset() + if i < len(cs)-1 { + swapColumns(c, &cs[len(cs)-1]) + } + cs = cs[:len(cs)-1] + } + b.columns = cs +} + +func swapColumns(a, b *column) { + *a, *b = *b, *a +} + +func areSameValuesForColumn(rows [][]Field, colIdx int) bool { + if len(rows) < 2 { + return true + } + value := rows[0][colIdx].Value + rows = rows[1:] + for i := range rows { + if value != rows[i][colIdx].Value { + return false + } + } + return true +} + +func assertTimestampsSorted(timestamps []int64) { + for i := range timestamps { + if i > 0 && timestamps[i-1] > timestamps[i] { + logger.Panicf("BUG: log entries must be sorted by timestamp; got the previous entry with bigger timestamp %d than the current entry with timestamp %d", + timestamps[i-1], timestamps[i]) + } + } +} + +func (b *block) extendConstColumns() *Field { + ccs := b.constColumns + if cap(ccs) > len(ccs) { + ccs = ccs[:len(ccs)+1] + } else { + ccs = append(ccs, Field{}) + } + b.constColumns = ccs + return &ccs[len(ccs)-1] +} + +func (b *block) extendColumns() *column { + cs := b.columns + if cap(cs) > len(cs) { + cs = cs[:len(cs)+1] + } else { + cs = append(cs, column{}) + } + b.columns = cs + return &cs[len(cs)-1] +} + +func (b *block) resizeColumns(columnsLen int) []column { + cs := b.columns[:0] + if n := columnsLen - cap(cs); n > 0 { + cs = append(cs[:cap(cs)], make([]column, n)...) + } + cs = cs[:columnsLen] + b.columns = cs + return cs +} + +func (b *block) sortColumnsByName() { + if len(b.columns)+len(b.constColumns) > maxColumnsPerBlock { + logger.Panicf("BUG: too big number of columns detected in the block: %d; the number of columns mustn't exceed %d", + len(b.columns)+len(b.constColumns), maxColumnsPerBlock) + } + + cs := getColumnsSorter() + cs.columns = b.columns + sort.Sort(cs) + putColumnsSorter(cs) + + ccs := getConstColumnsSorter() + ccs.columns = b.constColumns + sort.Sort(ccs) + putConstColumnsSorter(ccs) +} + +// Len returns the number of log entries in b. +func (b *block) Len() int { + return len(b.timestamps) +} + +// InitFromBlockData unmarshals bd to b. 
+// +// sbu and vd are used as a temporary storage for unmarshaled column values. +// +// The b becomes outdated after sbu or vd is reset. +func (b *block) InitFromBlockData(bd *blockData, sbu *stringsBlockUnmarshaler, vd *valuesDecoder) error { + b.reset() + + if bd.rowsCount > maxRowsPerBlock { + return fmt.Errorf("too many entries found in the block: %d; mustn't exceed %d", bd.rowsCount, maxRowsPerBlock) + } + rowsCount := int(bd.rowsCount) + + // unmarshal timestamps + td := &bd.timestampsData + var err error + b.timestamps, err = encoding.UnmarshalTimestamps(b.timestamps[:0], td.data, td.marshalType, td.minTimestamp, rowsCount) + if err != nil { + return fmt.Errorf("cannot unmarshal timestamps: %w", err) + } + + // unmarshal columns + cds := bd.columnsData + cs := b.resizeColumns(len(cds)) + for i := range cds { + cd := &cds[i] + c := &cs[i] + c.name = cd.name + c.values, err = sbu.unmarshal(c.values[:0], cd.valuesData, uint64(rowsCount)) + if err != nil { + return fmt.Errorf("cannot unmarshal column %d: %w", i, err) + } + if err = vd.decodeInplace(c.values, cd.valueType, &cd.valuesDict); err != nil { + return fmt.Errorf("cannot decode column values: %w", err) + } + } + + // unmarshal constColumns + b.constColumns = append(b.constColumns[:0], bd.constColumns...) + + return nil +} + +// mustWriteTo writes b with the given sid to sw and updates bh accordingly +func (b *block) mustWriteTo(sid *streamID, bh *blockHeader, sw *streamWriters) { + // Do not store the version used for encoding directly in the block data, since: + // - all the blocks in the same part use the same encoding + // - the block encoding version can be put in metadata file for the part (aka metadataFilename) + + b.assertValid() + bh.reset() + + bh.streamID = *sid + bh.uncompressedSizeBytes = b.uncompressedSizeBytes() + bh.rowsCount = uint64(b.Len()) + + // Marshal timestamps + mustWriteTimestampsTo(&bh.timestampsHeader, b.timestamps, sw) + + // Marshal columns + cs := b.columns + csh := getColumnsHeader() + chs := csh.resizeColumnHeaders(len(cs)) + for i := range cs { + cs[i].mustWriteTo(&chs[i], sw) + } + csh.constColumns = append(csh.constColumns[:0], b.constColumns...) + + bb := longTermBufPool.Get() + bb.B = csh.marshal(bb.B) + putColumnsHeader(csh) + bh.columnsHeaderOffset = sw.columnsHeaderWriter.bytesWritten + bh.columnsHeaderSize = uint64(len(bb.B)) + if bh.columnsHeaderSize > maxColumnsHeaderSize { + logger.Panicf("BUG: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", bh.columnsHeaderSize, maxColumnsHeaderSize) + } + sw.columnsHeaderWriter.MustWrite(bb.B) + longTermBufPool.Put(bb) +} + +// appendRows appends log entries from b to dst. +func (b *block) appendRows(dst *rows) { + // copy timestamps + dst.timestamps = append(dst.timestamps, b.timestamps...) 
+ + // copy columns + fieldsBuf := dst.fieldsBuf + ccs := b.constColumns + cs := b.columns + for i := range b.timestamps { + fieldsLen := len(fieldsBuf) + // copy const columns + for j := range ccs { + cc := &ccs[j] + fieldsBuf = append(fieldsBuf, Field{ + Name: cc.Name, + Value: cc.Value, + }) + } + // copy other columns + for j := range cs { + c := &cs[j] + value := c.values[i] + if len(value) == 0 { + continue + } + fieldsBuf = append(fieldsBuf, Field{ + Name: c.name, + Value: value, + }) + } + dst.rows = append(dst.rows, fieldsBuf[fieldsLen:]) + } + dst.fieldsBuf = fieldsBuf +} + +func areSameFieldsInRows(rows [][]Field) bool { + if len(rows) < 2 { + return true + } + fields := rows[0] + rows = rows[1:] + for i := range rows { + leFields := rows[i] + if len(fields) != len(leFields) { + return false + } + for j := range leFields { + if leFields[j].Name != fields[j].Name { + return false + } + } + } + return true +} + +var columnIdxsPool sync.Pool + +func getColumnIdxs() map[string]int { + v := columnIdxsPool.Get() + if v == nil { + return make(map[string]int) + } + return v.(map[string]int) +} + +func putColumnIdxs(m map[string]int) { + for k := range m { + delete(m, k) + } + columnIdxsPool.Put(m) +} + +func getBlock() *block { + v := blockPool.Get() + if v == nil { + return &block{} + } + return v.(*block) +} + +func putBlock(b *block) { + b.reset() + blockPool.Put(b) +} + +var blockPool sync.Pool + +type columnsSorter struct { + columns []column +} + +func (cs *columnsSorter) reset() { + cs.columns = nil +} + +func (cs *columnsSorter) Len() int { + return len(cs.columns) +} + +func (cs *columnsSorter) Less(i, j int) bool { + columns := cs.columns + return columns[i].name < columns[j].name +} + +func (cs *columnsSorter) Swap(i, j int) { + columns := cs.columns + columns[i], columns[j] = columns[j], columns[i] +} + +func getColumnsSorter() *columnsSorter { + v := columnsSorterPool.Get() + if v == nil { + return &columnsSorter{} + } + return v.(*columnsSorter) +} + +func putColumnsSorter(cs *columnsSorter) { + cs.reset() + columnsSorterPool.Put(cs) +} + +var columnsSorterPool sync.Pool + +type constColumnsSorter struct { + columns []Field +} + +func (ccs *constColumnsSorter) reset() { + ccs.columns = nil +} + +func (ccs *constColumnsSorter) Len() int { + return len(ccs.columns) +} + +func (ccs *constColumnsSorter) Less(i, j int) bool { + columns := ccs.columns + return columns[i].Name < columns[j].Name +} + +func (ccs *constColumnsSorter) Swap(i, j int) { + columns := ccs.columns + columns[i], columns[j] = columns[j], columns[i] +} + +func getConstColumnsSorter() *constColumnsSorter { + v := constColumnsSorterPool.Get() + if v == nil { + return &constColumnsSorter{} + } + return v.(*constColumnsSorter) +} + +func putConstColumnsSorter(ccs *constColumnsSorter) { + ccs.reset() + constColumnsSorterPool.Put(ccs) +} + +var constColumnsSorterPool sync.Pool + +// mustWriteTimestampsTo writes timestamps to sw and updates th accordingly +func mustWriteTimestampsTo(th *timestampsHeader, timestamps []int64, sw *streamWriters) { + th.reset() + + bb := longTermBufPool.Get() + bb.B, th.marshalType, th.minTimestamp = encoding.MarshalTimestamps(bb.B[:0], timestamps, 64) + if len(bb.B) > maxTimestampsBlockSize { + logger.Panicf("BUG: too big block with timestamps: %d bytes; the maximum supported size is %d bytes", len(bb.B), maxTimestampsBlockSize) + } + th.maxTimestamp = timestamps[len(timestamps)-1] + th.blockOffset = sw.timestampsWriter.bytesWritten + th.blockSize = uint64(len(bb.B)) + 
sw.timestampsWriter.MustWrite(bb.B) + longTermBufPool.Put(bb) +} diff --git a/lib/logstorage/block_data.go b/lib/logstorage/block_data.go new file mode 100644 index 000000000..6eeb14a30 --- /dev/null +++ b/lib/logstorage/block_data.go @@ -0,0 +1,383 @@ +package logstorage + +import ( + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// blockData contains packed data for a single block. +// +// The main purpose of this struct is to reduce the work needed during background merge of parts. +// If the block is full, then the blockData can be written to the destination part +// without the need to unpack it. +type blockData struct { + // streamID is id of the stream for the data + streamID streamID + + // uncompressedSizeBytes is the original (uncompressed) size of log entries stored in the block + uncompressedSizeBytes uint64 + + // rowsCount is the number of log entries in the block + rowsCount uint64 + + // timestampsData contains the encoded timestamps data for the block + timestampsData timestampsData + + // columnsData contains packed per-column data. + columnsData []columnData + + // constColumns contains data for const columns across the block. + constColumns []Field + + // a is used for storing byte slices for timestamps and columns. + // + // It reduces fragmentation for them. + a arena +} + +// reset resets bd for subsequent re-use +func (bd *blockData) reset() { + bd.streamID.reset() + bd.uncompressedSizeBytes = 0 + bd.rowsCount = 0 + bd.timestampsData.reset() + + cds := bd.columnsData + for i := range cds { + cds[i].reset() + } + bd.columnsData = cds[:0] + + ccs := bd.constColumns + for i := range ccs { + ccs[i].Reset() + } + bd.constColumns = ccs[:0] + + bd.a.reset() +} + +func (bd *blockData) resizeColumnsData(columnsDataLen int) []columnData { + cds := bd.columnsData + if n := columnsDataLen - cap(cds); n > 0 { + cds = append(cds[:cap(cds)], make([]columnData, n)...) + } + cds = cds[:columnsDataLen] + bd.columnsData = cds + return cds +} + +// copyFrom copies src to bd. +func (bd *blockData) copyFrom(src *blockData) { + bd.reset() + + bd.streamID = src.streamID + bd.uncompressedSizeBytes = src.uncompressedSizeBytes + bd.rowsCount = src.rowsCount + bd.timestampsData.copyFrom(&src.timestampsData, &bd.a) + + cdsSrc := src.columnsData + cds := bd.resizeColumnsData(len(cdsSrc)) + for i := range cds { + cds[i].copyFrom(&cdsSrc[i], &bd.a) + } + bd.columnsData = cds + + bd.constColumns = append(bd.constColumns[:0], src.constColumns...) +} + +// unmarshalRows appends unmarshaled from bd log entries to dst. +// +// The returned log entries are valid until sbu and vd are valid. 
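+// They become invalid after sbu or vd is reset.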
+func (bd *blockData) unmarshalRows(dst *rows, sbu *stringsBlockUnmarshaler, vd *valuesDecoder) error { + b := getBlock() + defer putBlock(b) + + if err := b.InitFromBlockData(bd, sbu, vd); err != nil { + return err + } + b.appendRows(dst) + return nil +} + +// mustWriteTo writes bd with the given sid to sw and updates bh accordingly +func (bd *blockData) mustWriteTo(bh *blockHeader, sw *streamWriters) { + // Do not store the version used for encoding directly in the block data, since: + // - all the blocks in the same part use the same encoding + // - the block encoding version can be put in metadata file for the part (aka metadataFilename) + + bh.reset() + + bh.streamID = bd.streamID + bh.uncompressedSizeBytes = bd.uncompressedSizeBytes + bh.rowsCount = bd.rowsCount + + // Marshal timestamps + bd.timestampsData.mustWriteTo(&bh.timestampsHeader, sw) + + // Marshal columns + cds := bd.columnsData + csh := getColumnsHeader() + chs := csh.resizeColumnHeaders(len(cds)) + for i := range cds { + cds[i].mustWriteTo(&chs[i], sw) + } + csh.constColumns = append(csh.constColumns[:0], bd.constColumns...) + + bb := longTermBufPool.Get() + bb.B = csh.marshal(bb.B) + putColumnsHeader(csh) + bh.columnsHeaderOffset = sw.columnsHeaderWriter.bytesWritten + bh.columnsHeaderSize = uint64(len(bb.B)) + if bh.columnsHeaderSize > maxColumnsHeaderSize { + logger.Panicf("BUG: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", bh.columnsHeaderSize, maxColumnsHeaderSize) + } + sw.columnsHeaderWriter.MustWrite(bb.B) + longTermBufPool.Put(bb) +} + +// mustReadFrom reads block data associated with bh from sr to bd. +func (bd *blockData) mustReadFrom(bh *blockHeader, sr *streamReaders) { + bd.reset() + + bd.streamID = bh.streamID + bd.uncompressedSizeBytes = bh.uncompressedSizeBytes + bd.rowsCount = bh.rowsCount + + // Read timestamps + bd.timestampsData.mustReadFrom(&bh.timestampsHeader, sr, &bd.a) + + // Read columns + if bh.columnsHeaderOffset != sr.columnsHeaderReader.bytesRead { + logger.Panicf("FATAL: %s: unexpected columnsHeaderOffset=%d; must equal to the number of bytes read: %d", + sr.columnsHeaderReader.Path(), bh.columnsHeaderOffset, sr.columnsHeaderReader.bytesRead) + } + columnsHeaderSize := bh.columnsHeaderSize + if columnsHeaderSize > maxColumnsHeaderSize { + logger.Panicf("BUG: %s: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", sr.columnsHeaderReader.Path(), columnsHeaderSize, maxColumnsHeaderSize) + } + bb := longTermBufPool.Get() + bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(columnsHeaderSize)) + sr.columnsHeaderReader.MustReadFull(bb.B) + + csh := getColumnsHeader() + if err := csh.unmarshal(bb.B); err != nil { + logger.Panicf("FATAL: %s: cannot unmarshal columnsHeader: %s", sr.columnsHeaderReader.Path(), err) + } + longTermBufPool.Put(bb) + chs := csh.columnHeaders + cds := bd.resizeColumnsData(len(chs)) + for i := range chs { + cds[i].mustReadFrom(&chs[i], sr, &bd.a) + } + bd.constColumns = append(bd.constColumns[:0], csh.constColumns...) + putColumnsHeader(csh) +} + +// timestampsData contains the encoded timestamps data. +type timestampsData struct { + // data contains packed timestamps data. 
+	data []byte
+
+	// marshalType is the marshal type for timestamps
+	marshalType encoding.MarshalType
+
+	// minTimestamp is the minimum timestamp in the timestamps data
+	minTimestamp int64
+
+	// maxTimestamp is the maximum timestamp in the timestamps data
+	maxTimestamp int64
+}
+
+// reset resets td for subsequent re-use
+func (td *timestampsData) reset() {
+	td.data = nil
+	td.marshalType = 0
+	td.minTimestamp = 0
+	td.maxTimestamp = 0
+}
+
+// copyFrom copies src to td.
+func (td *timestampsData) copyFrom(src *timestampsData, a *arena) {
+	td.reset()
+
+	td.data = a.copyBytes(src.data)
+	td.marshalType = src.marshalType
+	td.minTimestamp = src.minTimestamp
+	td.maxTimestamp = src.maxTimestamp
+}
+
+// mustWriteTo writes td to sw and updates th accordingly
+func (td *timestampsData) mustWriteTo(th *timestampsHeader, sw *streamWriters) {
+	th.reset()
+
+	th.marshalType = td.marshalType
+	th.minTimestamp = td.minTimestamp
+	th.maxTimestamp = td.maxTimestamp
+	th.blockOffset = sw.timestampsWriter.bytesWritten
+	th.blockSize = uint64(len(td.data))
+	if th.blockSize > maxTimestampsBlockSize {
+		logger.Panicf("BUG: too big timestampsHeader.blockSize: %d bytes; mustn't exceed %d bytes", th.blockSize, maxTimestampsBlockSize)
+	}
+	sw.timestampsWriter.MustWrite(td.data)
+}
+
+// mustReadFrom reads timestamps data associated with th from sr to td.
+func (td *timestampsData) mustReadFrom(th *timestampsHeader, sr *streamReaders, a *arena) {
+	td.reset()
+
+	td.marshalType = th.marshalType
+	td.minTimestamp = th.minTimestamp
+	td.maxTimestamp = th.maxTimestamp
+
+	timestampsReader := &sr.timestampsReader
+	if th.blockOffset != timestampsReader.bytesRead {
+		logger.Panicf("FATAL: %s: unexpected timestampsHeader.blockOffset=%d; must equal the number of bytes read: %d",
+			timestampsReader.Path(), th.blockOffset, timestampsReader.bytesRead)
+	}
+	timestampsBlockSize := th.blockSize
+	if timestampsBlockSize > maxTimestampsBlockSize {
+		logger.Panicf("FATAL: %s: too big timestamps block with %d bytes; the maximum supported block size is %d bytes",
+			timestampsReader.Path(), timestampsBlockSize, maxTimestampsBlockSize)
+	}
+	td.data = a.newBytes(int(timestampsBlockSize))
+	timestampsReader.MustReadFull(td.data)
+}
+
+// columnData contains packed data for a single column.
+type columnData struct {
+	// name is the column name
+	name string
+
+	// valueType is the type of values stored in valuesData
+	valueType valueType
+
+	// minValue is the minimum encoded uint* or float64 value in the columnData
+	//
+	// It is used for fast detection of whether the given columnData contains values in the given range
+	minValue uint64
+
+	// maxValue is the maximum encoded uint* or float64 value in the columnData
+	//
+	// It is used for fast detection of whether the given columnData contains values in the given range
+	maxValue uint64
+
+	// valuesDict contains unique values for valueType = valueTypeDict
+	valuesDict valuesDict
+
+	// valuesData contains packed values data for the given column
+	valuesData []byte
+
+	// bloomFilterData contains packed bloomFilter data for the given column
+	bloomFilterData []byte
+}
+
+// reset resets cd for subsequent re-use
+func (cd *columnData) reset() {
+	cd.name = ""
+	cd.valueType = 0
+
+	cd.minValue = 0
+	cd.maxValue = 0
+	cd.valuesDict.reset()
+
+	cd.valuesData = nil
+	cd.bloomFilterData = nil
+}
+
+// copyFrom copies src to cd.
+func (cd *columnData) copyFrom(src *columnData, a *arena) { + cd.reset() + + cd.name = src.name + cd.valueType = src.valueType + + cd.minValue = src.minValue + cd.maxValue = src.maxValue + cd.valuesDict.copyFrom(&src.valuesDict) + + cd.valuesData = a.copyBytes(src.valuesData) + cd.bloomFilterData = a.copyBytes(src.bloomFilterData) +} + +// mustWriteTo writes cd to sw and updates ch accordingly. +func (cd *columnData) mustWriteTo(ch *columnHeader, sw *streamWriters) { + ch.reset() + + valuesWriter := &sw.fieldValuesWriter + bloomFilterWriter := &sw.fieldBloomFilterWriter + if cd.name == "" { + valuesWriter = &sw.messageValuesWriter + bloomFilterWriter = &sw.messageBloomFilterWriter + } + + ch.name = cd.name + ch.valueType = cd.valueType + + ch.minValue = cd.minValue + ch.maxValue = cd.maxValue + ch.valuesDict.copyFrom(&cd.valuesDict) + + // marshal values + ch.valuesSize = uint64(len(cd.valuesData)) + if ch.valuesSize > maxValuesBlockSize { + logger.Panicf("BUG: too big valuesSize: %d bytes; mustn't exceed %d bytes", ch.valuesSize, maxValuesBlockSize) + } + ch.valuesOffset = valuesWriter.bytesWritten + valuesWriter.MustWrite(cd.valuesData) + + // marshal bloom filter + ch.bloomFilterSize = uint64(len(cd.bloomFilterData)) + if ch.bloomFilterSize > maxBloomFilterBlockSize { + logger.Panicf("BUG: too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", ch.bloomFilterSize, maxBloomFilterBlockSize) + } + ch.bloomFilterOffset = bloomFilterWriter.bytesWritten + bloomFilterWriter.MustWrite(cd.bloomFilterData) +} + +// mustReadFrom reads columns data associated with ch from sr to cd. +func (cd *columnData) mustReadFrom(ch *columnHeader, sr *streamReaders, a *arena) { + cd.reset() + + valuesReader := &sr.fieldValuesReader + bloomFilterReader := &sr.fieldBloomFilterReader + if ch.name == "" { + valuesReader = &sr.messageValuesReader + bloomFilterReader = &sr.messageBloomFilterReader + } + + cd.name = ch.name + cd.valueType = ch.valueType + + cd.minValue = ch.minValue + cd.maxValue = ch.maxValue + cd.valuesDict.copyFrom(&ch.valuesDict) + + // read values + if ch.valuesOffset != valuesReader.bytesRead { + logger.Panicf("FATAL: %s: unexpected columnHeader.valuesOffset=%d; must equal to the number of bytes read: %d", + valuesReader.Path(), ch.valuesOffset, valuesReader.bytesRead) + } + valuesSize := ch.valuesSize + if valuesSize > maxValuesBlockSize { + logger.Panicf("FATAL: %s: values block size cannot exceed %d bytes; got %d bytes", valuesReader.Path(), maxValuesBlockSize, valuesSize) + } + cd.valuesData = a.newBytes(int(valuesSize)) + valuesReader.MustReadFull(cd.valuesData) + + // read bloom filter + // bloom filter is missing in valueTypeDict. 
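// Note on the offset checks in mustReadFrom: every stream writer simply tracks
// bytesWritten while blocks are appended sequentially, and mustWriteTo above
// records the resulting (offset, size) pairs in the columnHeader. The read path
// therefore expects the stored offset to match the reader's running bytesRead,
// and treats any mismatch as corruption. Write-side bookkeeping in short
// (field and method names as in the code above):
//
//	ch.valuesOffset = valuesWriter.bytesWritten // offset before the write
//	valuesWriter.MustWrite(cd.valuesData)       // advances bytesWritten by len(valuesData)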
+ if ch.valueType != valueTypeDict { + if ch.bloomFilterOffset != bloomFilterReader.bytesRead { + logger.Panicf("FATAL: %s: unexpected columnHeader.bloomFilterOffset=%d; must equal to the number of bytes read: %d", + bloomFilterReader.Path(), ch.bloomFilterOffset, bloomFilterReader.bytesRead) + } + bloomFilterSize := ch.bloomFilterSize + if bloomFilterSize > maxBloomFilterBlockSize { + logger.Panicf("FATAL: %s: bloom filter block size cannot exceed %d bytes; got %d bytes", bloomFilterReader.Path(), maxBloomFilterBlockSize, bloomFilterSize) + } + cd.bloomFilterData = a.newBytes(int(bloomFilterSize)) + bloomFilterReader.MustReadFull(cd.bloomFilterData) + } +} diff --git a/lib/logstorage/block_data_test.go b/lib/logstorage/block_data_test.go new file mode 100644 index 000000000..975d8c486 --- /dev/null +++ b/lib/logstorage/block_data_test.go @@ -0,0 +1,106 @@ +package logstorage + +import ( + "reflect" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" +) + +func TestBlockDataReset(t *testing.T) { + bd := &blockData{ + streamID: streamID{ + tenantID: TenantID{ + AccountID: 123, + ProjectID: 432, + }, + }, + uncompressedSizeBytes: 2344, + rowsCount: 134, + timestampsData: timestampsData{ + data: []byte("foo"), + marshalType: encoding.MarshalTypeDeltaConst, + minTimestamp: 1234, + maxTimestamp: 23443, + }, + columnsData: []columnData{ + { + name: "foo", + valueType: valueTypeUint16, + valuesData: []byte("aaa"), + bloomFilterData: []byte("bsdf"), + }, + }, + constColumns: []Field{ + { + Name: "foo", + Value: "bar", + }, + }, + } + bd.reset() + bdZero := &blockData{ + columnsData: []columnData{}, + constColumns: []Field{}, + } + if !reflect.DeepEqual(bd, bdZero) { + t.Fatalf("unexpected non-zero blockData after reset: %v", bd) + } +} + +func TestBlockDataCopyFrom(t *testing.T) { + f := func(bd *blockData) { + t.Helper() + var bd2 blockData + bd2.copyFrom(bd) + bd2.a.b = nil + if !reflect.DeepEqual(bd, &bd2) { + t.Fatalf("unexpected blockData copy\ngot\n%v\nwant\n%v", &bd2, bd) + } + + // Try copying it again to the same destination + bd2.copyFrom(bd) + bd2.a.b = nil + if !reflect.DeepEqual(bd, &bd2) { + t.Fatalf("unexpected blockData copy to the same destination\ngot\n%v\nwant\n%v", &bd2, bd) + } + } + f(&blockData{}) + + bd := &blockData{ + streamID: streamID{ + tenantID: TenantID{ + AccountID: 123, + ProjectID: 432, + }, + }, + uncompressedSizeBytes: 8943, + rowsCount: 134, + timestampsData: timestampsData{ + data: []byte("foo"), + marshalType: encoding.MarshalTypeDeltaConst, + minTimestamp: 1234, + maxTimestamp: 23443, + }, + columnsData: []columnData{ + { + name: "foo", + valueType: valueTypeUint16, + valuesData: []byte("aaa"), + bloomFilterData: []byte("bsdf"), + }, + { + name: "bar", + valuesData: []byte("aaa"), + bloomFilterData: []byte("bsdf"), + }, + }, + constColumns: []Field{ + { + Name: "foobar", + Value: "baz", + }, + }, + } + f(bd) +} diff --git a/lib/logstorage/block_header.go b/lib/logstorage/block_header.go new file mode 100644 index 000000000..490c79216 --- /dev/null +++ b/lib/logstorage/block_header.go @@ -0,0 +1,766 @@ +package logstorage + +import ( + "fmt" + "math" + "sync" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// blockHeader contains information about a single block. +// +// blockHeader is stored in the indexFilename file. 
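//
// blockHeaders are not stored in the index file one by one: as the reader side
// below shows (mustReadBlockHeaders in block_search.go), consecutive marshaled
// blockHeaders are concatenated, compressed with zstd and written as index
// blocks, which are in turn located via indexBlockHeader entries. A rough
// sketch of the read path with error handling elided:
//
//	data, _ := encoding.DecompressZSTD(nil, compressedIndexBlock)
//	bhs, _ := unmarshalBlockHeaders(bhs[:0], data)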
+type blockHeader struct { + // streamID is a stream id for entries in the block + streamID streamID + + // uncompressedSizeBytes is the original (uncompressed) size of log entries stored in the block + uncompressedSizeBytes uint64 + + // rowsCount is the number of log entries stored in the block + rowsCount uint64 + + // timestampsHeader contains information about timestamps for log entries in the block + timestampsHeader timestampsHeader + + // columnsHeaderOffset is the offset of columnsHeader at columnsHeaderFilename + columnsHeaderOffset uint64 + + // columnsHeaderSize is the size of columnsHeader at columnsHeaderFilename + columnsHeaderSize uint64 +} + +// reset resets bh, so it can be re-used. +func (bh *blockHeader) reset() { + bh.streamID.reset() + bh.uncompressedSizeBytes = 0 + bh.rowsCount = 0 + bh.timestampsHeader.reset() + bh.columnsHeaderOffset = 0 + bh.columnsHeaderSize = 0 +} + +func (bh *blockHeader) copyFrom(src *blockHeader) { + bh.reset() + + bh.streamID = src.streamID + bh.uncompressedSizeBytes = src.uncompressedSizeBytes + bh.rowsCount = src.rowsCount + bh.timestampsHeader.copyFrom(&src.timestampsHeader) + bh.columnsHeaderOffset = src.columnsHeaderOffset + bh.columnsHeaderSize = src.columnsHeaderSize +} + +// marshal appends the marshaled bh to dst and returns the result. +func (bh *blockHeader) marshal(dst []byte) []byte { + // Do not store the version used for encoding directly in the block header, since: + // - all the block headers in the same part use the same encoding + // - the block header encoding version can be put in metadata file for the part (aka metadataFilename) + + dst = bh.streamID.marshal(dst) + dst = encoding.MarshalVarUint64(dst, bh.uncompressedSizeBytes) + dst = encoding.MarshalVarUint64(dst, bh.rowsCount) + dst = bh.timestampsHeader.marshal(dst) + dst = encoding.MarshalVarUint64(dst, bh.columnsHeaderOffset) + dst = encoding.MarshalVarUint64(dst, bh.columnsHeaderSize) + + return dst +} + +// unmarshal unmarshals bh from src and returns the remaining tail. 
+func (bh *blockHeader) unmarshal(src []byte) ([]byte, error) { + bh.reset() + + srcOrig := src + + // unmarshal bh.streamID + tail, err := bh.streamID.unmarshal(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal streamID: %w", err) + } + src = tail + + // unmarshal bh.uncompressedSizeBytes + tail, n, err := encoding.UnmarshalVarUint64(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal uncompressedSizeBytes: %w", err) + } + bh.uncompressedSizeBytes = n + src = tail + + // unmarshal bh.rowsCount + tail, n, err = encoding.UnmarshalVarUint64(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal rowsCount: %w", err) + } + if n > maxRowsPerBlock { + return srcOrig, fmt.Errorf("too big value for rowsCount: %d; mustn't exceed %d", n, maxRowsPerBlock) + } + bh.rowsCount = n + src = tail + + // unmarshal bh.timestampsHeader + tail, err = bh.timestampsHeader.unmarshal(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal timestampsHeader: %w", err) + } + src = tail + + // unmarshal columnsHeaderOffset + tail, n, err = encoding.UnmarshalVarUint64(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal columnsHeaderOffset: %w", err) + } + bh.columnsHeaderOffset = n + src = tail + + // unmarshal columnsHeaderSize + tail, n, err = encoding.UnmarshalVarUint64(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal columnsHeaderSize: %w", err) + } + if n > maxColumnsHeaderSize { + return srcOrig, fmt.Errorf("too big value for columnsHeaderSize: %d; mustn't exceed %d", n, maxColumnsHeaderSize) + } + bh.columnsHeaderSize = n + src = tail + + return src, nil +} + +func getBlockHeader() *blockHeader { + v := blockHeaderPool.Get() + if v == nil { + return &blockHeader{} + } + return v.(*blockHeader) +} + +func putBlockHeader(bh *blockHeader) { + bh.reset() + blockHeaderPool.Put(bh) +} + +var blockHeaderPool sync.Pool + +// unmarshalBlockHeaders appends unmarshaled from src blockHeader entries to dst and returns the result. 
+func unmarshalBlockHeaders(dst []blockHeader, src []byte) ([]blockHeader, error) { + dstOrig := dst + for len(src) > 0 { + if len(dst) < cap(dst) { + dst = dst[:len(dst)+1] + } else { + dst = append(dst, blockHeader{}) + } + bh := &dst[len(dst)-1] + tail, err := bh.unmarshal(src) + if err != nil { + return dstOrig, fmt.Errorf("cannot unmarshal blockHeader entries: %w", err) + } + src = tail + } + if err := validateBlockHeaders(dst[len(dstOrig):]); err != nil { + return dstOrig, err + } + return dst, nil +} + +func validateBlockHeaders(bhs []blockHeader) error { + for i := 1; i < len(bhs); i++ { + bhCurr := &bhs[i] + bhPrev := &bhs[i-1] + if bhCurr.streamID.less(&bhPrev.streamID) { + return fmt.Errorf("unexpected blockHeader with smaller streamID=%s after bigger streamID=%s at position %d", &bhCurr.streamID, &bhPrev.streamID, i) + } + if !bhCurr.streamID.equal(&bhPrev.streamID) { + continue + } + thCurr := bhCurr.timestampsHeader + thPrev := bhPrev.timestampsHeader + if thCurr.minTimestamp < thPrev.minTimestamp { + return fmt.Errorf("unexpected blockHeader with smaller timestamp=%d after bigger timestamp=%d at position %d", thCurr.minTimestamp, thPrev.minTimestamp, i) + } + } + return nil +} + +func resetBlockHeaders(bhs []blockHeader) []blockHeader { + for i := range bhs { + bhs[i].reset() + } + return bhs[:0] +} + +func getColumnsHeader() *columnsHeader { + v := columnsHeaderPool.Get() + if v == nil { + return &columnsHeader{} + } + return v.(*columnsHeader) +} + +func putColumnsHeader(csh *columnsHeader) { + csh.reset() + columnsHeaderPool.Put(csh) +} + +var columnsHeaderPool sync.Pool + +// columnsHeader contains information about columns in a single block. +// +// columnsHeader is stored in the columnsHeaderFilename file. +type columnsHeader struct { + // columnHeaders contains the information about every column seen in the block. + columnHeaders []columnHeader + + // constColumns contain fields with constant values across all the block entries. + constColumns []Field +} + +func (csh *columnsHeader) reset() { + chs := csh.columnHeaders + for i := range chs { + chs[i].reset() + } + csh.columnHeaders = chs[:0] + + ccs := csh.constColumns + for i := range ccs { + ccs[i].Reset() + } + csh.constColumns = ccs[:0] +} + +func (csh *columnsHeader) getConstColumnValue(name string) string { + if name == "_msg" { + name = "" + } + ccs := csh.constColumns + for i := range ccs { + cc := &ccs[i] + if cc.Name == name { + return cc.Value + } + } + return "" +} + +func (csh *columnsHeader) getColumnHeader(name string) *columnHeader { + if name == "_msg" { + name = "" + } + chs := csh.columnHeaders + for i := range chs { + ch := &chs[i] + if ch.name == name { + return ch + } + } + return nil +} + +func (csh *columnsHeader) resizeConstColumns(columnsLen int) []Field { + ccs := csh.constColumns + if n := columnsLen - cap(ccs); n > 0 { + ccs = append(ccs[:cap(ccs)], make([]Field, n)...) + } + ccs = ccs[:columnsLen] + csh.constColumns = ccs + return ccs +} + +func (csh *columnsHeader) resizeColumnHeaders(columnHeadersLen int) []columnHeader { + chs := csh.columnHeaders + if n := columnHeadersLen - cap(chs); n > 0 { + chs = append(chs[:cap(chs)], make([]columnHeader, n)...) 
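// The append above grows the slice only when the requested length exceeds the
// current capacity: it reslices to cap(chs) and appends n zero values, so any
// previously allocated columnHeader values (and their inner buffers) are reused.
// For example, with cap(chs)=4 and columnHeadersLen=6, two zero columnHeaders
// are appended and the slice is then cut back to length 6 below.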
+	}
+	chs = chs[:columnHeadersLen]
+	csh.columnHeaders = chs
+	return chs
+}
+
+func (csh *columnsHeader) marshal(dst []byte) []byte {
+	chs := csh.columnHeaders
+	dst = encoding.MarshalVarUint64(dst, uint64(len(chs)))
+	for i := range chs {
+		dst = chs[i].marshal(dst)
+	}
+
+	ccs := csh.constColumns
+	dst = encoding.MarshalVarUint64(dst, uint64(len(ccs)))
+	for i := range ccs {
+		dst = ccs[i].marshal(dst)
+	}
+
+	return dst
+}
+
+func (csh *columnsHeader) unmarshal(src []byte) error {
+	csh.reset()
+
+	// unmarshal columnHeaders
+	tail, n, err := encoding.UnmarshalVarUint64(src)
+	if err != nil {
+		return fmt.Errorf("cannot unmarshal columnHeaders len: %w", err)
+	}
+	if n > maxColumnsPerBlock {
+		return fmt.Errorf("too many column headers: %d; mustn't exceed %d", n, maxColumnsPerBlock)
+	}
+	src = tail
+	chs := csh.resizeColumnHeaders(int(n))
+	for i := range chs {
+		tail, err = chs[i].unmarshal(src)
+		if err != nil {
+			return fmt.Errorf("cannot unmarshal columnHeader %d out of %d columnHeaders: %w", i, len(chs), err)
+		}
+		src = tail
+	}
+	csh.columnHeaders = chs
+
+	// unmarshal constColumns
+	tail, n, err = encoding.UnmarshalVarUint64(src)
+	if err != nil {
+		return fmt.Errorf("cannot unmarshal constColumns len: %w", err)
+	}
+	if n+uint64(len(csh.columnHeaders)) > maxColumnsPerBlock {
+		return fmt.Errorf("too many columns: %d; mustn't exceed %d", n+uint64(len(csh.columnHeaders)), maxColumnsPerBlock)
+	}
+	src = tail
+	ccs := csh.resizeConstColumns(int(n))
+	for i := range ccs {
+		tail, err = ccs[i].unmarshal(src)
+		if err != nil {
+			return fmt.Errorf("cannot unmarshal constColumn %d out of %d columns: %w", i, len(ccs), err)
+		}
+		src = tail
+	}
+
+	// Verify that the src is empty
+	if len(src) > 0 {
+		return fmt.Errorf("unexpected non-empty tail left after unmarshaling columnsHeader: len(tail)=%d", len(src))
+	}
+
+	return nil
+}
+
+// columnHeader contains information about the values belonging to a single label (column) in a single block.
+//
+// The main column with an empty name is stored in messageValuesFilename,
+// while the rest of the columns are stored in fieldValuesFilename.
+// This allows minimizing disk read IO when filtering by non-message columns.
+//
+// Every block column also contains a bloom filter for all the tokens stored in the column.
+// This bloom filter is used for quickly determining whether the given block may contain the given tokens.
+//
+// Tokens in the bloom filter depend on valueType:
+//
+//   - valueTypeString stores lowercased tokens seen in all the values
+//   - valueTypeDict doesn't store anything in the bloom filter, since all the encoded values
+//     are available directly in the valuesDict field
+//   - valueTypeUint8, valueTypeUint16, valueTypeUint32 and valueTypeUint64 store encoded uint values
+//   - valueTypeFloat64 stores encoded float64 values
+//   - valueTypeIPv4 stores IPv4 addresses encoded as uint32
+//   - valueTypeTimestampISO8601 stores timestamps encoded as uint64
+//
+// The bloom filter for the main column with an empty name is stored in messageBloomFilename,
+// while bloom filters for the rest of the columns are stored in fieldBloomFilename.
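//
// A sketch of how the bloom filter is typically consulted at query time; the
// tokenize and containsAll helpers here are illustrative assumptions, while
// getBloomFilterForColumn is the actual cache in block_search.go:
//
//	tokens := tokenize(phrase)           // same tokenization as on ingestion
//	bf := bs.getBloomFilterForColumn(ch) // loaded and cached per block and column
//	if !bf.containsAll(tokens) {
//		return // the block is guaranteed not to contain the phrase
//	}
//	// otherwise the packed values are read and checked precisely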
+type columnHeader struct { + // name contains column name aka label name + name string + + // valueType is the type of values stored in the block + valueType valueType + + // minValue is the minimum encoded value for uint*, ipv4, timestamp and float64 value in the columnHeader + // + // It is used for fast detection of whether the given columnHeader contains values in the given range + minValue uint64 + + // maxValue is the maximum encoded value for uint*, ipv4, timestamp and float64 value in the columnHeader + // + // It is used for fast detection of whether the given columnHeader contains values in the given range + maxValue uint64 + + // valuesDict contains unique values for valueType = valueTypeDict + valuesDict valuesDict + + // valuesOffset contains the offset of the block in either messageValuesFilename or fieldValuesFilename + valuesOffset uint64 + + // valuesSize contains the size of the block in either messageValuesFilename or fieldValuesFilename + valuesSize uint64 + + // bloomFilterOffset contains the offset of the bloom filter in either messageBloomFilename or fieldBloomFilename + bloomFilterOffset uint64 + + // bloomFilterSize contains the size of the bloom filter in either messageBloomFilename or fieldBloomFilename + bloomFilterSize uint64 +} + +// reset resets ch +func (ch *columnHeader) reset() { + ch.name = "" + ch.valueType = 0 + + ch.minValue = 0 + ch.maxValue = 0 + ch.valuesDict.reset() + + ch.valuesOffset = 0 + ch.valuesSize = 0 + + ch.bloomFilterOffset = 0 + ch.bloomFilterSize = 0 +} + +// marshal appends marshaled ch to dst and returns the result. +func (ch *columnHeader) marshal(dst []byte) []byte { + // check minValue/maxValue + if ch.valueType == valueTypeFloat64 { + minValue := math.Float64frombits(ch.minValue) + maxValue := math.Float64frombits(ch.maxValue) + if minValue > maxValue { + logger.Panicf("BUG: minValue=%g must be smaller than maxValue=%g", minValue, maxValue) + } + } else { + if ch.minValue > ch.maxValue { + logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d", ch.minValue, ch.maxValue) + } + } + + // Encode common fields - ch.name and ch.valueType + dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(ch.name)) + dst = append(dst, byte(ch.valueType)) + + // Encode other fields depending on ch.valueType + switch ch.valueType { + case valueTypeString: + dst = ch.marshalValuesAndBloomFilters(dst) + case valueTypeDict: + dst = ch.valuesDict.marshal(dst) + dst = ch.marshalValues(dst) + case valueTypeUint8: + dst = append(dst, byte(ch.minValue)) + dst = append(dst, byte(ch.maxValue)) + dst = ch.marshalValuesAndBloomFilters(dst) + case valueTypeUint16: + dst = encoding.MarshalUint16(dst, uint16(ch.minValue)) + dst = encoding.MarshalUint16(dst, uint16(ch.maxValue)) + dst = ch.marshalValuesAndBloomFilters(dst) + case valueTypeUint32: + dst = encoding.MarshalUint32(dst, uint32(ch.minValue)) + dst = encoding.MarshalUint32(dst, uint32(ch.maxValue)) + dst = ch.marshalValuesAndBloomFilters(dst) + case valueTypeUint64: + dst = encoding.MarshalUint64(dst, ch.minValue) + dst = encoding.MarshalUint64(dst, ch.maxValue) + dst = ch.marshalValuesAndBloomFilters(dst) + case valueTypeFloat64: + // float64 values are encoded as uint64 via math.Float64bits() + dst = encoding.MarshalUint64(dst, ch.minValue) + dst = encoding.MarshalUint64(dst, ch.maxValue) + dst = ch.marshalValuesAndBloomFilters(dst) + case valueTypeIPv4: + dst = encoding.MarshalUint32(dst, uint32(ch.minValue)) + dst = encoding.MarshalUint32(dst, uint32(ch.maxValue)) + dst = 
ch.marshalValuesAndBloomFilters(dst) + case valueTypeTimestampISO8601: + // timestamps are encoded in nanoseconds + dst = encoding.MarshalUint64(dst, ch.minValue) + dst = encoding.MarshalUint64(dst, ch.maxValue) + dst = ch.marshalValuesAndBloomFilters(dst) + default: + logger.Panicf("BUG: unknown valueType=%d", ch.valueType) + } + + return dst +} + +func (ch *columnHeader) marshalValuesAndBloomFilters(dst []byte) []byte { + dst = ch.marshalValues(dst) + dst = ch.marshalBloomFilters(dst) + return dst +} + +func (ch *columnHeader) marshalValues(dst []byte) []byte { + dst = encoding.MarshalVarUint64(dst, ch.valuesOffset) + dst = encoding.MarshalVarUint64(dst, ch.valuesSize) + return dst +} + +func (ch *columnHeader) marshalBloomFilters(dst []byte) []byte { + dst = encoding.MarshalVarUint64(dst, ch.bloomFilterOffset) + dst = encoding.MarshalVarUint64(dst, ch.bloomFilterSize) + return dst +} + +// unmarshal unmarshals ch from src and returns the tail left after unmarshaling. +func (ch *columnHeader) unmarshal(src []byte) ([]byte, error) { + ch.reset() + + srcOrig := src + + // Unmarshal column name + tail, data, err := encoding.UnmarshalBytes(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal column name: %w", err) + } + // Do not use bytesutil.InternBytes(data) here, since it works slower than the string(data) in prod + ch.name = string(data) + src = tail + + // Unmarshal value type + if len(src) < 1 { + return srcOrig, fmt.Errorf("cannot unmarshal valueType from 0 bytes for column %q; need at least 1 byte", ch.name) + } + ch.valueType = valueType(src[0]) + src = src[1:] + + // Unmarshal the rest of data depending on valueType + switch ch.valueType { + case valueTypeString: + tail, err = ch.unmarshalValuesAndBloomFilters(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeString for column %q: %w", ch.name, err) + } + src = tail + case valueTypeDict: + tail, err = ch.valuesDict.unmarshal(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal dict at valueTypeDict for column %q: %w", ch.name, err) + } + src = tail + + tail, err = ch.unmarshalValues(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values at valueTypeDict for column %q: %w", ch.name, err) + } + src = tail + case valueTypeUint8: + if len(src) < 2 { + return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint8 from %d bytes for column %q; need at least 2 bytes", len(src), ch.name) + } + ch.minValue = uint64(src[0]) + ch.maxValue = uint64(src[1]) + src = src[2:] + + tail, err = ch.unmarshalValuesAndBloomFilters(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint8 for column %q: %w", ch.name, err) + } + src = tail + case valueTypeUint16: + if len(src) < 4 { + return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint16 from %d bytes for column %q; need at least 4 bytes", len(src), ch.name) + } + ch.minValue = uint64(encoding.UnmarshalUint16(src)) + ch.maxValue = uint64(encoding.UnmarshalUint16(src[2:])) + src = src[4:] + + tail, err = ch.unmarshalValuesAndBloomFilters(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint16 for column %q: %w", ch.name, err) + } + src = tail + case valueTypeUint32: + if len(src) < 8 { + return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint32 from %d bytes for column %q; need at least 8 bytes", len(src), ch.name) + } + 
ch.minValue = uint64(encoding.UnmarshalUint32(src)) + ch.maxValue = uint64(encoding.UnmarshalUint32(src[4:])) + src = src[8:] + + tail, err = ch.unmarshalValuesAndBloomFilters(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint32 for column %q: %w", ch.name, err) + } + src = tail + case valueTypeUint64: + if len(src) < 16 { + return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name) + } + ch.minValue = encoding.UnmarshalUint64(src) + ch.maxValue = encoding.UnmarshalUint64(src[8:]) + src = src[16:] + + tail, err = ch.unmarshalValuesAndBloomFilters(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint64 for column %q: %w", ch.name, err) + } + src = tail + case valueTypeFloat64: + if len(src) < 16 { + return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeFloat64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name) + } + // min and max values must be converted to real values with math.Float64frombits() during querying. + ch.minValue = encoding.UnmarshalUint64(src) + ch.maxValue = encoding.UnmarshalUint64(src[8:]) + src = src[16:] + + tail, err = ch.unmarshalValuesAndBloomFilters(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeFloat64 for column %q: %w", ch.name, err) + } + src = tail + case valueTypeIPv4: + if len(src) < 8 { + return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeIPv4 from %d bytes for column %q; need at least 8 bytes", len(src), ch.name) + } + ch.minValue = uint64(encoding.UnmarshalUint32(src)) + ch.maxValue = uint64(encoding.UnmarshalUint32(src[4:])) + src = src[8:] + + tail, err = ch.unmarshalValuesAndBloomFilters(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeIPv4 for column %q: %w", ch.name, err) + } + src = tail + case valueTypeTimestampISO8601: + if len(src) < 16 { + return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeTimestampISO8601 from %d bytes for column %q; need at least 16 bytes", + len(src), ch.name) + } + ch.minValue = encoding.UnmarshalUint64(src) + ch.maxValue = encoding.UnmarshalUint64(src[8:]) + src = src[16:] + + tail, err = ch.unmarshalValuesAndBloomFilters(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeTimestampISO8601 for column %q: %w", ch.name, err) + } + src = tail + default: + return srcOrig, fmt.Errorf("unexpected valueType=%d for column %q", ch.valueType, ch.name) + } + + return src, nil +} + +func (ch *columnHeader) unmarshalValuesAndBloomFilters(src []byte) ([]byte, error) { + srcOrig := src + + tail, err := ch.unmarshalValues(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal values: %w", err) + } + src = tail + + tail, err = ch.unmarshalBloomFilters(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal bloom filters: %w", err) + } + src = tail + + return src, nil +} + +func (ch *columnHeader) unmarshalValues(src []byte) ([]byte, error) { + srcOrig := src + + tail, n, err := encoding.UnmarshalVarUint64(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal valuesOffset: %w", err) + } + ch.valuesOffset = n + src = tail + + tail, n, err = encoding.UnmarshalVarUint64(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal 
valuesSize: %w", err)
+	}
+	if n > maxValuesBlockSize {
+		return srcOrig, fmt.Errorf("too big valuesSize: %d bytes; mustn't exceed %d bytes", n, maxValuesBlockSize)
+	}
+	ch.valuesSize = n
+	src = tail
+
+	return src, nil
+}
+
+func (ch *columnHeader) unmarshalBloomFilters(src []byte) ([]byte, error) {
+	srcOrig := src
+
+	tail, n, err := encoding.UnmarshalVarUint64(src)
+	if err != nil {
+		return srcOrig, fmt.Errorf("cannot unmarshal bloomFilterOffset: %w", err)
+	}
+	ch.bloomFilterOffset = n
+	src = tail
+
+	tail, n, err = encoding.UnmarshalVarUint64(src)
+	if err != nil {
+		return srcOrig, fmt.Errorf("cannot unmarshal bloomFilterSize: %w", err)
+	}
+	if n > maxBloomFilterBlockSize {
+		return srcOrig, fmt.Errorf("too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", n, maxBloomFilterBlockSize)
+	}
+	ch.bloomFilterSize = n
+	src = tail
+
+	return src, nil
+}
+
+// timestampsHeader contains the information about the timestamps block.
+type timestampsHeader struct {
+	// blockOffset is an offset of timestamps block inside timestampsFilename file
+	blockOffset uint64
+
+	// blockSize is the size of the timestamps block inside timestampsFilename file
+	blockSize uint64
+
+	// minTimestamp is the minimum timestamp seen in the block
+	minTimestamp int64
+
+	// maxTimestamp is the maximum timestamp seen in the block
+	maxTimestamp int64
+
+	// marshalType is the type used for encoding the timestamps block
+	marshalType encoding.MarshalType
+}
+
+// reset resets th, so it can be reused
+func (th *timestampsHeader) reset() {
+	th.blockOffset = 0
+	th.blockSize = 0
+	th.minTimestamp = 0
+	th.maxTimestamp = 0
+	th.marshalType = 0
+}
+
+func (th *timestampsHeader) copyFrom(src *timestampsHeader) {
+	th.blockOffset = src.blockOffset
+	th.blockSize = src.blockSize
+	th.minTimestamp = src.minTimestamp
+	th.maxTimestamp = src.maxTimestamp
+	th.marshalType = src.marshalType
+}
+
+// marshal appends marshaled th to dst and returns the result.
+func (th *timestampsHeader) marshal(dst []byte) []byte {
+	dst = encoding.MarshalUint64(dst, th.blockOffset)
+	dst = encoding.MarshalUint64(dst, th.blockSize)
+	dst = encoding.MarshalUint64(dst, uint64(th.minTimestamp))
+	dst = encoding.MarshalUint64(dst, uint64(th.maxTimestamp))
+	dst = append(dst, byte(th.marshalType))
+	return dst
+}
+
+// unmarshal unmarshals th from src and returns the tail left after the unmarshaling.
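//
// A marshaled timestampsHeader always occupies exactly 33 bytes: four
// fixed-width 8-byte fields (blockOffset, blockSize, minTimestamp, maxTimestamp)
// followed by a single marshalType byte, which is why unmarshal below requires
// at least 33 bytes of input.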
+func (th *timestampsHeader) unmarshal(src []byte) ([]byte, error) { + th.reset() + + if len(src) < 33 { + return src, fmt.Errorf("cannot unmarshal timestampsHeader from %d bytes; need at least 33 bytes", len(src)) + } + + th.blockOffset = encoding.UnmarshalUint64(src) + th.blockSize = encoding.UnmarshalUint64(src[8:]) + th.minTimestamp = int64(encoding.UnmarshalUint64(src[16:])) + th.maxTimestamp = int64(encoding.UnmarshalUint64(src[24:])) + th.marshalType = encoding.MarshalType(src[32]) + + return src[33:], nil +} diff --git a/lib/logstorage/block_header_test.go b/lib/logstorage/block_header_test.go new file mode 100644 index 000000000..d6df322f6 --- /dev/null +++ b/lib/logstorage/block_header_test.go @@ -0,0 +1,454 @@ +package logstorage + +import ( + "reflect" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" +) + +func TestBlockHeaderMarshalUnmarshal(t *testing.T) { + f := func(bh *blockHeader, marshaledLen int) { + t.Helper() + data := bh.marshal(nil) + if len(data) != marshaledLen { + t.Fatalf("unexpected lengths of the marshaled blockHeader; got %d; want %d", len(data), marshaledLen) + } + bh2 := &blockHeader{} + tail, err := bh2.unmarshal(data) + if err != nil { + t.Fatalf("unexpected error in unmarshal: %s", err) + } + if len(tail) > 0 { + t.Fatalf("unexpected non-empty tail after unmarshal: %X", tail) + } + if !reflect.DeepEqual(bh, bh2) { + t.Fatalf("unexpected blockHeader unmarshaled\ngot\n%v\nwant\n%v", bh2, bh) + } + } + f(&blockHeader{}, 61) + f(&blockHeader{ + streamID: streamID{ + tenantID: TenantID{ + AccountID: 123, + ProjectID: 456, + }, + id: u128{ + lo: 3443, + hi: 23434, + }, + }, + uncompressedSizeBytes: 4344, + rowsCount: 1234, + timestampsHeader: timestampsHeader{ + blockOffset: 13234, + blockSize: 8843, + minTimestamp: -4334, + maxTimestamp: 23434, + marshalType: encoding.MarshalTypeNearestDelta2, + }, + columnsHeaderOffset: 4384, + columnsHeaderSize: 894, + }, 65) +} + +func TestColumnsHeaderMarshalUnmarshal(t *testing.T) { + f := func(csh *columnsHeader, marshaledLen int) { + t.Helper() + data := csh.marshal(nil) + if len(data) != marshaledLen { + t.Fatalf("unexpected lengths of the marshaled columnsHeader; got %d; want %d", len(data), marshaledLen) + } + csh2 := &columnsHeader{} + err := csh2.unmarshal(data) + if err != nil { + t.Fatalf("unexpected error in unmarshal: %s", err) + } + if !reflect.DeepEqual(csh, csh2) { + t.Fatalf("unexpected blockHeader unmarshaled\ngot\n%v\nwant\n%v", csh2, csh) + } + } + f(&columnsHeader{}, 2) + f(&columnsHeader{ + columnHeaders: []columnHeader{ + { + name: "foobar", + valueType: valueTypeString, + valuesOffset: 12345, + valuesSize: 23434, + bloomFilterOffset: 89843, + bloomFilterSize: 8934, + }, + { + name: "message", + valueType: valueTypeUint16, + minValue: 123, + maxValue: 456, + valuesOffset: 3412345, + valuesSize: 234434, + bloomFilterOffset: 83, + bloomFilterSize: 34, + }, + }, + constColumns: []Field{ + { + Name: "foo", + Value: "bar", + }, + }, + }, 50) +} + +func TestBlockHeaderUnmarshalFailure(t *testing.T) { + f := func(data []byte) { + t.Helper() + dataOrig := append([]byte{}, data...) 
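// Side note on the marshaled-length expectations in TestBlockHeaderMarshalUnmarshal
// above, assuming streamID marshals to a fixed 24 bytes (an 8-byte TenantID plus
// a 16-byte u128 id): an empty blockHeader takes 24 + 1 + 1 bytes (var-encoded
// zero uncompressedSizeBytes and rowsCount) + 33 bytes (timestampsHeader) + 1 + 1
// bytes (var-encoded zero columnsHeader offset and size) = 61 bytes, while in the
// populated case each of those four varints grows to 2 bytes, giving 65 bytes.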
+ bh := getBlockHeader() + defer putBlockHeader(bh) + tail, err := bh.unmarshal(data) + if err == nil { + t.Fatalf("expecting non-nil error") + } + if string(tail) != string(dataOrig) { + t.Fatalf("unexpected tail;\ngot\n%q\nwant\n%q", tail, dataOrig) + } + } + f(nil) + f([]byte("foo")) + + bh := blockHeader{ + streamID: streamID{ + tenantID: TenantID{ + AccountID: 123, + ProjectID: 456, + }, + id: u128{ + lo: 3443, + hi: 23434, + }, + }, + uncompressedSizeBytes: 4344, + rowsCount: 1234, + timestampsHeader: timestampsHeader{ + blockOffset: 13234, + blockSize: 8843, + minTimestamp: -4334, + maxTimestamp: 23434, + marshalType: encoding.MarshalTypeNearestDelta2, + }, + columnsHeaderOffset: 4384, + columnsHeaderSize: 894, + } + data := bh.marshal(nil) + for len(data) > 0 { + data = data[:len(data)-1] + f(data) + } +} + +func TestColumnsHeaderUnmarshalFailure(t *testing.T) { + f := func(data []byte) { + t.Helper() + csh := getColumnsHeader() + defer putColumnsHeader(csh) + err := csh.unmarshal(data) + if err == nil { + t.Fatalf("expecting non-nil error") + } + } + f(nil) + f([]byte("foo")) + + csh := columnsHeader{ + columnHeaders: []columnHeader{ + { + name: "foobar", + valueType: valueTypeString, + valuesOffset: 12345, + valuesSize: 23434, + bloomFilterOffset: 89843, + bloomFilterSize: 8934, + }, + { + name: "message", + valueType: valueTypeUint16, + minValue: 123, + maxValue: 456, + valuesOffset: 3412345, + valuesSize: 234434, + bloomFilterOffset: 83, + bloomFilterSize: 34, + }, + }, + constColumns: []Field{ + { + Name: "foo", + Value: "bar", + }, + }, + } + data := csh.marshal(nil) + for len(data) > 0 { + data = data[:len(data)-1] + f(data) + } +} + +func TestBlockHeaderReset(t *testing.T) { + bh := &blockHeader{ + streamID: streamID{ + tenantID: TenantID{ + AccountID: 123, + ProjectID: 456, + }, + id: u128{ + lo: 3443, + hi: 23434, + }, + }, + uncompressedSizeBytes: 8984, + rowsCount: 1234, + timestampsHeader: timestampsHeader{ + blockOffset: 13234, + blockSize: 8843, + minTimestamp: -4334, + maxTimestamp: 23434, + marshalType: encoding.MarshalTypeNearestDelta2, + }, + columnsHeaderOffset: 12332, + columnsHeaderSize: 234, + } + bh.reset() + bhZero := &blockHeader{} + if !reflect.DeepEqual(bh, bhZero) { + t.Fatalf("unexpected non-zero blockHeader after reset: %v", bh) + } +} + +func TestColumnsHeaderReset(t *testing.T) { + csh := &columnsHeader{ + columnHeaders: []columnHeader{ + { + name: "foobar", + valueType: valueTypeString, + valuesOffset: 12345, + valuesSize: 23434, + bloomFilterOffset: 89843, + bloomFilterSize: 8934, + }, + { + name: "message", + valueType: valueTypeUint16, + minValue: 123, + maxValue: 456, + valuesOffset: 3412345, + valuesSize: 234434, + bloomFilterOffset: 83, + bloomFilterSize: 34, + }, + }, + constColumns: []Field{ + { + Name: "foo", + Value: "bar", + }, + }, + } + csh.reset() + cshZero := &columnsHeader{ + columnHeaders: []columnHeader{}, + constColumns: []Field{}, + } + if !reflect.DeepEqual(csh, cshZero) { + t.Fatalf("unexpected non-zero columnsHeader after reset: %v", csh) + } +} + +func TestMarshalUnmarshalBlockHeaders(t *testing.T) { + f := func(bhs []blockHeader, marshaledLen int) { + t.Helper() + var data []byte + for i := range bhs { + data = bhs[i].marshal(data) + } + if len(data) != marshaledLen { + t.Fatalf("unexpected length for marshaled blockHeader entries; got %d; want %d", len(data), marshaledLen) + } + bhs2, err := unmarshalBlockHeaders(nil, data) + if err != nil { + t.Fatalf("unexpected error when unmarshaling blockHeader entries: %s", err) + } 
+ if !reflect.DeepEqual(bhs, bhs2) { + t.Fatalf("unexpected blockHeader entries unmarshaled\ngot\n%v\nwant\n%v", bhs2, bhs) + } + } + f(nil, 0) + f([]blockHeader{{}}, 61) + f([]blockHeader{ + {}, + { + streamID: streamID{ + tenantID: TenantID{ + AccountID: 123, + ProjectID: 456, + }, + id: u128{ + lo: 3443, + hi: 23434, + }, + }, + uncompressedSizeBytes: 89894, + rowsCount: 1234, + timestampsHeader: timestampsHeader{ + blockOffset: 13234, + blockSize: 8843, + minTimestamp: -4334, + maxTimestamp: 23434, + marshalType: encoding.MarshalTypeNearestDelta2, + }, + columnsHeaderOffset: 12332, + columnsHeaderSize: 234, + }, + }, 127) +} + +func TestColumnHeaderMarshalUnmarshal(t *testing.T) { + f := func(ch *columnHeader, marshaledLen int) { + t.Helper() + data := ch.marshal(nil) + if len(data) != marshaledLen { + t.Fatalf("unexpected marshaled length of columnHeader; got %d; want %d", len(data), marshaledLen) + } + var ch2 columnHeader + tail, err := ch2.unmarshal(data) + if err != nil { + t.Fatalf("unexpected error in umarshal(%v): %s", ch, err) + } + if len(tail) > 0 { + t.Fatalf("unexpected non-empty tail after unmarshal(%v): %X", ch, tail) + } + if !reflect.DeepEqual(ch, &ch2) { + t.Fatalf("unexpected columnHeader after unmarshal;\ngot\n%v\nwant\n%v", &ch2, ch) + } + } + f(&columnHeader{ + name: "foo", + valueType: valueTypeUint8, + }, 11) + ch := &columnHeader{ + name: "foobar", + valueType: valueTypeDict, + + valuesOffset: 12345, + valuesSize: 254452, + } + ch.valuesDict.getOrAdd("abc") + f(ch, 18) +} + +func TestColumnHeaderUnmarshalFailure(t *testing.T) { + f := func(data []byte) { + t.Helper() + dataOrig := append([]byte{}, data...) + var ch columnHeader + tail, err := ch.unmarshal(data) + if err == nil { + t.Fatalf("expecting non-nil error") + } + if string(tail) != string(dataOrig) { + t.Fatalf("unexpected tail left; got %q; want %q", tail, dataOrig) + } + } + f(nil) + f([]byte("foo")) + + ch := &columnHeader{ + name: "abc", + valueType: valueTypeUint16, + bloomFilterSize: 3244, + } + data := ch.marshal(nil) + f(data[:len(data)-1]) +} + +func TestColumnHeaderReset(t *testing.T) { + ch := &columnHeader{ + name: "foobar", + valueType: valueTypeUint16, + + valuesOffset: 12345, + valuesSize: 254452, + + bloomFilterOffset: 34898234, + bloomFilterSize: 873434, + } + ch.valuesDict.getOrAdd("abc") + ch.reset() + chZero := &columnHeader{} + chZero.valuesDict.values = []string{} + if !reflect.DeepEqual(ch, chZero) { + t.Fatalf("unexpected non-zero columnHeader after reset: %v", ch) + } +} + +func TestTimestampsHeaderMarshalUnmarshal(t *testing.T) { + f := func(th *timestampsHeader, marshaledLen int) { + t.Helper() + data := th.marshal(nil) + if len(data) != marshaledLen { + t.Fatalf("unexpected length of marshaled timestampsHeader; got %d; want %d", len(data), marshaledLen) + } + var th2 timestampsHeader + tail, err := th2.unmarshal(data) + if err != nil { + t.Fatalf("unexpected error in unmarshal(%v): %s", th, err) + } + if len(tail) > 0 { + t.Fatalf("unexpected non-nil tail after unmarshal(%v): %X", th, tail) + } + if !reflect.DeepEqual(th, &th2) { + t.Fatalf("unexpected timestampsHeader after unmarshal; got\n%v\nwant\n%v", &th2, th) + } + } + f(×tampsHeader{}, 33) + + f(×tampsHeader{ + blockOffset: 12345, + blockSize: 3424834, + minTimestamp: -123443, + maxTimestamp: 234343, + marshalType: encoding.MarshalTypeZSTDNearestDelta, + }, 33) +} + +func TestTimestampsHeaderUnmarshalFailure(t *testing.T) { + f := func(data []byte) { + t.Helper() + dataOrig := append([]byte{}, data...) 
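// The columnHeader length expectations in TestColumnHeaderMarshalUnmarshal above
// decompose in a similar way: the 11-byte uint8 case is 4 bytes for the
// length-prefixed name "foo", 1 byte for valueType, 1+1 bytes for min/max and
// 2+2 bytes of var-encoded zero values and bloom filter offsets/sizes; the
// 18-byte dict case replaces min/max and the bloom filter fields with the
// marshaled one-entry dict plus var-encoded valuesOffset (2 bytes) and
// valuesSize (3 bytes), assuming the dict marshals its entry count and each
// value as length-prefixed bytes.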
+ var th timestampsHeader + tail, err := th.unmarshal(data) + if err == nil { + t.Fatalf("expecting non-nil error") + } + if string(tail) != string(dataOrig) { + t.Fatalf("unexpected tail left; got %q; want %q", tail, dataOrig) + } + } + f(nil) + f([]byte("foo")) +} + +func TestTimestampsHeaderReset(t *testing.T) { + th := ×tampsHeader{ + blockOffset: 12345, + blockSize: 3424834, + minTimestamp: -123443, + maxTimestamp: 234343, + marshalType: encoding.MarshalTypeZSTDNearestDelta, + } + th.reset() + thZero := ×tampsHeader{} + if !reflect.DeepEqual(th, thZero) { + t.Fatalf("unexpected non-zero timestampsHeader after reset: %v", th) + } +} diff --git a/lib/logstorage/block_search.go b/lib/logstorage/block_search.go new file mode 100644 index 000000000..4157189d0 --- /dev/null +++ b/lib/logstorage/block_search.go @@ -0,0 +1,645 @@ +package logstorage + +import ( + "strconv" + "sync" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +type blockSearchWork struct { + // p is the part where the block belongs to. + p *part + + // so contains search options for the block search + so *searchOptions + + // bh is the header of the block to search. + bh blockHeader +} + +func newBlockSearchWork(p *part, so *searchOptions, bh *blockHeader) *blockSearchWork { + var bsw blockSearchWork + bsw.p = p + bsw.so = so + bsw.bh.copyFrom(bh) + return &bsw +} + +func getBlockSearch() *blockSearch { + v := blockSearchPool.Get() + if v == nil { + return &blockSearch{} + } + return v.(*blockSearch) +} + +func putBlockSearch(bs *blockSearch) { + bs.reset() + blockSearchPool.Put(bs) +} + +var blockSearchPool sync.Pool + +type blockSearch struct { + // bsw is the actual work to perform on the given block pointed by bsw.ph + bsw *blockSearchWork + + // br contains result for the search in the block after search() call + br blockResult + + // timestampsCache contains cached timestamps for the given block. + timestampsCache *encoding.Int64s + + // bloomFilterCache contains cached bloom filters for requested columns in the given block + bloomFilterCache map[string]*bloomFilter + + // valuesCache contains cached values for requested columns in the given block + valuesCache map[string]*stringBucket + + // sbu is used for unmarshaling local columns + sbu stringsBlockUnmarshaler + + // csh is the columnsHeader associated with the given block + csh columnsHeader +} + +func (bs *blockSearch) reset() { + bs.bsw = nil + bs.br.reset() + + if bs.timestampsCache != nil { + encoding.PutInt64s(bs.timestampsCache) + bs.timestampsCache = nil + } + + bloomFilterCache := bs.bloomFilterCache + for k, bf := range bloomFilterCache { + putBloomFilter(bf) + delete(bloomFilterCache, k) + } + + valuesCache := bs.valuesCache + for k, values := range valuesCache { + putStringBucket(values) + delete(valuesCache, k) + } + + bs.sbu.reset() + bs.csh.reset() +} + +func (bs *blockSearch) partPath() string { + return bs.bsw.p.path +} + +func (bs *blockSearch) search(bsw *blockSearchWork) { + bs.reset() + + bs.bsw = bsw + + bs.csh.initFromBlockHeader(bsw.p, &bsw.bh) + + // search rows matching the given filter + bm := getFilterBitmap(int(bsw.bh.rowsCount)) + bm.setBits() + bs.bsw.so.filter.apply(bs, bm) + + bs.br.mustInit(bs, bm) + if bm.isZero() { + putFilterBitmap(bm) + return + } + + // fetch the requested columns to bs.br. 
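// Each requested column is resolved in one of four ways in the loop below: the
// synthetic _stream and _time columns are materialized from the stream tags and
// from the timestamps block, const columns are answered straight from the
// columns header, and everything else is unpacked from the per-column values
// data; a column missing from the block is returned as an empty const column.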
+ for _, columnName := range bs.bsw.so.resultColumnNames { + switch columnName { + case "_stream": + bs.br.addStreamColumn(bs) + case "_time": + bs.br.addTimeColumn(bs) + default: + v := bs.csh.getConstColumnValue(columnName) + if v != "" { + bs.br.addConstColumn(v) + continue + } + ch := bs.csh.getColumnHeader(columnName) + if ch == nil { + bs.br.addConstColumn("") + } else { + bs.br.addColumn(bs, ch, bm) + } + } + } + putFilterBitmap(bm) +} + +func (csh *columnsHeader) initFromBlockHeader(p *part, bh *blockHeader) { + bb := longTermBufPool.Get() + columnsHeaderSize := bh.columnsHeaderSize + if columnsHeaderSize > maxColumnsHeaderSize { + logger.Panicf("FATAL: %s: columns header size cannot exceed %d bytes; got %d bytes", p.path, maxColumnsHeaderSize, columnsHeaderSize) + } + bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(columnsHeaderSize)) + p.columnsHeaderFile.MustReadAt(bb.B, int64(bh.columnsHeaderOffset)) + + if err := csh.unmarshal(bb.B); err != nil { + logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", p.path, err) + } + longTermBufPool.Put(bb) +} + +// getBloomFilterForColumn returns bloom filter for the given ch. +// +// The returned bloom filter belongs to bs, so it becomes invalid after bs reset. +func (bs *blockSearch) getBloomFilterForColumn(ch *columnHeader) *bloomFilter { + bf := bs.bloomFilterCache[ch.name] + if bf != nil { + return bf + } + + p := bs.bsw.p + + bloomFilterFile := p.fieldBloomFilterFile + if ch.name == "" { + bloomFilterFile = p.messageBloomFilterFile + } + + bb := longTermBufPool.Get() + bloomFilterSize := ch.bloomFilterSize + if bloomFilterSize > maxBloomFilterBlockSize { + logger.Panicf("FATAL: %s: bloom filter block size cannot exceed %d bytes; got %d bytes", bs.partPath(), maxBloomFilterBlockSize, bloomFilterSize) + } + bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(bloomFilterSize)) + bloomFilterFile.MustReadAt(bb.B, int64(ch.bloomFilterOffset)) + bf = getBloomFilter() + if err := bf.unmarshal(bb.B); err != nil { + logger.Panicf("FATAL: %s: cannot unmarshal bloom filter: %s", bs.partPath(), err) + } + longTermBufPool.Put(bb) + + if bs.bloomFilterCache == nil { + bs.bloomFilterCache = make(map[string]*bloomFilter) + } + bs.bloomFilterCache[ch.name] = bf + return bf +} + +// getValuesForColumn returns block values for the given ch. +// +// The returned values belong to bs, so they become invalid after bs reset. +func (bs *blockSearch) getValuesForColumn(ch *columnHeader) []string { + values := bs.valuesCache[ch.name] + if values != nil { + return values.a + } + + p := bs.bsw.p + + valuesFile := p.fieldValuesFile + if ch.name == "" { + valuesFile = p.messageValuesFile + } + + bb := longTermBufPool.Get() + valuesSize := ch.valuesSize + if valuesSize > maxValuesBlockSize { + logger.Panicf("FATAL: %s: values block size cannot exceed %d bytes; got %d bytes", bs.partPath(), maxValuesBlockSize, valuesSize) + } + bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(valuesSize)) + valuesFile.MustReadAt(bb.B, int64(ch.valuesOffset)) + + values = getStringBucket() + var err error + values.a, err = bs.sbu.unmarshal(values.a[:0], bb.B, bs.bsw.bh.rowsCount) + longTermBufPool.Put(bb) + if err != nil { + logger.Panicf("FATAL: %s: cannot unmarshal column %q: %s", bs.partPath(), ch.name, err) + } + + if bs.valuesCache == nil { + bs.valuesCache = make(map[string]*stringBucket) + } + bs.valuesCache[ch.name] = values + return values.a +} + +// getTimestamps returns timestamps for the given bs. 
+// +// The returned timestamps belong to bs, so they become invalid after bs reset. +func (bs *blockSearch) getTimestamps() []int64 { + timestamps := bs.timestampsCache + if timestamps != nil { + return timestamps.A + } + + p := bs.bsw.p + + bb := longTermBufPool.Get() + th := &bs.bsw.bh.timestampsHeader + blockSize := th.blockSize + if blockSize > maxTimestampsBlockSize { + logger.Panicf("FATAL: %s: timestamps block size cannot exceed %d bytes; got %d bytes", bs.partPath(), maxTimestampsBlockSize, blockSize) + } + bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(blockSize)) + p.timestampsFile.MustReadAt(bb.B, int64(th.blockOffset)) + + rowsCount := int(bs.bsw.bh.rowsCount) + timestamps = encoding.GetInt64s(rowsCount) + var err error + timestamps.A, err = encoding.UnmarshalTimestamps(timestamps.A[:0], bb.B, th.marshalType, th.minTimestamp, rowsCount) + longTermBufPool.Put(bb) + if err != nil { + logger.Panicf("FATAL: %s: cannot unmarshal timestamps: %s", bs.partPath(), err) + } + bs.timestampsCache = timestamps + return timestamps.A +} + +// mustReadBlockHeaders reads ih block headers from p, appends them to dst and returns the result. +func (ih *indexBlockHeader) mustReadBlockHeaders(dst []blockHeader, p *part) []blockHeader { + bbCompressed := longTermBufPool.Get() + indexBlockSize := ih.indexBlockSize + if indexBlockSize > maxIndexBlockSize { + logger.Panicf("FATAL: %s: index block size cannot exceed %d bytes; got %d bytes", p.indexFile.Path(), maxIndexBlockSize, indexBlockSize) + } + bbCompressed.B = bytesutil.ResizeNoCopyMayOverallocate(bbCompressed.B, int(indexBlockSize)) + p.indexFile.MustReadAt(bbCompressed.B, int64(ih.indexBlockOffset)) + + bb := longTermBufPool.Get() + var err error + bb.B, err = encoding.DecompressZSTD(bb.B, bbCompressed.B) + longTermBufPool.Put(bbCompressed) + if err != nil { + logger.Panicf("FATAL: %s: cannot decompress indexBlock read at offset %d with size %d: %s", p.indexFile.Path(), ih.indexBlockOffset, ih.indexBlockSize, err) + } + + dst, err = unmarshalBlockHeaders(dst, bb.B) + longTermBufPool.Put(bb) + if err != nil { + logger.Panicf("FATAL: %s: cannot unmarshal block headers read at offset %d with size %d: %s", p.indexFile.Path(), ih.indexBlockOffset, ih.indexBlockSize, err) + } + + return dst +} + +type blockResult struct { + buf []byte + valuesBuf []string + + // streamID is streamID for the given blockResult + streamID streamID + + // cs contain values for result columns + cs []blockResultColumn + + // timestamps contain timestamps for the selected log entries + timestamps []int64 +} + +func (br *blockResult) reset() { + br.buf = br.buf[:0] + + vb := br.valuesBuf + for i := range vb { + vb[i] = "" + } + br.valuesBuf = vb[:0] + + br.streamID.reset() + + cs := br.cs + for i := range cs { + cs[i].reset() + } + br.cs = cs[:0] + + br.timestamps = br.timestamps[:0] +} + +func (br *blockResult) RowsCount() int { + return len(br.timestamps) +} + +func (br *blockResult) mustInit(bs *blockSearch, bm *filterBitmap) { + br.reset() + + br.streamID = bs.bsw.bh.streamID + + if !bm.isZero() { + // Initialize timestamps, since they are used for determining the number of rows in br.RowsCount() + srcTimestamps := bs.getTimestamps() + dstTimestamps := br.timestamps[:0] + bm.forEachSetBit(func(idx int) bool { + ts := srcTimestamps[idx] + dstTimestamps = append(dstTimestamps, ts) + return true + }) + br.timestamps = dstTimestamps + } +} + +func (br *blockResult) addColumn(bs *blockSearch, ch *columnHeader, bm *filterBitmap) { + buf := br.buf + valuesBuf := 
br.valuesBuf + valuesBufLen := len(valuesBuf) + var dictValues []string + + appendValue := func(v string) { + bufLen := len(buf) + buf = append(buf, v...) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + valuesBuf = append(valuesBuf, s) + } + + switch ch.valueType { + case valueTypeString: + visitValues(bs, ch, bm, func(v string) bool { + appendValue(v) + return true + }) + case valueTypeDict: + dictValues = ch.valuesDict.values + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 1 { + logger.Panicf("FATAL: %s: unexpected dict value size for column %q; got %d bytes; want 1 byte", bs.partPath(), ch.name, len(v)) + } + dictIdx := v[0] + if int(dictIdx) >= len(dictValues) { + logger.Panicf("FATAL: %s: too big dict index for column %q: %d; should be smaller than %d", bs.partPath(), ch.name, dictIdx, len(dictValues)) + } + appendValue(v) + return true + }) + case valueTypeUint8: + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 1 { + logger.Panicf("FATAL: %s: unexpected size for uint8 column %q; got %d bytes; want 1 byte", bs.partPath(), ch.name, len(v)) + } + appendValue(v) + return true + }) + case valueTypeUint16: + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 2 { + logger.Panicf("FATAL: %s: unexpected size for uint16 column %q; got %d bytes; want 2 bytes", bs.partPath(), ch.name, len(v)) + } + appendValue(v) + return true + }) + case valueTypeUint32: + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 4 { + logger.Panicf("FATAL: %s: unexpected size for uint32 column %q; got %d bytes; want 4 bytes", bs.partPath(), ch.name, len(v)) + } + appendValue(v) + return true + }) + case valueTypeUint64: + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 8 { + logger.Panicf("FATAL: %s: unexpected size for uint64 column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v)) + } + appendValue(v) + return true + }) + case valueTypeFloat64: + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 8 { + logger.Panicf("FATAL: %s: unexpected size for float64 column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v)) + } + appendValue(v) + return true + }) + case valueTypeIPv4: + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 4 { + logger.Panicf("FATAL: %s: unexpected size for ipv4 column %q; got %d bytes; want 4 bytes", bs.partPath(), ch.name, len(v)) + } + appendValue(v) + return true + }) + case valueTypeTimestampISO8601: + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 8 { + logger.Panicf("FATAL: %s: unexpected size for timestmap column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v)) + } + appendValue(v) + return true + }) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d for column %q", bs.partPath(), ch.valueType, ch.name) + } + + encodedValues := valuesBuf[valuesBufLen:] + + valuesBufLen = len(valuesBuf) + for _, v := range dictValues { + appendValue(v) + } + dictValues = valuesBuf[valuesBufLen:] + + br.cs = append(br.cs, blockResultColumn{ + valueType: ch.valueType, + dictValues: dictValues, + encodedValues: encodedValues, + }) + br.buf = buf + br.valuesBuf = valuesBuf +} + +func (br *blockResult) addTimeColumn(bs *blockSearch) { + br.cs = append(br.cs, blockResultColumn{ + isTime: true, + }) +} + +func (br *blockResult) addStreamColumn(bs *blockSearch) { + bb := bbPool.Get() + bb.B = bs.bsw.p.pt.appendStreamTagsByStreamID(bb.B[:0], &br.streamID) + if len(bb.B) > 0 { + st := GetStreamTags() + mustUnmarshalStreamTags(st, bb.B) + bb.B = 
st.marshalString(bb.B[:0]) + PutStreamTags(st) + } + s := bytesutil.ToUnsafeString(bb.B) + br.addConstColumn(s) + bbPool.Put(bb) +} + +func (br *blockResult) addConstColumn(value string) { + buf := br.buf + bufLen := len(buf) + buf = append(buf, value...) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + br.buf = buf + + valuesBuf := br.valuesBuf + valuesBufLen := len(valuesBuf) + valuesBuf = append(valuesBuf, s) + br.valuesBuf = valuesBuf + + br.cs = append(br.cs, blockResultColumn{ + isConst: true, + valueType: valueTypeUnknown, + encodedValues: valuesBuf[valuesBufLen:], + }) +} + +// getColumnValues returns values for the column with the given idx. +// +// The returned values are valid until br.reset() is called. +func (br *blockResult) getColumnValues(idx int) []string { + c := &br.cs[idx] + if c.values != nil { + return c.values + } + + buf := br.buf + valuesBuf := br.valuesBuf + valuesBufLen := len(valuesBuf) + + if c.isConst { + v := c.encodedValues[0] + for range br.timestamps { + valuesBuf = append(valuesBuf, v) + } + c.values = valuesBuf[valuesBufLen:] + br.valuesBuf = valuesBuf + return c.values + } + if c.isTime { + for _, timestamp := range br.timestamps { + t := time.Unix(0, timestamp).UTC() + bufLen := len(buf) + buf = t.AppendFormat(buf, time.RFC3339Nano) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + valuesBuf = append(valuesBuf, s) + } + c.values = valuesBuf[valuesBufLen:] + br.buf = buf + br.valuesBuf = valuesBuf + return c.values + } + + appendValue := func(v string) { + bufLen := len(buf) + buf = append(buf, v...) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + valuesBuf = append(valuesBuf, s) + } + + switch c.valueType { + case valueTypeString: + c.values = c.encodedValues + return c.values + case valueTypeDict: + dictValues := c.dictValues + for _, v := range c.encodedValues { + dictIdx := v[0] + appendValue(dictValues[dictIdx]) + } + case valueTypeUint8: + bb := bbPool.Get() + for _, v := range c.encodedValues { + n := uint64(v[0]) + bb.B = strconv.AppendUint(bb.B[:0], n, 10) + appendValue(bytesutil.ToUnsafeString(bb.B)) + } + bbPool.Put(bb) + case valueTypeUint16: + bb := bbPool.Get() + for _, v := range c.encodedValues { + b := bytesutil.ToUnsafeBytes(v) + n := uint64(encoding.UnmarshalUint16(b)) + bb.B = strconv.AppendUint(bb.B[:0], n, 10) + appendValue(bytesutil.ToUnsafeString(bb.B)) + } + bbPool.Put(bb) + case valueTypeUint32: + bb := bbPool.Get() + for _, v := range c.encodedValues { + b := bytesutil.ToUnsafeBytes(v) + n := uint64(encoding.UnmarshalUint32(b)) + bb.B = strconv.AppendUint(bb.B[:0], n, 10) + appendValue(bytesutil.ToUnsafeString(bb.B)) + } + bbPool.Put(bb) + case valueTypeUint64: + bb := bbPool.Get() + for _, v := range c.encodedValues { + b := bytesutil.ToUnsafeBytes(v) + n := encoding.UnmarshalUint64(b) + bb.B = strconv.AppendUint(bb.B[:0], n, 10) + appendValue(bytesutil.ToUnsafeString(bb.B)) + } + bbPool.Put(bb) + case valueTypeFloat64: + bb := bbPool.Get() + for _, v := range c.encodedValues { + bb.B = toFloat64String(bb.B[:0], v) + appendValue(bytesutil.ToUnsafeString(bb.B)) + } + bbPool.Put(bb) + case valueTypeIPv4: + bb := bbPool.Get() + for _, v := range c.encodedValues { + bb.B = toIPv4String(bb.B[:0], v) + appendValue(bytesutil.ToUnsafeString(bb.B)) + } + bbPool.Put(bb) + case valueTypeTimestampISO8601: + bb := bbPool.Get() + for _, v := range c.encodedValues { + bb.B = toTimestampISO8601String(bb.B[:0], v) + appendValue(bytesutil.ToUnsafeString(bb.B)) + } + bbPool.Put(bb) + default: + logger.Panicf("BUG: unknown valueType=%d", 
c.valueType) + } + + c.values = valuesBuf[valuesBufLen:] + br.buf = buf + br.valuesBuf = valuesBuf + + return c.values +} + +type blockResultColumn struct { + // isConst is set to true if the column is const. + // + // The column value is stored in encodedValues[0] + isConst bool + + // isTime is set to true if the column contains _time values. + // + // The column values are stored in blockResult.timestamps + isTime bool + + // valueType is the type of non-cost value + valueType valueType + + // dictValues contain dictionary values for valueTypeDict column + dictValues []string + + // encodedValues contain encoded values for non-const column + encodedValues []string + + // values contain decoded values after getColumnValues() call for the given column + values []string +} + +func (c *blockResultColumn) reset() { + c.isConst = false + c.isTime = false + c.valueType = valueTypeUnknown + c.dictValues = nil + c.encodedValues = nil + c.values = nil +} diff --git a/lib/logstorage/block_stream_merger.go b/lib/logstorage/block_stream_merger.go new file mode 100644 index 000000000..6137c2406 --- /dev/null +++ b/lib/logstorage/block_stream_merger.go @@ -0,0 +1,288 @@ +package logstorage + +import ( + "container/heap" + "fmt" + "strings" + "sync" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// mustMergeBlockStreams merges bsrs to bsw and updates ph accordingly. +// +// Finalize() is guaranteed to be called on bsrs and bsw before returning from the func. +func mustMergeBlockStreams(ph *partHeader, bsw *blockStreamWriter, bsrs []*blockStreamReader, stopCh <-chan struct{}) { + bsm := getBlockStreamMerger() + bsm.mustInit(bsw, bsrs) + for len(bsm.readersHeap) > 0 { + if needStop(stopCh) { + break + } + bsr := bsm.readersHeap[0] + bsm.mustWriteBlock(&bsr.blockData, bsw) + if bsr.NextBlock() { + heap.Fix(&bsm.readersHeap, 0) + } else { + heap.Pop(&bsm.readersHeap) + } + } + bsm.mustFlushRows() + putBlockStreamMerger(bsm) + + bsw.Finalize(ph) + mustCloseBlockStreamReaders(bsrs) +} + +// blockStreamMerger merges block streams +type blockStreamMerger struct { + // bsw is the block stream writer to write the merged blocks. + bsw *blockStreamWriter + + // bsrs contains the original readers passed to mustInit(). + // They are used by ReadersPaths() + bsrs []*blockStreamReader + + // readersHeap contains a heap of readers to read blocks to merge. + readersHeap blockStreamReadersHeap + + // streamID is the stream ID for the pending data. + streamID streamID + + // sbu is the unmarshaler for strings in rows and rowsTmp. + sbu *stringsBlockUnmarshaler + + // vd is the decoder for unmarshaled strings. + vd *valuesDecoder + + // bd is the pending blockData. + // bd is unpacked into rows when needed. + bd blockData + + // rows is pending log entries. + rows rows + + // rowsTmp is temporary storage for log entries during merge. + rowsTmp rows + + // uncompressedRowsSizeBytes is the current size of uncompressed rows. 
+ // + // It is used for flushing rows to blocks when their size reaches maxUncompressedBlockSize + uncompressedRowsSizeBytes uint64 +} + +func (bsm *blockStreamMerger) reset() { + bsm.bsw = nil + + rhs := bsm.readersHeap + for i := range rhs { + rhs[i] = nil + } + bsm.readersHeap = rhs[:0] + + bsm.streamID.reset() + bsm.resetRows() +} + +func (bsm *blockStreamMerger) resetRows() { + if bsm.sbu != nil { + putStringsBlockUnmarshaler(bsm.sbu) + bsm.sbu = nil + } + if bsm.vd != nil { + putValuesDecoder(bsm.vd) + bsm.vd = nil + } + bsm.bd.reset() + + bsm.rows.reset() + bsm.rowsTmp.reset() + + bsm.uncompressedRowsSizeBytes = 0 +} + +func (bsm *blockStreamMerger) mustInit(bsw *blockStreamWriter, bsrs []*blockStreamReader) { + bsm.reset() + + bsm.bsw = bsw + bsm.bsrs = bsrs + + rsh := bsm.readersHeap[:0] + for _, bsr := range bsrs { + if bsr.NextBlock() { + rsh = append(rsh, bsr) + } + } + bsm.readersHeap = rsh + heap.Init(&bsm.readersHeap) +} + +// mustWriteBlock writes bd to bsm +func (bsm *blockStreamMerger) mustWriteBlock(bd *blockData, bsw *blockStreamWriter) { + bsm.checkNextBlock(bd) + switch { + case !bd.streamID.equal(&bsm.streamID): + // The bd contains another streamID. + // Write the current log entries under the current streamID, then process the bd. + bsm.mustFlushRows() + bsm.streamID = bd.streamID + if bd.uncompressedSizeBytes >= maxUncompressedBlockSize { + // Fast path - write full bd to the output without extracting log entries from it. + bsw.MustWriteBlockData(bd) + } else { + // Slow path - copy the bd to the curr bd. + bsm.bd.copyFrom(bd) + } + case bd.uncompressedSizeBytes >= maxUncompressedBlockSize: + // The bd contains the same streamID and it is full, + // so it can be written next after the current log entries + // without the need to merge the bd with the current log entries. + // Write the current log entries and then the bd. + bsm.mustFlushRows() + bsw.MustWriteBlockData(bd) + default: + // The bd contains the same streamID and it isn't full, + // so it must be merged with the current log entries. + bsm.mustMergeRows(bd) + } +} + +// checkNextBlock checks whether the bd can be written next after the current data. +func (bsm *blockStreamMerger) checkNextBlock(bd *blockData) { + if len(bsm.rows.timestamps) > 0 && bsm.bd.rowsCount > 0 { + logger.Panicf("BUG: bsm.bd must be empty when bsm.rows isn't empty! got %d log entries in bsm.bd", bsm.bd.rowsCount) + } + if bd.streamID.less(&bsm.streamID) { + logger.Panicf("FATAL: cannot merge %s: the streamID=%s for the next block is smaller than the streamID=%s for the current block", + bsm.ReadersPaths(), &bd.streamID, &bsm.streamID) + } + if !bd.streamID.equal(&bsm.streamID) { + return + } + // streamID at bd equals streamID at bsm. Check that minTimestamp in bd is bigger or equal to the minTimestmap at bsm. 
+ if bd.rowsCount == 0 { + return + } + nextMinTimestamp := bd.timestampsData.minTimestamp + if len(bsm.rows.timestamps) == 0 { + if bsm.bd.rowsCount == 0 { + return + } + minTimestamp := bsm.bd.timestampsData.minTimestamp + if nextMinTimestamp < minTimestamp { + logger.Panicf("FATAL: cannot merge %s: the next block's minTimestamp=%d is smaller than the minTimestamp=%d for the current block", + bsm.ReadersPaths(), nextMinTimestamp, minTimestamp) + } + return + } + minTimestamp := bsm.rows.timestamps[0] + if nextMinTimestamp < minTimestamp { + logger.Panicf("FATAL: cannot merge %s: the next block's minTimestamp=%d is smaller than the minTimestamp=%d for log entries for the current block", + bsm.ReadersPaths(), nextMinTimestamp, minTimestamp) + } +} + +// ReadersPaths returns paths for input blockStreamReaders +func (bsm *blockStreamMerger) ReadersPaths() string { + paths := make([]string, len(bsm.bsrs)) + for i, bsr := range bsm.bsrs { + paths[i] = bsr.Path() + } + return fmt.Sprintf("[%s]", strings.Join(paths, ",")) +} + +// mustMergeRows merges the current log entries inside bsm with bd log entries. +func (bsm *blockStreamMerger) mustMergeRows(bd *blockData) { + if bsm.bd.rowsCount > 0 { + // Unmarshal log entries from bsm.bd + bsm.mustUnmarshalRows(&bsm.bd) + bsm.bd.reset() + } + + // Unmarshal log entries from bd + rowsLen := len(bsm.rows.timestamps) + bsm.mustUnmarshalRows(bd) + + // Merge unmarshaled log entries + timestamps := bsm.rows.timestamps + rows := bsm.rows.rows + bsm.rowsTmp.mergeRows(timestamps[:rowsLen], timestamps[rowsLen:], rows[:rowsLen], rows[rowsLen:]) + bsm.rows, bsm.rowsTmp = bsm.rowsTmp, bsm.rows + bsm.rowsTmp.reset() + + if bsm.uncompressedRowsSizeBytes >= maxUncompressedBlockSize { + bsm.mustFlushRows() + } +} + +func (bsm *blockStreamMerger) mustUnmarshalRows(bd *blockData) { + rowsLen := len(bsm.rows.timestamps) + if bsm.sbu == nil { + bsm.sbu = getStringsBlockUnmarshaler() + } + if bsm.vd == nil { + bsm.vd = getValuesDecoder() + } + if err := bd.unmarshalRows(&bsm.rows, bsm.sbu, bsm.vd); err != nil { + logger.Panicf("FATAL: cannot merge %s: cannot unmarshal log entries from blockData: %s", bsm.ReadersPaths(), err) + } + bsm.uncompressedRowsSizeBytes += uncompressedRowsSizeBytes(bsm.rows.rows[rowsLen:]) +} + +func (bsm *blockStreamMerger) mustFlushRows() { + if len(bsm.rows.timestamps) == 0 { + bsm.bsw.MustWriteBlockData(&bsm.bd) + } else { + bsm.bsw.MustWriteRows(&bsm.streamID, bsm.rows.timestamps, bsm.rows.rows) + } + bsm.resetRows() +} + +func getBlockStreamMerger() *blockStreamMerger { + v := blockStreamMergerPool.Get() + if v == nil { + return &blockStreamMerger{} + } + return v.(*blockStreamMerger) +} + +func putBlockStreamMerger(bsm *blockStreamMerger) { + bsm.reset() + blockStreamMergerPool.Put(bsm) +} + +var blockStreamMergerPool sync.Pool + +type blockStreamReadersHeap []*blockStreamReader + +func (h *blockStreamReadersHeap) Len() int { + return len(*h) +} + +func (h *blockStreamReadersHeap) Less(i, j int) bool { + x := *h + a := &x[i].blockData + b := &x[j].blockData + if !a.streamID.equal(&b.streamID) { + return a.streamID.less(&b.streamID) + } + return a.timestampsData.minTimestamp < b.timestampsData.minTimestamp +} + +func (h *blockStreamReadersHeap) Swap(i, j int) { + x := *h + x[i], x[j] = x[j], x[i] +} + +func (h *blockStreamReadersHeap) Push(v interface{}) { + bsr := v.(*blockStreamReader) + *h = append(*h, bsr) +} + +func (h *blockStreamReadersHeap) Pop() interface{} { + x := *h + bsr := x[len(x)-1] + x[len(x)-1] = nil + *h = x[:len(x)-1] + 
return bsr +} diff --git a/lib/logstorage/block_stream_reader.go b/lib/logstorage/block_stream_reader.go new file mode 100644 index 000000000..00bcbc4e5 --- /dev/null +++ b/lib/logstorage/block_stream_reader.go @@ -0,0 +1,383 @@ +package logstorage + +import ( + "path/filepath" + "sync" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +type readerWithStats struct { + r filestream.ReadCloser + bytesRead uint64 +} + +func (r *readerWithStats) reset() { + r.r = nil + r.bytesRead = 0 +} + +func (r *readerWithStats) init(rc filestream.ReadCloser) { + r.reset() + + r.r = rc +} + +// Path returns the path to r file +func (r *readerWithStats) Path() string { + return r.r.Path() +} + +// MustReadFull reads len(data) to r. +func (r *readerWithStats) MustReadFull(data []byte) { + fs.MustReadData(r.r, data) + r.bytesRead += uint64(len(data)) +} + +func (r *readerWithStats) Read(p []byte) (int, error) { + n, err := r.r.Read(p) + r.bytesRead += uint64(n) + return n, err +} + +func (r *readerWithStats) MustClose() { + r.r.MustClose() + r.r = nil +} + +// streamReaders contains readers for blockStreamReader +type streamReaders struct { + metaindexReader readerWithStats + indexReader readerWithStats + columnsHeaderReader readerWithStats + timestampsReader readerWithStats + fieldValuesReader readerWithStats + fieldBloomFilterReader readerWithStats + messageValuesReader readerWithStats + messageBloomFilterReader readerWithStats +} + +func (sr *streamReaders) reset() { + sr.metaindexReader.reset() + sr.indexReader.reset() + sr.columnsHeaderReader.reset() + sr.timestampsReader.reset() + sr.fieldValuesReader.reset() + sr.fieldBloomFilterReader.reset() + sr.messageValuesReader.reset() + sr.messageBloomFilterReader.reset() +} + +func (sr *streamReaders) init(metaindexReader, indexReader, columnsHeaderReader, timestampsReader, fieldValuesReader, fieldBloomFilterReader, + messageValuesReader, messageBloomFilterReader filestream.ReadCloser, +) { + sr.metaindexReader.init(metaindexReader) + sr.indexReader.init(indexReader) + sr.columnsHeaderReader.init(columnsHeaderReader) + sr.timestampsReader.init(timestampsReader) + sr.fieldValuesReader.init(fieldValuesReader) + sr.fieldBloomFilterReader.init(fieldBloomFilterReader) + sr.messageValuesReader.init(messageValuesReader) + sr.messageBloomFilterReader.init(messageBloomFilterReader) +} + +func (sr *streamReaders) totalBytesRead() uint64 { + n := uint64(0) + n += sr.metaindexReader.bytesRead + n += sr.indexReader.bytesRead + n += sr.columnsHeaderReader.bytesRead + n += sr.timestampsReader.bytesRead + n += sr.fieldValuesReader.bytesRead + n += sr.fieldBloomFilterReader.bytesRead + n += sr.messageValuesReader.bytesRead + n += sr.messageBloomFilterReader.bytesRead + return n +} + +func (sr *streamReaders) MustClose() { + sr.metaindexReader.MustClose() + sr.indexReader.MustClose() + sr.columnsHeaderReader.MustClose() + sr.timestampsReader.MustClose() + sr.fieldValuesReader.MustClose() + sr.fieldBloomFilterReader.MustClose() + sr.messageValuesReader.MustClose() + sr.messageBloomFilterReader.MustClose() +} + +// blockStreamReader is used for reading blocks in streaming manner from a part. 
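+//
+// A rough lifecycle sketch based on the functions defined in this file
+// (partPath is a hypothetical placeholder):
+//
+//	bsr := getBlockStreamReader()
+//	bsr.MustInitFromFilePart(partPath)
+//	for bsr.NextBlock() {
+//		// process bsr.blockData
+//	}
+//	bsr.MustClose()
+//	putBlockStreamReader(bsr)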
+type blockStreamReader struct { + // blockData contains the data for the last read block + blockData blockData + + // ph is the header for the part + ph partHeader + + // streamReaders contains data readers in stream mode + streamReaders streamReaders + + // indexBlockHeaders contains the list of all the indexBlockHeader entries for the part + indexBlockHeaders []indexBlockHeader + + // blockHeaders contains the list of blockHeader entries for the current indexBlockHeader pointed by nextIndexBlockIdx + blockHeaders []blockHeader + + // nextIndexBlockIdx is the index of the next item to read from indexBlockHeaders + nextIndexBlockIdx int + + // nextBlockIdx is the index of the next item to read from blockHeaders + nextBlockIdx int + + // globalUncompressedSizeBytes is the total size of log entries seen in the part + globalUncompressedSizeBytes uint64 + + // globalRowsCount is the number of log entries seen in the part + globalRowsCount uint64 + + // globalBlocksCount is the number of blocks seen in the part + globalBlocksCount uint64 + + // sidLast is the stream id for the previously read block + sidLast streamID + + // minTimestampLast is the minimum timestamp for the previously read block + minTimestampLast int64 +} + +// reset resets bsr, so it can be re-used +func (bsr *blockStreamReader) reset() { + bsr.blockData.reset() + bsr.ph.reset() + bsr.streamReaders.reset() + + ihs := bsr.indexBlockHeaders + if len(ihs) > 10e3 { + // The ihs len is unbound, so it is better to drop too long indexBlockHeaders in order to reduce memory usage + ihs = nil + } + for i := range ihs { + ihs[i].reset() + } + bsr.indexBlockHeaders = ihs[:0] + + bhs := bsr.blockHeaders + for i := range bhs { + bhs[i].reset() + } + bsr.blockHeaders = bhs[:0] + + bsr.nextIndexBlockIdx = 0 + bsr.nextBlockIdx = 0 + bsr.globalUncompressedSizeBytes = 0 + bsr.globalRowsCount = 0 + bsr.globalBlocksCount = 0 + + bsr.sidLast.reset() + bsr.minTimestampLast = 0 +} + +// Path returns part path for bsr (e.g. file path, url or in-memory reference) +func (bsr *blockStreamReader) Path() string { + path := bsr.streamReaders.metaindexReader.Path() + return filepath.Dir(path) +} + +// MustInitFromInmemoryPart initializes bsr from mp. +func (bsr *blockStreamReader) MustInitFromInmemoryPart(mp *inmemoryPart) { + bsr.reset() + + bsr.ph = mp.ph + + // Initialize streamReaders + metaindexReader := mp.metaindex.NewReader() + indexReader := mp.index.NewReader() + columnsHeaderReader := mp.columnsHeader.NewReader() + timestampsReader := mp.timestamps.NewReader() + fieldValuesReader := mp.fieldValues.NewReader() + fieldBloomFilterReader := mp.fieldBloomFilter.NewReader() + messageValuesReader := mp.messageValues.NewReader() + messageBloomFilterReader := mp.messageBloomFilter.NewReader() + + bsr.streamReaders.init(metaindexReader, indexReader, columnsHeaderReader, timestampsReader, + fieldValuesReader, fieldBloomFilterReader, messageValuesReader, messageBloomFilterReader) + + // Read metaindex data + bsr.indexBlockHeaders = mustReadIndexBlockHeaders(bsr.indexBlockHeaders[:0], &bsr.streamReaders.metaindexReader) +} + +// MustInitFromFilePart initializes bsr from file part at the given path. +func (bsr *blockStreamReader) MustInitFromFilePart(path string) { + bsr.reset() + + // Files in the part are always read without OS cache pollution, + // since they are usually deleted after the merge. 
+	const nocache = true
+
+	metaindexPath := filepath.Join(path, metaindexFilename)
+	indexPath := filepath.Join(path, indexFilename)
+	columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
+	timestampsPath := filepath.Join(path, timestampsFilename)
+	fieldValuesPath := filepath.Join(path, fieldValuesFilename)
+	fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename)
+	messageValuesPath := filepath.Join(path, messageValuesFilename)
+	messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
+
+	bsr.ph.mustReadMetadata(path)
+
+	// Open data readers
+	metaindexReader := filestream.MustOpen(metaindexPath, nocache)
+	indexReader := filestream.MustOpen(indexPath, nocache)
+	columnsHeaderReader := filestream.MustOpen(columnsHeaderPath, nocache)
+	timestampsReader := filestream.MustOpen(timestampsPath, nocache)
+	fieldValuesReader := filestream.MustOpen(fieldValuesPath, nocache)
+	fieldBloomFilterReader := filestream.MustOpen(fieldBloomFilterPath, nocache)
+	messageValuesReader := filestream.MustOpen(messageValuesPath, nocache)
+	messageBloomFilterReader := filestream.MustOpen(messageBloomFilterPath, nocache)
+
+	// Initialize streamReaders
+	bsr.streamReaders.init(metaindexReader, indexReader, columnsHeaderReader, timestampsReader,
+		fieldValuesReader, fieldBloomFilterReader, messageValuesReader, messageBloomFilterReader)
+
+	// Read metaindex data
+	bsr.indexBlockHeaders = mustReadIndexBlockHeaders(bsr.indexBlockHeaders[:0], &bsr.streamReaders.metaindexReader)
+}
+
+// NextBlock reads the next block from bsr and puts it into bsr.blockData.
+//
+// false is returned if there are no other blocks.
+func (bsr *blockStreamReader) NextBlock() bool {
+	for bsr.nextBlockIdx >= len(bsr.blockHeaders) {
+		if !bsr.nextIndexBlock() {
+			return false
+		}
+	}
+	ih := &bsr.indexBlockHeaders[bsr.nextIndexBlockIdx-1]
+	bh := &bsr.blockHeaders[bsr.nextBlockIdx]
+	th := &bh.timestampsHeader
+
+	// Validate bh
+	if bh.streamID.less(&bsr.sidLast) {
+		logger.Panicf("FATAL: %s: blockHeader.streamID=%s cannot be smaller than the streamID from the previously read block: %s", bsr.Path(), &bh.streamID, &bsr.sidLast)
+	}
+	if bh.streamID.equal(&bsr.sidLast) && th.minTimestamp < bsr.minTimestampLast {
+		logger.Panicf("FATAL: %s: timestamps.minTimestamp=%d cannot be smaller than the minTimestamp for the previously read block for the same streamID: %d",
+			bsr.Path(), th.minTimestamp, bsr.minTimestampLast)
+	}
+	bsr.minTimestampLast = th.minTimestamp
+	bsr.sidLast = bh.streamID
+	if th.minTimestamp < ih.minTimestamp {
+		logger.Panicf("FATAL: %s: timestampsHeader.minTimestamp=%d cannot be smaller than indexBlockHeader.minTimestamp=%d", bsr.Path(), th.minTimestamp, ih.minTimestamp)
+	}
+	if th.maxTimestamp > ih.maxTimestamp {
+		logger.Panicf("FATAL: %s: timestampsHeader.maxTimestamp=%d cannot be bigger than indexBlockHeader.maxTimestamp=%d", bsr.Path(), th.maxTimestamp, ih.maxTimestamp)
+	}
+
+	// Read bsr.blockData
+	bsr.blockData.mustReadFrom(bh, &bsr.streamReaders)
+
+	bsr.globalUncompressedSizeBytes += bh.uncompressedSizeBytes
+	bsr.globalRowsCount += bh.rowsCount
+	bsr.globalBlocksCount++
+	if bsr.globalUncompressedSizeBytes > bsr.ph.UncompressedSizeBytes {
+		logger.Panicf("FATAL: %s: too big size of entries read: %d; mustn't exceed partHeader.UncompressedSizeBytes=%d",
+			bsr.Path(), bsr.globalUncompressedSizeBytes, bsr.ph.UncompressedSizeBytes)
+	}
+	if bsr.globalRowsCount > bsr.ph.RowsCount {
+		logger.Panicf("FATAL: %s: too many log entries read so far: %d; mustn't exceed partHeader.RowsCount=%d", bsr.Path(), bsr.globalRowsCount, bsr.ph.RowsCount)
+	}
+	if bsr.globalBlocksCount > bsr.ph.BlocksCount {
+		logger.Panicf("FATAL: %s: too many blocks read so far: %d; mustn't exceed partHeader.BlocksCount=%d", bsr.Path(), bsr.globalBlocksCount, bsr.ph.BlocksCount)
+	}
+
+	// The block has been successfully read
+	bsr.nextBlockIdx++
+	return true
+}
+
+func (bsr *blockStreamReader) nextIndexBlock() bool {
+	// Advance to the next indexBlockHeader
+	if bsr.nextIndexBlockIdx >= len(bsr.indexBlockHeaders) {
+		// No more blocks left
+		// Validate bsr.ph
+		totalBytesRead := bsr.streamReaders.totalBytesRead()
+		if bsr.ph.CompressedSizeBytes != totalBytesRead {
+			logger.Panicf("FATAL: %s: partHeader.CompressedSizeBytes=%d must match the size of data read: %d", bsr.Path(), bsr.ph.CompressedSizeBytes, totalBytesRead)
+		}
+		if bsr.ph.UncompressedSizeBytes != bsr.globalUncompressedSizeBytes {
+			logger.Panicf("FATAL: %s: partHeader.UncompressedSizeBytes=%d must match the size of entries read: %d",
+				bsr.Path(), bsr.ph.UncompressedSizeBytes, bsr.globalUncompressedSizeBytes)
+		}
+		if bsr.ph.RowsCount != bsr.globalRowsCount {
+			logger.Panicf("FATAL: %s: partHeader.RowsCount=%d must match the number of log entries read: %d", bsr.Path(), bsr.ph.RowsCount, bsr.globalRowsCount)
+		}
+		if bsr.ph.BlocksCount != bsr.globalBlocksCount {
+			logger.Panicf("FATAL: %s: partHeader.BlocksCount=%d must match the number of blocks read: %d", bsr.Path(), bsr.ph.BlocksCount, bsr.globalBlocksCount)
+		}
+		return false
+	}
+	ih := &bsr.indexBlockHeaders[bsr.nextIndexBlockIdx]
+
+	// Validate ih
+	metaindexReader := &bsr.streamReaders.metaindexReader
+	if ih.minTimestamp < bsr.ph.MinTimestamp {
+		logger.Panicf("FATAL: %s: indexBlockHeader.minTimestamp=%d cannot be smaller than partHeader.MinTimestamp=%d",
+			metaindexReader.Path(), ih.minTimestamp, bsr.ph.MinTimestamp)
+	}
+	if ih.maxTimestamp > bsr.ph.MaxTimestamp {
+		logger.Panicf("FATAL: %s: indexBlockHeader.maxTimestamp=%d cannot be bigger than partHeader.MaxTimestamp=%d",
+			metaindexReader.Path(), ih.maxTimestamp, bsr.ph.MaxTimestamp)
+	}
+
+	// Read indexBlock for the given ih
+	bb := longTermBufPool.Get()
+	bb.B = ih.mustReadNextIndexBlock(bb.B[:0], &bsr.streamReaders)
+	bsr.blockHeaders = resetBlockHeaders(bsr.blockHeaders)
+	var err error
+	bsr.blockHeaders, err = unmarshalBlockHeaders(bsr.blockHeaders[:0], bb.B)
+	longTermBufPool.Put(bb)
+	if err != nil {
+		logger.Panicf("FATAL: %s: cannot unmarshal blockHeader entries: %s", bsr.streamReaders.indexReader.Path(), err)
+	}
+
+	bsr.nextIndexBlockIdx++
+	bsr.nextBlockIdx = 0
+	return true
+}
+
+// MustClose closes bsr.
+func (bsr *blockStreamReader) MustClose() {
+	bsr.streamReaders.MustClose()
+	bsr.reset()
+}
+
+// getBlockStreamReader returns blockStreamReader.
+//
+// The returned blockStreamReader must be initialized with MustInitFromInmemoryPart() or MustInitFromFilePart().
+// Call putBlockStreamReader() when the returned blockStreamReader is no longer needed.
+func getBlockStreamReader() *blockStreamReader {
+	v := blockStreamReaderPool.Get()
+	if v == nil {
+		v = &blockStreamReader{}
+	}
+	bsr := v.(*blockStreamReader)
+	return bsr
+}
+
+// putBlockStreamReader returns bsr to the pool.
+//
+// bsr cannot be used after returning to the pool.
+func putBlockStreamReader(bsr *blockStreamReader) {
+	bsr.reset()
+	blockStreamReaderPool.Put(bsr)
+}
+
+var blockStreamReaderPool sync.Pool
+
+// mustCloseBlockStreamReaders calls MustClose() on the given bsrs.
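+//
+// It is called from mustMergeBlockStreams() after the merge completes.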
+func mustCloseBlockStreamReaders(bsrs []*blockStreamReader) { + for _, bsr := range bsrs { + bsr.MustClose() + } +} diff --git a/lib/logstorage/block_stream_writer.go b/lib/logstorage/block_stream_writer.go new file mode 100644 index 000000000..c16740b81 --- /dev/null +++ b/lib/logstorage/block_stream_writer.go @@ -0,0 +1,362 @@ +package logstorage + +import ( + "path/filepath" + "sync" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// writerWithStats writes data to w and tracks the total amounts of data written at bytesWritten. +type writerWithStats struct { + w filestream.WriteCloser + bytesWritten uint64 +} + +func (w *writerWithStats) reset() { + w.w = nil + w.bytesWritten = 0 +} + +func (w *writerWithStats) init(wc filestream.WriteCloser) { + w.reset() + + w.w = wc +} + +func (w *writerWithStats) Path() string { + return w.w.Path() +} + +func (w *writerWithStats) MustWrite(data []byte) { + fs.MustWriteData(w.w, data) + w.bytesWritten += uint64(len(data)) +} + +// MustClose closes the underlying w. +func (w *writerWithStats) MustClose() { + w.w.MustClose() +} + +// streamWriters contain writers for blockStreamWriter +type streamWriters struct { + metaindexWriter writerWithStats + indexWriter writerWithStats + columnsHeaderWriter writerWithStats + timestampsWriter writerWithStats + fieldValuesWriter writerWithStats + fieldBloomFilterWriter writerWithStats + messageValuesWriter writerWithStats + messageBloomFilterWriter writerWithStats +} + +func (sw *streamWriters) reset() { + sw.metaindexWriter.reset() + sw.indexWriter.reset() + sw.columnsHeaderWriter.reset() + sw.timestampsWriter.reset() + sw.fieldValuesWriter.reset() + sw.fieldBloomFilterWriter.reset() + sw.messageValuesWriter.reset() + sw.messageBloomFilterWriter.reset() +} + +func (sw *streamWriters) init(metaindexWriter, indexWriter, columnsHeaderWriter, timestampsWriter, fieldValuesWriter, fieldBloomFilterWriter, + messageValuesWriter, messageBloomFilterWriter filestream.WriteCloser, +) { + sw.metaindexWriter.init(metaindexWriter) + sw.indexWriter.init(indexWriter) + sw.columnsHeaderWriter.init(columnsHeaderWriter) + sw.timestampsWriter.init(timestampsWriter) + sw.fieldValuesWriter.init(fieldValuesWriter) + sw.fieldBloomFilterWriter.init(fieldBloomFilterWriter) + sw.messageValuesWriter.init(messageValuesWriter) + sw.messageBloomFilterWriter.init(messageBloomFilterWriter) +} + +func (sw *streamWriters) totalBytesWritten() uint64 { + n := uint64(0) + n += sw.metaindexWriter.bytesWritten + n += sw.indexWriter.bytesWritten + n += sw.columnsHeaderWriter.bytesWritten + n += sw.timestampsWriter.bytesWritten + n += sw.fieldValuesWriter.bytesWritten + n += sw.fieldBloomFilterWriter.bytesWritten + n += sw.messageValuesWriter.bytesWritten + n += sw.messageBloomFilterWriter.bytesWritten + return n +} + +func (sw *streamWriters) MustClose() { + sw.metaindexWriter.MustClose() + sw.indexWriter.MustClose() + sw.columnsHeaderWriter.MustClose() + sw.timestampsWriter.MustClose() + sw.fieldValuesWriter.MustClose() + sw.fieldBloomFilterWriter.MustClose() + sw.messageValuesWriter.MustClose() + sw.messageBloomFilterWriter.MustClose() +} + +// blockStreamWriter is used for writing blocks into the underlying storage in streaming manner. 
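+//
+// A rough lifecycle sketch based on the functions defined in this file
+// (partPath, sid, timestamps and rows are hypothetical placeholders):
+//
+//	bsw := getBlockStreamWriter()
+//	bsw.MustInitForFilePart(partPath, true)
+//	bsw.MustWriteRows(&sid, timestamps, rows)
+//	var ph partHeader
+//	bsw.Finalize(&ph)
+//	putBlockStreamWriter(bsw)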
+type blockStreamWriter struct { + // streamWriters contains writer for block data + streamWriters streamWriters + + // sidLast is the streamID for the last written block + sidLast streamID + + // sidFirst is the streamID for the first block in the current indexBlock + sidFirst streamID + + // minTimestampLast is the minimum timestamp seen for the last written block + minTimestampLast int64 + + // minTimestamp is the minimum timestamp seen across written blocks for the current indexBlock + minTimestamp int64 + + // maxTimestamp is the maximum timestamp seen across written blocks for the current indexBlock + maxTimestamp int64 + + // hasWrittenBlocks is set to true if at least a single block is written to the current indexBlock + hasWrittenBlocks bool + + // globalUncompressedSizeBytes is the total size of all the log entries written via bsw + globalUncompressedSizeBytes uint64 + + // globalRowsCount is the total number of log entries written via bsw + globalRowsCount uint64 + + // globalBlocksCount is the total number of blocks written to bsw + globalBlocksCount uint64 + + // globalMinTimestamp is the minimum timestamp seen across all the blocks written to bsw + globalMinTimestamp int64 + + // globalMaxTimestamp is the maximum timestamp seen across all the blocks written to bsw + globalMaxTimestamp int64 + + // indexBlockData contains marshaled blockHeader data, which isn't written yet to indexFilename + indexBlockData []byte + + // metaindexData contains marshaled indexBlockHeader data, which isn't written yet to metaindexFilename + metaindexData []byte + + // indexBlockHeader is used for marshaling the data to metaindexData + indexBlockHeader indexBlockHeader +} + +// reset resets bsw for subsequent re-use. +func (bsw *blockStreamWriter) reset() { + bsw.streamWriters.reset() + bsw.sidLast.reset() + bsw.sidFirst.reset() + bsw.minTimestampLast = 0 + bsw.minTimestamp = 0 + bsw.maxTimestamp = 0 + bsw.hasWrittenBlocks = false + bsw.globalUncompressedSizeBytes = 0 + bsw.globalRowsCount = 0 + bsw.globalBlocksCount = 0 + bsw.globalMinTimestamp = 0 + bsw.globalMaxTimestamp = 0 + bsw.indexBlockData = bsw.indexBlockData[:0] + + if len(bsw.metaindexData) > 1024*1024 { + // The length of bsw.metaindexData is unbound, so drop too long buffer + // in order to conserve memory. + bsw.metaindexData = nil + } else { + bsw.metaindexData = bsw.metaindexData[:0] + } + + bsw.indexBlockHeader.reset() +} + +// MustInitFromInmemoryPart initializes bsw from mp +func (bsw *blockStreamWriter) MustInitForInmemoryPart(mp *inmemoryPart) { + bsw.reset() + bsw.streamWriters.init(&mp.metaindex, &mp.index, &mp.columnsHeader, &mp.timestamps, &mp.fieldValues, &mp.fieldBloomFilter, &mp.messageValues, &mp.messageBloomFilter) +} + +// MustInitForFilePart initializes bsw for writing data to file part located at path. +// +// if nocache is true, then the written data doesn't go to OS page cache. 
+func (bsw *blockStreamWriter) MustInitForFilePart(path string, nocache bool) {
+	bsw.reset()
+
+	fs.MustMkdirFailIfExist(path)
+
+	metaindexPath := filepath.Join(path, metaindexFilename)
+	indexPath := filepath.Join(path, indexFilename)
+	columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
+	timestampsPath := filepath.Join(path, timestampsFilename)
+	fieldValuesPath := filepath.Join(path, fieldValuesFilename)
+	fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename)
+	messageValuesPath := filepath.Join(path, messageValuesFilename)
+	messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
+
+	// Always cache metaindex file, since it is re-read immediately after part creation
+	metaindexWriter := filestream.MustCreate(metaindexPath, false)
+
+	indexWriter := filestream.MustCreate(indexPath, nocache)
+	columnsHeaderWriter := filestream.MustCreate(columnsHeaderPath, nocache)
+	timestampsWriter := filestream.MustCreate(timestampsPath, nocache)
+	fieldValuesWriter := filestream.MustCreate(fieldValuesPath, nocache)
+	fieldBloomFilterWriter := filestream.MustCreate(fieldBloomFilterPath, nocache)
+	messageValuesWriter := filestream.MustCreate(messageValuesPath, nocache)
+	messageBloomFilterWriter := filestream.MustCreate(messageBloomFilterPath, nocache)
+
+	bsw.streamWriters.init(metaindexWriter, indexWriter, columnsHeaderWriter, timestampsWriter,
+		fieldValuesWriter, fieldBloomFilterWriter, messageValuesWriter, messageBloomFilterWriter)
+}
+
+// MustWriteRows writes timestamps with rows under the given sid to bsw.
+//
+// timestamps must be sorted.
+// sid must be bigger or equal to the sid for the previously written rows.
+func (bsw *blockStreamWriter) MustWriteRows(sid *streamID, timestamps []int64, rows [][]Field) {
+	if len(timestamps) == 0 {
+		return
+	}
+
+	b := getBlock()
+	b.MustInitFromRows(timestamps, rows)
+	bsw.MustWriteBlock(sid, b)
+	putBlock(b)
+}
+
+// MustWriteBlockData writes bd to bsw.
+//
+// The bd.streamID must be bigger or equal to the streamID for the previously written blocks.
+func (bsw *blockStreamWriter) MustWriteBlockData(bd *blockData) {
+	if bd.rowsCount == 0 {
+		return
+	}
+	bsw.mustWriteBlockInternal(&bd.streamID, nil, bd)
+}
+
+// MustWriteBlock writes b under the given sid to bsw.
+//
+// The sid must be bigger or equal to the sid for the previously written blocks.
+// The minimum timestamp in b must be bigger or equal to the minimum timestamp written to the same sid.
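+//
+// For example (hypothetical values), the following write order is valid:
+//
+//	(sid=1, minTimestamp=10), (sid=1, minTimestamp=20), (sid=2, minTimestamp=5)
+//
+// while writing a block for sid=1 after sid=2, or a block with a smaller minTimestamp
+// for the same sid, results in a panic.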
+func (bsw *blockStreamWriter) MustWriteBlock(sid *streamID, b *block) { + rowsCount := b.Len() + if rowsCount == 0 { + return + } + bsw.mustWriteBlockInternal(sid, b, nil) +} + +func (bsw *blockStreamWriter) mustWriteBlockInternal(sid *streamID, b *block, bd *blockData) { + if sid.less(&bsw.sidLast) { + logger.Panicf("BUG: the sid=%s cannot be smaller than the previously written sid=%s", sid, &bsw.sidLast) + } + hasWrittenBlocks := bsw.hasWrittenBlocks + if !hasWrittenBlocks { + bsw.sidFirst = *sid + bsw.hasWrittenBlocks = true + } + isSeenSid := sid.equal(&bsw.sidLast) + bsw.sidLast = *sid + + bh := getBlockHeader() + if b != nil { + b.mustWriteTo(sid, bh, &bsw.streamWriters) + } else { + bd.mustWriteTo(bh, &bsw.streamWriters) + } + th := &bh.timestampsHeader + if bsw.globalRowsCount == 0 || th.minTimestamp < bsw.globalMinTimestamp { + bsw.globalMinTimestamp = th.minTimestamp + } + if bsw.globalRowsCount == 0 || th.maxTimestamp > bsw.globalMaxTimestamp { + bsw.globalMaxTimestamp = th.maxTimestamp + } + if !hasWrittenBlocks || th.minTimestamp < bsw.minTimestamp { + bsw.minTimestamp = th.minTimestamp + } + if !hasWrittenBlocks || th.maxTimestamp > bsw.maxTimestamp { + bsw.maxTimestamp = th.maxTimestamp + } + if isSeenSid && th.minTimestamp < bsw.minTimestampLast { + logger.Panicf("BUG: the block for sid=%s cannot contain timestamp smaller than %d, but it contains timestamp %d", sid, bsw.minTimestampLast, th.minTimestamp) + } + bsw.minTimestampLast = th.minTimestamp + + bsw.globalUncompressedSizeBytes += bh.uncompressedSizeBytes + bsw.globalRowsCount += bh.rowsCount + bsw.globalBlocksCount++ + + // Marshal bh + bsw.indexBlockData = bh.marshal(bsw.indexBlockData) + putBlockHeader(bh) + if len(bsw.indexBlockData) > maxUncompressedIndexBlockSize { + bsw.mustFlushIndexBlock(bsw.indexBlockData) + bsw.indexBlockData = bsw.indexBlockData[:0] + } +} + +func (bsw *blockStreamWriter) mustFlushIndexBlock(data []byte) { + if len(data) > 0 { + bsw.indexBlockHeader.mustWriteIndexBlock(data, bsw.sidFirst, bsw.minTimestamp, bsw.maxTimestamp, &bsw.streamWriters) + bsw.metaindexData = bsw.indexBlockHeader.marshal(bsw.metaindexData) + } + bsw.hasWrittenBlocks = false + bsw.minTimestamp = 0 + bsw.maxTimestamp = 0 + bsw.sidFirst.reset() +} + +// Finalize() finalizes the data write process and updates ph with the finalized stats +// +// It closes the writers passed to MustInit(). +// +// bsw can be re-used after calling Finalize(). +func (bsw *blockStreamWriter) Finalize(ph *partHeader) { + ph.UncompressedSizeBytes = bsw.globalUncompressedSizeBytes + ph.RowsCount = bsw.globalRowsCount + ph.BlocksCount = bsw.globalBlocksCount + ph.MinTimestamp = bsw.globalMinTimestamp + ph.MaxTimestamp = bsw.globalMaxTimestamp + + bsw.mustFlushIndexBlock(bsw.indexBlockData) + + // Write metaindex data + bb := longTermBufPool.Get() + bb.B = encoding.CompressZSTDLevel(bb.B[:0], bsw.metaindexData, 1) + bsw.streamWriters.metaindexWriter.MustWrite(bb.B) + if len(bb.B) < 1024*1024 { + longTermBufPool.Put(bb) + } + + ph.CompressedSizeBytes = bsw.streamWriters.totalBytesWritten() + + bsw.streamWriters.MustClose() + bsw.reset() +} + +var longTermBufPool bytesutil.ByteBufferPool + +// getBlockStreamWriter returns new blockStreamWriter from the pool. +// +// Return back the blockStreamWriter to the pool when it is no longer needed by calling putBlockStreamWriter. 
+func getBlockStreamWriter() *blockStreamWriter { + v := blockStreamWriterPool.Get() + if v == nil { + return &blockStreamWriter{} + } + return v.(*blockStreamWriter) +} + +// putBlockStreamWriter returns bsw to the pool. +func putBlockStreamWriter(bsw *blockStreamWriter) { + bsw.reset() + blockStreamWriterPool.Put(bsw) +} + +var blockStreamWriterPool sync.Pool diff --git a/lib/logstorage/block_test.go b/lib/logstorage/block_test.go new file mode 100644 index 000000000..b68b7ea41 --- /dev/null +++ b/lib/logstorage/block_test.go @@ -0,0 +1,179 @@ +package logstorage + +import ( + "fmt" + "reflect" + "testing" +) + +func TestBlockMustInitFromRows(t *testing.T) { + f := func(timestamps []int64, rows [][]Field, bExpected *block) { + t.Helper() + b := getBlock() + defer putBlock(b) + + b.MustInitFromRows(timestamps, rows) + if b.uncompressedSizeBytes() >= maxUncompressedBlockSize { + t.Fatalf("expecting non-full block") + } + if !reflect.DeepEqual(b, bExpected) { + t.Fatalf("unexpected block;\ngot\n%v\nwant\n%v", b, bExpected) + } + if n := b.Len(); n != len(timestamps) { + t.Fatalf("unexpected block len; got %d; want %d", n, len(timestamps)) + } + b.assertValid() + } + + // An empty log entries + f(nil, nil, &block{}) + f([]int64{}, [][]Field{}, &block{}) + + // A single row + timestamps := []int64{1234} + rows := [][]Field{ + { + { + Name: "msg", + Value: "foo", + }, + { + Name: "level", + Value: "error", + }, + }, + } + bExpected := &block{ + timestamps: []int64{1234}, + constColumns: []Field{ + { + Name: "level", + Value: "error", + }, + { + Name: "msg", + Value: "foo", + }, + }, + } + f(timestamps, rows, bExpected) + + // Multiple log entries with the same set of fields + timestamps = []int64{3, 5} + rows = [][]Field{ + { + { + Name: "job", + Value: "foo", + }, + { + Name: "instance", + Value: "host1", + }, + }, + { + { + Name: "job", + Value: "foo", + }, + { + Name: "instance", + Value: "host2", + }, + }, + } + bExpected = &block{ + timestamps: []int64{3, 5}, + columns: []column{ + { + name: "instance", + values: []string{"host1", "host2"}, + }, + }, + constColumns: []Field{ + { + Name: "job", + Value: "foo", + }, + }, + } + f(timestamps, rows, bExpected) + + // Multiple log entries with distinct set of fields + timestamps = []int64{3, 5, 10} + rows = [][]Field{ + { + { + Name: "msg", + Value: "foo", + }, + { + Name: "b", + Value: "xyz", + }, + }, + { + { + Name: "b", + Value: "xyz", + }, + { + Name: "a", + Value: "aaa", + }, + }, + { + { + Name: "b", + Value: "xyz", + }, + }, + } + bExpected = &block{ + timestamps: []int64{3, 5, 10}, + columns: []column{ + { + name: "a", + values: []string{"", "aaa", ""}, + }, + { + name: "msg", + values: []string{"foo", "", ""}, + }, + }, + constColumns: []Field{ + { + Name: "b", + Value: "xyz", + }, + }, + } + f(timestamps, rows, bExpected) +} + +func TestBlockMustInitFromRowsFullBlock(t *testing.T) { + const rowsCount = 2000 + timestamps := make([]int64, rowsCount) + rows := make([][]Field, rowsCount) + for i := range timestamps { + fields := make([]Field, 10) + for j := range fields { + fields[j] = Field{ + Name: fmt.Sprintf("field_%d", j), + Value: "very very looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong value", + } + } + rows[i] = fields + } + + b := getBlock() + defer putBlock(b) + b.MustInitFromRows(timestamps, rows) + if n := b.Len(); n != len(rows) { + t.Fatalf("unexpected total log entries; got %d; want %d", n, len(rows)) + } + if b.uncompressedSizeBytes() < maxUncompressedBlockSize { + t.Fatalf("expecting 
full block") + } + b.assertValid() +} diff --git a/lib/logstorage/block_timing_test.go b/lib/logstorage/block_timing_test.go new file mode 100644 index 000000000..9d2a5e15a --- /dev/null +++ b/lib/logstorage/block_timing_test.go @@ -0,0 +1,46 @@ +package logstorage + +import ( + "fmt" + "testing" +) + +func BenchmarkBlock_MustInitFromRows(b *testing.B) { + for _, rowsPerBlock := range []int{1, 10, 100, 1000, 10000} { + b.Run(fmt.Sprintf("rowsPerBlock_%d", rowsPerBlock), func(b *testing.B) { + benchmarkBlockMustInitFromRows(b, rowsPerBlock) + }) + } +} + +func benchmarkBlockMustInitFromRows(b *testing.B, rowsPerBlock int) { + timestamps, rows := newTestRows(rowsPerBlock, 10) + b.ReportAllocs() + b.SetBytes(int64(len(timestamps))) + b.RunParallel(func(pb *testing.PB) { + block := getBlock() + defer putBlock(block) + for pb.Next() { + block.MustInitFromRows(timestamps, rows) + if n := block.Len(); n != len(timestamps) { + panic(fmt.Errorf("unexpected block length; got %d; want %d", n, len(timestamps))) + } + } + }) +} + +func newTestRows(rowsCount, fieldsPerRow int) ([]int64, [][]Field) { + timestamps := make([]int64, rowsCount) + rows := make([][]Field, rowsCount) + for i := range timestamps { + timestamps[i] = int64(i) * 1e9 + fields := make([]Field, fieldsPerRow) + for j := range fields { + f := &fields[j] + f.Name = fmt.Sprintf("field_%d", j) + f.Value = fmt.Sprintf("value_%d_%d", i, j) + } + rows[i] = fields + } + return timestamps, rows +} diff --git a/lib/logstorage/bloomfilter.go b/lib/logstorage/bloomfilter.go new file mode 100644 index 000000000..723949cf3 --- /dev/null +++ b/lib/logstorage/bloomfilter.go @@ -0,0 +1,176 @@ +package logstorage + +import ( + "fmt" + "sync" + "unsafe" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/cespare/xxhash/v2" +) + +// bloomFilterHashesCount is the number of different hashes to use for bloom filter. +const bloomFilterHashesCount = 6 + +// bloomFilterBitsPerItem is the number of bits to use per each token. +const bloomFilterBitsPerItem = 16 + +// bloomFilterMarshal appends marshaled bloom filter for tokens to dst and returns the result. +func bloomFilterMarshal(dst []byte, tokens []string) []byte { + bf := getBloomFilter() + bf.mustInit(tokens) + dst = bf.marshal(dst) + putBloomFilter(bf) + return dst +} + +type bloomFilter struct { + bits []uint64 +} + +func (bf *bloomFilter) reset() { + bits := bf.bits + for i := range bits { + bits[i] = 0 + } + bf.bits = bits[:0] +} + +// marshal appends marshaled bf to dst and returns the result. +func (bf *bloomFilter) marshal(dst []byte) []byte { + bits := bf.bits + for _, word := range bits { + dst = encoding.MarshalUint64(dst, word) + } + return dst +} + +// unmarshal unmarshals bf from src. +func (bf *bloomFilter) unmarshal(src []byte) error { + if len(src)%8 != 0 { + return fmt.Errorf("cannot unmarshal bloomFilter from src with size not multiple by 8; len(src)=%d", len(src)) + } + bf.reset() + wordsCount := len(src) / 8 + bits := bf.bits + if n := wordsCount - cap(bits); n > 0 { + bits = append(bits[:cap(bits)], make([]uint64, n)...) 
+ } + bits = bits[:wordsCount] + for i := range bits { + bits[i] = encoding.UnmarshalUint64(src) + src = src[8:] + } + bf.bits = bits + return nil +} + +// mustInit initializes bf with the given tokens +func (bf *bloomFilter) mustInit(tokens []string) { + bitsCount := len(tokens) * bloomFilterBitsPerItem + wordsCount := (bitsCount + 63) / 64 + bits := bf.bits + if n := wordsCount - cap(bits); n > 0 { + bits = append(bits[:cap(bits)], make([]uint64, n)...) + } + bits = bits[:wordsCount] + bloomFilterAdd(bits, tokens) + bf.bits = bits +} + +// bloomFilterAdd adds the given tokens to the bloom filter bits +func bloomFilterAdd(bits []uint64, tokens []string) { + maxBits := uint64(len(bits)) * 64 + var buf [8]byte + hp := (*uint64)(unsafe.Pointer(&buf[0])) + for _, token := range tokens { + *hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token)) + for i := 0; i < bloomFilterHashesCount; i++ { + hi := xxhash.Sum64(buf[:]) + (*hp)++ + idx := hi % maxBits + i := idx / 64 + j := idx % 64 + mask := uint64(1) << j + w := bits[i] + if (w & mask) == 0 { + bits[i] = w | mask + } + } + } +} + +// containsAll returns true if bf contains all the given tokens. +func (bf *bloomFilter) containsAll(tokens []string) bool { + bits := bf.bits + if len(bits) == 0 { + return true + } + maxBits := uint64(len(bits)) * 64 + var buf [8]byte + hp := (*uint64)(unsafe.Pointer(&buf[0])) + for _, token := range tokens { + *hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token)) + for i := 0; i < bloomFilterHashesCount; i++ { + hi := xxhash.Sum64(buf[:]) + (*hp)++ + idx := hi % maxBits + i := idx / 64 + j := idx % 64 + mask := uint64(1) << j + w := bits[i] + if (w & mask) == 0 { + // The token is missing + return false + } + } + } + return true +} + +// containsAny returns true if bf contains at least a single token from the given tokens. +func (bf *bloomFilter) containsAny(tokens []string) bool { + bits := bf.bits + if len(bits) == 0 { + return true + } + maxBits := uint64(len(bits)) * 64 + var buf [8]byte + hp := (*uint64)(unsafe.Pointer(&buf[0])) +nextToken: + for _, token := range tokens { + *hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token)) + for i := 0; i < bloomFilterHashesCount; i++ { + hi := xxhash.Sum64(buf[:]) + (*hp)++ + idx := hi % maxBits + i := idx / 64 + j := idx % 64 + mask := uint64(1) << j + w := bits[i] + if (w & mask) == 0 { + // The token is missing. 
Check the next token + continue nextToken + } + } + // It is likely the token exists in the bloom filter + return true + } + return false +} + +func getBloomFilter() *bloomFilter { + v := bloomFilterPool.Get() + if v == nil { + return &bloomFilter{} + } + return v.(*bloomFilter) +} + +func putBloomFilter(bf *bloomFilter) { + bf.reset() + bloomFilterPool.Put(bf) +} + +var bloomFilterPool sync.Pool diff --git a/lib/logstorage/bloomfilter_test.go b/lib/logstorage/bloomfilter_test.go new file mode 100644 index 000000000..061e1483f --- /dev/null +++ b/lib/logstorage/bloomfilter_test.go @@ -0,0 +1,84 @@ +package logstorage + +import ( + "fmt" + "testing" +) + +func TestBloomFilter(t *testing.T) { + f := func(tokens []string) { + t.Helper() + data := bloomFilterMarshal(nil, tokens) + bf := getBloomFilter() + defer putBloomFilter(bf) + if err := bf.unmarshal(data); err != nil { + t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err) + } + for _, token := range tokens { + if !bf.containsAny([]string{token}) { + t.Fatalf("bloomFilterContains must return true for the added token %q", token) + } + } + if !bf.containsAll(tokens) { + t.Fatalf("bloomFilterContains must return true for the added tokens") + } + } + f(nil) + f([]string{"foo"}) + f([]string{"foo", "bar", "baz"}) + + // 10k tokens + tokens := make([]string, 10000) + for i := range tokens { + tokens[i] = fmt.Sprintf("token_%d", i) + } + f(tokens) +} + +func TestBloomFilterUnmarshalFailure(t *testing.T) { + f := func(data []byte) { + t.Helper() + bf := getBloomFilter() + defer putBloomFilter(bf) + if err := bf.unmarshal(data); err == nil { + t.Fatalf("expecting non-nil error") + } + } + f([]byte("a")) + f([]byte("foo")) +} + +func TestBloomFilterUnmarshalGarbage(t *testing.T) { + data := []byte("01234567") + var bf bloomFilter + if err := bf.unmarshal(data); err != nil { + t.Fatalf("unexpected error: %s", err) + } +} + +func TestBloomFilterFalsePositive(t *testing.T) { + tokens := make([]string, 20000) + for i := range tokens { + tokens[i] = fmt.Sprintf("token_%d", i) + } + data := bloomFilterMarshal(nil, tokens) + bf := getBloomFilter() + defer putBloomFilter(bf) + if err := bf.unmarshal(data); err != nil { + t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err) + } + + // count the number of false positives on 20K non-existing tokens + falsePositives := 0 + for i := range tokens { + token := fmt.Sprintf("non-existing-token_%d", i) + if bf.containsAny([]string{token}) { + falsePositives++ + } + } + p := float64(falsePositives) / float64(len(tokens)) + maxFalsePositive := 0.0011 + if p > maxFalsePositive { + t.Fatalf("too high false positive rate; got %.4f; want %.4f max", p, maxFalsePositive) + } +} diff --git a/lib/logstorage/consts.go b/lib/logstorage/consts.go new file mode 100644 index 000000000..20e3590e2 --- /dev/null +++ b/lib/logstorage/consts.go @@ -0,0 +1,32 @@ +package logstorage + +// maxUncompressedIndexBlockSize contains the maximum length of uncompressed block with blockHeader entries aka index block. +// +// The real block length can exceed this value by a small percentage because of the block write details. +const maxUncompressedIndexBlockSize = 128 * 1024 + +// maxUncompressedBlockSize is the maximum size of uncompressed block in bytes. +// +// The real uncompressed block can exceed this value by up to 2 times because of block merge details. +const maxUncompressedBlockSize = 2 * 1024 * 1024 + +// maxRowsPerBlock is the maximum number of log entries a single block can contain. 
+const maxRowsPerBlock = 8 * 1024 * 1024 + +// maxColumnsPerBlock is the maximum number of columns per block. +const maxColumnsPerBlock = 10000 + +// maxIndexBlockSize is the maximum size of the block with blockHeader entries (aka indexBlock) +const maxIndexBlockSize = 8 * 1024 * 1024 + +// maxTimestampsBlockSize is the maximum size of timestamps block +const maxTimestampsBlockSize = 8 * 1024 * 1024 + +// maxValuesBlockSize is the maximum size of values block +const maxValuesBlockSize = 8 * 1024 * 1024 + +// maxBloomFilterBlockSize is the maximum size of bloom filter block +const maxBloomFilterBlockSize = 8 * 1024 * 1024 + +// maxColumnsHeaderSize is the maximum size of columnsHeader block +const maxColumnsHeaderSize = 8 * 1024 * 1024 diff --git a/lib/logstorage/datadb.go b/lib/logstorage/datadb.go new file mode 100644 index 000000000..e605e2f24 --- /dev/null +++ b/lib/logstorage/datadb.go @@ -0,0 +1,990 @@ +package logstorage + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "sync" + "sync/atomic" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/memory" +) + +// Default number of parts to merge at once. +// +// This number has been obtained empirically - it gives the lowest possible overhead. +// See appendPartsToMerge tests for details. +const defaultPartsToMerge = 15 + +// minMergeMultiplier is the minimum multiplier for the size of the output part +// compared to the size of the maximum input part for the merge. +// +// Higher value reduces write amplification (disk write IO induced by the merge), +// while increases the number of unmerged parts. +// The 1.7 is good enough for production workloads. +const minMergeMultiplier = 1.7 + +// The maximum number of inmemory parts in the partition. +// +// If the number of inmemory parts reaches this value, then assisted merge runs during data ingestion. +const maxInmemoryPartsPerPartition = 20 + +// datadb represents a database with log data +type datadb struct { + // pt is the partition the datadb belongs to + pt *partition + + // mergeIdx is used for generating unique directory names for parts + mergeIdx uint64 + + // path is the path to the directory with log data + path string + + // flushInterval is interval for flushing the inmemory parts to disk + flushInterval time.Duration + + // inmemoryParts contains a list of inmemory parts + inmemoryParts []*partWrapper + + // fileParts contains a list of file-based parts + fileParts []*partWrapper + + // partsLock protects parts from concurrent access + partsLock sync.Mutex + + // wg is used for determining when background workers stop + wg sync.WaitGroup + + // stopCh is used for notifying background workers to stop + stopCh chan struct{} + + // mergeDoneCond is used for pace-limiting the data ingestion rate + mergeDoneCond *sync.Cond + + // inmemoryPartsFlushersCount is the number of currently running in-memory parts flushers + // + // This variable must be accessed under partsLock. + inmemoryPartsFlushersCount int + + // mergeWorkersCount is the number of currently running merge workers + // + // This variable must be accessed under partsLock. + mergeWorkersCount int +} + +// partWrapper is a wrapper for opened part. +type partWrapper struct { + // refCount is the number of references to p. + // + // When the number of references reaches zero, then p is closed. 
+ refCount int32 + + // The flag, which is set when the part must be deleted after refCount reaches zero. + mustBeDeleted uint32 + + // p is an opened part + p *part + + // mp references inmemory part used for initializing p. + mp *inmemoryPart + + // isInMerge is set to true if the part takes part in merge. + isInMerge bool + + // The deadline when in-memory part must be flushed to disk. + flushDeadline time.Time +} + +func (pw *partWrapper) incRef() { + atomic.AddInt32(&pw.refCount, 1) +} + +func (pw *partWrapper) decRef() { + n := atomic.AddInt32(&pw.refCount, -1) + if n > 0 { + return + } + + deletePath := "" + if pw.mp == nil { + if atomic.LoadUint32(&pw.mustBeDeleted) != 0 { + deletePath = pw.p.path + } + } else { + putInmemoryPart(pw.mp) + pw.mp = nil + } + + mustClosePart(pw.p) + pw.p = nil + + if deletePath != "" { + fs.MustRemoveAll(deletePath) + } +} + +func mustCreateDatadb(path string) { + fs.MustMkdirFailIfExist(path) + mustWritePartNames(path, []string{}) +} + +// mustOpenDatadb opens datadb at the given path with the given flushInterval for in-memory data. +func mustOpenDatadb(pt *partition, path string, flushInterval time.Duration) *datadb { + // Remove temporary directories, which may be left after unclean shutdown. + fs.MustRemoveTemporaryDirs(path) + + partNames := mustReadPartNames(path) + mustRemoveUnusedDirs(path, partNames) + + pws := make([]*partWrapper, len(partNames)) + for i, partName := range partNames { + partPath := filepath.Join(path, partName) + p := mustOpenFilePart(pt, partPath) + pws[i] = newPartWrapper(p, nil, time.Time{}) + } + + ddb := &datadb{ + pt: pt, + mergeIdx: uint64(time.Now().UnixNano()), + flushInterval: flushInterval, + path: path, + fileParts: pws, + stopCh: make(chan struct{}), + } + ddb.mergeDoneCond = sync.NewCond(&ddb.partsLock) + + // Start merge workers in the hope they'll merge the remaining parts + ddb.partsLock.Lock() + n := getMergeWorkersCount() + for i := 0; i < n; i++ { + ddb.startMergeWorkerLocked() + } + ddb.partsLock.Unlock() + + return ddb +} + +// startInmemoryPartsFlusherLocked starts flusher for in-memory parts to disk. +// +// This function must be called under partsLock. +func (ddb *datadb) startInmemoryPartsFlusherLocked() { + if ddb.inmemoryPartsFlushersCount >= 1 { + return + } + ddb.inmemoryPartsFlushersCount++ + ddb.wg.Add(1) + go func() { + ddb.flushInmemoryParts() + ddb.wg.Done() + }() +} + +func (ddb *datadb) flushInmemoryParts() { + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + for { + ddb.partsLock.Lock() + pws := make([]*partWrapper, 0, len(ddb.inmemoryParts)) + pws = appendNotInMergePartsLocked(pws, ddb.inmemoryParts) + currentTime := time.Now() + partsToFlush := pws[:0] + for _, pw := range pws { + if pw.flushDeadline.Before(currentTime) { + partsToFlush = append(partsToFlush, pw) + } + } + setInMergeLocked(partsToFlush) + if len(pws) == 0 { + ddb.inmemoryPartsFlushersCount-- + } + ddb.partsLock.Unlock() + + if len(pws) == 0 { + // There are no in-memory parts, so stop the flusher. + return + } + ddb.mustMergePartsFinal(partsToFlush) + + select { + case <-ddb.stopCh: + return + case <-ticker.C: + } + } +} + +// startMergeWorkerLocked starts a merge worker. +// +// This function must be called under locked partsLock. 
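+//
+// A typical call pattern (see mustOpenDatadb above) is:
+//
+//	ddb.partsLock.Lock()
+//	ddb.startMergeWorkerLocked()
+//	ddb.partsLock.Unlock()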
+func (ddb *datadb) startMergeWorkerLocked() { + if ddb.mergeWorkersCount >= getMergeWorkersCount() { + return + } + ddb.mergeWorkersCount++ + ddb.wg.Add(1) + go func() { + globalMergeLimitCh <- struct{}{} + ddb.mustMergeExistingParts() + <-globalMergeLimitCh + ddb.wg.Done() + }() +} + +// globalMergeLimitCh limits the number of concurrent merges across all the partitions +var globalMergeLimitCh = make(chan struct{}, getMergeWorkersCount()) + +func getMergeWorkersCount() int { + n := cgroup.AvailableCPUs() + if n < 4 { + // Use bigger number of workers on systems with small number of CPU cores, + // since a single worker may become busy for long time when merging big parts. + // Then the remaining workers may continue performing merges + // for newly added small parts. + return 4 + } + return n +} + +func (ddb *datadb) mustMergeExistingParts() { + for !needStop(ddb.stopCh) { + maxOutBytes := ddb.availableDiskSpace() + + ddb.partsLock.Lock() + parts := make([]*partWrapper, 0, len(ddb.inmemoryParts)+len(ddb.fileParts)) + parts = appendNotInMergePartsLocked(parts, ddb.inmemoryParts) + parts = appendNotInMergePartsLocked(parts, ddb.fileParts) + pws := appendPartsToMerge(nil, parts, maxOutBytes) + setInMergeLocked(pws) + if len(pws) == 0 { + ddb.mergeWorkersCount-- + } + ddb.partsLock.Unlock() + + if len(pws) == 0 { + // Nothing to merge at the moment. + return + } + + partsSize := getCompressedSize(pws) + if !ddb.reserveDiskSpace(partsSize) { + // There is no free disk space for the merge, + // because concurrent merge workers already reserved the disk space. + // Try again with smaller maxOutBytes. + ddb.releasePartsToMerge(pws) + continue + } + ddb.mustMergeParts(pws, false) + ddb.releaseDiskSpace(partsSize) + } +} + +// appendNotInMergePartsLocked appends src parts with isInMerge=false to dst and returns the result. +// +// This function must be called under partsLock. +func appendNotInMergePartsLocked(dst, src []*partWrapper) []*partWrapper { + for _, pw := range src { + if !pw.isInMerge { + dst = append(dst, pw) + } + } + return dst +} + +// setInMergeLocked sets isInMerge flag for pws. +// +// This function must be called under partsLock. +func setInMergeLocked(pws []*partWrapper) { + for _, pw := range pws { + if pw.isInMerge { + logger.Panicf("BUG: partWrapper.isInMerge unexpectedly set to true") + } + pw.isInMerge = true + } +} + +func assertIsInMerge(pws []*partWrapper) { + for _, pw := range pws { + if !pw.isInMerge { + logger.Panicf("BUG: partWrapper.isInMerge unexpectedly set to false") + } + } +} + +// mustMergeParts merges pws to a single resulting part. +// +// if isFinal is set, then the resulting part will be saved to disk. +// +// All the parts inside pws must have isInMerge field set to true. +func (ddb *datadb) mustMergeParts(pws []*partWrapper, isFinal bool) { + if len(pws) == 0 { + // Nothing to merge. + return + } + assertIsInMerge(pws) + + startTime := time.Now() + + // Initialize destination paths. + dstPartType := ddb.getDstPartType(pws, isFinal) + mergeIdx := ddb.nextMergeIdx() + dstPartPath := ddb.getDstPartPath(dstPartType, mergeIdx) + + if isFinal && len(pws) == 1 && pws[0].mp != nil { + // Fast path: flush a single in-memory part to disk. + mp := pws[0].mp + mp.MustStoreToDisk(dstPartPath) + pwNew := ddb.openCreatedPart(&mp.ph, pws, nil, dstPartPath) + ddb.swapSrcWithDstParts(pws, pwNew, dstPartType) + return + } + + // Prepare blockStreamReaders for source parts. + bsrs := mustOpenBlockStreamReaders(pws) + + // Prepare BlockStreamWriter for destination part. 
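+	//
+	// The source totals computed below serve two purposes: srcSize decides whether
+	// the destination file part may go through the page cache
+	// (shouldUsePageCacheForPartSize), and all three counters are logged for merges
+	// which take longer than 30 seconds.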
+ srcSize := uint64(0) + srcRowsCount := uint64(0) + srcBlocksCount := uint64(0) + for _, pw := range pws { + srcSize += pw.p.ph.CompressedSizeBytes + srcRowsCount += pw.p.ph.RowsCount + srcBlocksCount += pw.p.ph.BlocksCount + } + bsw := getBlockStreamWriter() + var mpNew *inmemoryPart + if dstPartType == partInmemory { + mpNew = getInmemoryPart() + bsw.MustInitForInmemoryPart(mpNew) + } else { + nocache := !shouldUsePageCacheForPartSize(srcSize) + bsw.MustInitForFilePart(dstPartPath, nocache) + } + + // Merge source parts to destination part. + var ph partHeader + stopCh := ddb.stopCh + if isFinal { + // The final merge shouldn't be stopped even if ddb.stopCh is closed. + stopCh = nil + } + mustMergeBlockStreams(&ph, bsw, bsrs, stopCh) + putBlockStreamWriter(bsw) + for _, bsr := range bsrs { + putBlockStreamReader(bsr) + } + + // Persist partHeader for destination part after the merge. + if mpNew != nil { + mpNew.ph = ph + } else { + ph.mustWriteMetadata(dstPartPath) + // Make sure the created part directory listing is synced. + fs.MustSyncPath(dstPartPath) + } + if needStop(stopCh) { + ddb.releasePartsToMerge(pws) + ddb.mergeDoneCond.Broadcast() + // Remove incomplete destination part + if dstPartType == partFile { + fs.MustRemoveAll(dstPartPath) + } + return + } + + // Atomically swap the source parts with the newly created part. + pwNew := ddb.openCreatedPart(&ph, pws, mpNew, dstPartPath) + + dstSize := uint64(0) + dstRowsCount := uint64(0) + dstBlocksCount := uint64(0) + if pwNew != nil { + pDst := pwNew.p + dstSize = pDst.ph.CompressedSizeBytes + dstRowsCount = pDst.ph.RowsCount + dstBlocksCount = pDst.ph.BlocksCount + } + + ddb.swapSrcWithDstParts(pws, pwNew, dstPartType) + + d := time.Since(startTime) + if d <= 30*time.Second { + return + } + + // Log stats for long merges. + durationSecs := d.Seconds() + rowsPerSec := int(float64(srcRowsCount) / durationSecs) + logger.Infof("merged (%d parts, %d rows, %d blocks, %d bytes) into (1 part, %d rows, %d blocks, %d bytes) in %.3f seconds at %d rows/sec to %q", + len(pws), srcRowsCount, srcBlocksCount, srcSize, dstRowsCount, dstBlocksCount, dstSize, durationSecs, rowsPerSec, dstPartPath) +} + +func (ddb *datadb) nextMergeIdx() uint64 { + return atomic.AddUint64(&ddb.mergeIdx, 1) +} + +type partType int + +var ( + partInmemory = partType(0) + partFile = partType(1) +) + +func (ddb *datadb) getDstPartType(pws []*partWrapper, isFinal bool) partType { + if isFinal { + return partFile + } + dstPartSize := getCompressedSize(pws) + if dstPartSize > getMaxInmemoryPartSize() { + return partFile + } + if !areAllInmemoryParts(pws) { + // If at least a single source part is located in file, + // then the destination part must be in file for durability reasons. + return partFile + } + return partInmemory +} + +func (ddb *datadb) getDstPartPath(dstPartType partType, mergeIdx uint64) string { + ptPath := ddb.path + dstPartPath := "" + if dstPartType != partInmemory { + dstPartPath = filepath.Join(ptPath, fmt.Sprintf("%016X", mergeIdx)) + } + return dstPartPath +} + +func (ddb *datadb) openCreatedPart(ph *partHeader, pws []*partWrapper, mpNew *inmemoryPart, dstPartPath string) *partWrapper { + // Open the created part. + if ph.RowsCount == 0 { + // The created part is empty. Remove it + if mpNew == nil { + fs.MustRemoveAll(dstPartPath) + } + return nil + } + var p *part + var flushDeadline time.Time + if mpNew != nil { + // Open the created part from memory. 
+ p = mustOpenInmemoryPart(ddb.pt, mpNew) + flushDeadline = ddb.getFlushToDiskDeadline(pws) + } else { + // Open the created part from disk. + p = mustOpenFilePart(ddb.pt, dstPartPath) + } + return newPartWrapper(p, mpNew, flushDeadline) +} + +func (ddb *datadb) mustAddRows(lr *LogRows) { + if len(lr.streamIDs) == 0 { + return + } + + mp := getInmemoryPart() + mp.mustInitFromRows(lr) + p := mustOpenInmemoryPart(ddb.pt, mp) + + flushDeadline := time.Now().Add(ddb.flushInterval) + pw := newPartWrapper(p, mp, flushDeadline) + + ddb.partsLock.Lock() + ddb.inmemoryParts = append(ddb.inmemoryParts, pw) + ddb.startInmemoryPartsFlusherLocked() + if len(ddb.inmemoryParts) > defaultPartsToMerge { + ddb.startMergeWorkerLocked() + } + for len(ddb.inmemoryParts) > maxInmemoryPartsPerPartition { + // limit the pace for data ingestion if too many inmemory parts are created + ddb.mergeDoneCond.Wait() + } + ddb.partsLock.Unlock() +} + +// DatadbStats contains various stats for datadb. +type DatadbStats struct { + // InmemoryRowsCount is the number of rows, which weren't flushed to disk yet. + InmemoryRowsCount uint64 + + // FileRowsCount is the number of rows stored on disk. + FileRowsCount uint64 + + // InmemoryParts is the number of in-memory parts, which weren't flushed to disk yet. + InmemoryParts uint64 + + // FileParts is the number of file-based parts stored on disk. + FileParts uint64 + + // InmemoryBlocks is the number of in-memory blocks, which weren't flushed to disk yet. + InmemoryBlocks uint64 + + // FileBlocks is the number of file-based blocks stored on disk. + FileBlocks uint64 + + // CompressedInmemorySize is the size of compressed data stored in memory. + CompressedInmemorySize uint64 + + // CompressedFileSize is the size of compressed data stored on disk. + CompressedFileSize uint64 + + // UncompressedInmemorySize is the size of uncompressed data stored in memory. + UncompressedInmemorySize uint64 + + // UncompressedFileSize is the size of uncompressed data stored on disk. + UncompressedFileSize uint64 +} + +func (s *DatadbStats) reset() { + *s = DatadbStats{} +} + +// RowsCount returns the number of rows stored in datadb. +func (s *DatadbStats) RowsCount() uint64 { + return s.InmemoryRowsCount + s.FileRowsCount +} + +// updateStats updates s with ddb stats +func (ddb *datadb) updateStats(s *DatadbStats) { + ddb.partsLock.Lock() + + s.InmemoryRowsCount += getRowsCount(ddb.inmemoryParts) + s.FileRowsCount += getRowsCount(ddb.fileParts) + + s.InmemoryParts += uint64(len(ddb.inmemoryParts)) + s.FileParts += uint64(len(ddb.fileParts)) + + s.InmemoryBlocks += getBlocksCount(ddb.inmemoryParts) + s.FileBlocks += getBlocksCount(ddb.fileParts) + + s.CompressedInmemorySize += getCompressedSize(ddb.inmemoryParts) + s.CompressedFileSize += getCompressedSize(ddb.fileParts) + + s.UncompressedInmemorySize += getUncompressedSize(ddb.inmemoryParts) + s.UncompressedFileSize += getUncompressedSize(ddb.fileParts) + + ddb.partsLock.Unlock() +} + +// debugFlush() makes sure that the recently ingested data is availalbe for search. +func (ddb *datadb) debugFlush() { + // Nothing to do, since all the ingested data is available for search via ddb.inmemoryParts. +} + +func (ddb *datadb) mustMergePartsFinal(pws []*partWrapper) { + assertIsInMerge(pws) + + var pwsChunk []*partWrapper + for len(pws) > 0 { + pwsChunk = appendPartsToMerge(pwsChunk[:0], pws, (1<<64)-1) + if len(pwsChunk) == 0 { + pwsChunk = append(pwsChunk[:0], pws...) 
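+			// appendPartsToMerge returns an empty result when fewer than two parts
+			// remain or when no subset passes its write-amplification heuristic;
+			// the final flush must still push everything to disk, so the fallback
+			// above merges all the remaining parts in a single chunk.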
+ } + ddb.mustMergeParts(pwsChunk, true) + + partsToRemove := partsToMap(pwsChunk) + removedParts := 0 + pws, removedParts = removeParts(pws, partsToRemove) + if removedParts != len(pwsChunk) { + logger.Panicf("BUG: unexpected number of parts removed; got %d; want %d", removedParts, len(pwsChunk)) + } + } +} + +func partsToMap(pws []*partWrapper) map[*partWrapper]struct{} { + m := make(map[*partWrapper]struct{}, len(pws)) + for _, pw := range pws { + m[pw] = struct{}{} + } + if len(m) != len(pws) { + logger.Panicf("BUG: %d duplicate parts found out of %d parts", len(pws)-len(m), len(pws)) + } + return m +} + +func (ddb *datadb) swapSrcWithDstParts(pws []*partWrapper, pwNew *partWrapper, dstPartType partType) { + // Atomically unregister old parts and add new part to pt. + partsToRemove := partsToMap(pws) + removedInmemoryParts := 0 + removedFileParts := 0 + + ddb.partsLock.Lock() + + ddb.inmemoryParts, removedInmemoryParts = removeParts(ddb.inmemoryParts, partsToRemove) + ddb.fileParts, removedFileParts = removeParts(ddb.fileParts, partsToRemove) + if pwNew != nil { + switch dstPartType { + case partInmemory: + ddb.inmemoryParts = append(ddb.inmemoryParts, pwNew) + ddb.startInmemoryPartsFlusherLocked() + case partFile: + ddb.fileParts = append(ddb.fileParts, pwNew) + default: + logger.Panicf("BUG: unknown partType=%d", dstPartType) + } + if len(ddb.inmemoryParts)+len(ddb.fileParts) > defaultPartsToMerge { + ddb.startMergeWorkerLocked() + } + } + + // Atomically store the updated list of file-based parts on disk. + // This must be performed under partsLock in order to prevent from races + // when multiple concurrently running goroutines update the list. + if removedFileParts > 0 || pwNew != nil && dstPartType == partFile { + partNames := getPartNames(ddb.fileParts) + mustWritePartNames(ddb.path, partNames) + } + + ddb.partsLock.Unlock() + + removedParts := removedInmemoryParts + removedFileParts + if removedParts != len(partsToRemove) { + logger.Panicf("BUG: unexpected number of parts removed; got %d, want %d", removedParts, len(partsToRemove)) + } + + // Mark old parts as must be deleted and decrement reference count, + // so they are eventually closed and deleted. + for _, pw := range pws { + atomic.StoreUint32(&pw.mustBeDeleted, 1) + pw.decRef() + } + + ddb.mergeDoneCond.Broadcast() +} + +func removeParts(pws []*partWrapper, partsToRemove map[*partWrapper]struct{}) ([]*partWrapper, int) { + dst := pws[:0] + for _, pw := range pws { + if _, ok := partsToRemove[pw]; !ok { + dst = append(dst, pw) + } + } + for i := len(dst); i < len(pws); i++ { + pws[i] = nil + } + return dst, len(pws) - len(dst) +} + +func mustOpenBlockStreamReaders(pws []*partWrapper) []*blockStreamReader { + bsrs := make([]*blockStreamReader, 0, len(pws)) + for _, pw := range pws { + bsr := getBlockStreamReader() + if pw.mp != nil { + bsr.MustInitFromInmemoryPart(pw.mp) + } else { + bsr.MustInitFromFilePart(pw.p.path) + } + bsrs = append(bsrs, bsr) + } + return bsrs +} + +func newPartWrapper(p *part, mp *inmemoryPart, flushDeadline time.Time) *partWrapper { + pw := &partWrapper{ + p: p, + mp: mp, + + flushDeadline: flushDeadline, + } + + // Increase reference counter for newly created part - it is decreased when the part + // is removed from the list of open parts. 
+ pw.incRef() + + return pw +} + +func (ddb *datadb) getFlushToDiskDeadline(pws []*partWrapper) time.Time { + d := time.Now().Add(ddb.flushInterval) + for _, pw := range pws { + if pw.mp != nil && pw.flushDeadline.Before(d) { + d = pw.flushDeadline + } + } + return d +} + +func getMaxInmemoryPartSize() uint64 { + // Allocate 10% of allowed memory for in-memory parts. + n := uint64(0.1 * float64(memory.Allowed()) / maxInmemoryPartsPerPartition) + if n < 1e6 { + n = 1e6 + } + return n +} + +func areAllInmemoryParts(pws []*partWrapper) bool { + for _, pw := range pws { + if pw.mp == nil { + return false + } + } + return true +} + +func (ddb *datadb) releasePartsToMerge(pws []*partWrapper) { + ddb.partsLock.Lock() + for _, pw := range pws { + if !pw.isInMerge { + logger.Panicf("BUG: missing isInMerge flag on the part %q", pw.p.path) + } + pw.isInMerge = false + } + ddb.partsLock.Unlock() +} + +func (ddb *datadb) availableDiskSpace() uint64 { + available := fs.MustGetFreeSpace(ddb.path) + reserved := atomic.LoadUint64(&reservedDiskSpace) + if available < reserved { + return 0 + } + return available - reserved +} + +func (ddb *datadb) reserveDiskSpace(n uint64) bool { + available := fs.MustGetFreeSpace(ddb.path) + reserved := atomic.AddUint64(&reservedDiskSpace, n) + if available > reserved { + return true + } + ddb.releaseDiskSpace(n) + return false +} + +func (ddb *datadb) releaseDiskSpace(n uint64) { + atomic.AddUint64(&reservedDiskSpace, -n) +} + +// reservedDiskSpace tracks global reserved disk space for currently executed +// background merges across all the partitions. +// +// It should allow avoiding background merges when there is no free disk space. +var reservedDiskSpace uint64 + +func needStop(stopCh <-chan struct{}) bool { + select { + case <-stopCh: + return true + default: + return false + } +} + +// mustCloseDatadb can be called only when nobody accesses ddb. +func mustCloseDatadb(ddb *datadb) { + // Stop background workers + close(ddb.stopCh) + ddb.wg.Wait() + + // flush in-memory data to disk + pws := append([]*partWrapper{}, ddb.inmemoryParts...) + setInMergeLocked(pws) + ddb.mustMergePartsFinal(pws) + + // There is no need in using ddb.partsLock here, since nobody should acces ddb now. 
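+	//
+	// The background workers have already exited (stopCh is closed and ddb.wg.Wait()
+	// returned) and the final merge above flushed the in-memory data, so every
+	// remaining wrapper is expected to hold exactly one reference, which is released
+	// by decRef() below.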
+ for _, pw := range ddb.inmemoryParts { + pw.decRef() + if pw.refCount != 0 { + logger.Panicf("BUG: there are %d references to inmemoryPart", pw.refCount) + } + } + ddb.inmemoryParts = nil + + for _, pw := range ddb.fileParts { + pw.decRef() + if pw.refCount != 0 { + logger.Panicf("BUG: ther are %d references to filePart", pw.refCount) + } + } + ddb.fileParts = nil + + ddb.path = "" + ddb.pt = nil +} + +func getPartNames(pws []*partWrapper) []string { + partNames := make([]string, 0, len(pws)) + for _, pw := range pws { + if pw.mp != nil { + // Skip in-memory parts + continue + } + partName := filepath.Base(pw.p.path) + partNames = append(partNames, partName) + } + sort.Strings(partNames) + return partNames +} + +func mustWritePartNames(path string, partNames []string) { + data, err := json.Marshal(partNames) + if err != nil { + logger.Panicf("BUG: cannot marshal partNames to JSON: %s", err) + } + partNamesPath := filepath.Join(path, partsFilename) + fs.MustWriteAtomic(partNamesPath, data, true) +} + +func mustReadPartNames(path string) []string { + partNamesPath := filepath.Join(path, partsFilename) + data, err := os.ReadFile(partNamesPath) + if err != nil { + logger.Panicf("FATAL: cannot read %s: %s", partNamesPath, err) + } + var partNames []string + if err := json.Unmarshal(data, &partNames); err != nil { + logger.Panicf("FATAL: cannot parse %s: %s", partNamesPath, err) + } + return partNames +} + +// mustRemoveUnusedDirs removes dirs at path, which are missing in partNames. +// +// These dirs may be left after unclean shutdown. +func mustRemoveUnusedDirs(path string, partNames []string) { + des := fs.MustReadDir(path) + m := make(map[string]struct{}, len(partNames)) + for _, partName := range partNames { + m[partName] = struct{}{} + } + removedDirs := 0 + for _, de := range des { + if !fs.IsDirOrSymlink(de) { + // Skip non-directories. + continue + } + fn := de.Name() + if _, ok := m[fn]; !ok { + deletePath := filepath.Join(path, fn) + fs.MustRemoveAll(deletePath) + removedDirs++ + } + } + if removedDirs > 0 { + fs.MustSyncPath(path) + } +} + +// appendPartsToMerge finds optimal parts to merge from src, +// appends them to dst and returns the result. +func appendPartsToMerge(dst, src []*partWrapper, maxOutBytes uint64) []*partWrapper { + if len(src) < 2 { + // There is no need in merging zero or one part :) + return dst + } + + // Filter out too big parts. + // This should reduce N for O(N^2) algorithm below. + maxInPartBytes := uint64(float64(maxOutBytes) / minMergeMultiplier) + tmp := make([]*partWrapper, 0, len(src)) + for _, pw := range src { + if pw.p.ph.CompressedSizeBytes > maxInPartBytes { + continue + } + tmp = append(tmp, pw) + } + src = tmp + + sortPartsForOptimalMerge(src) + + maxSrcParts := defaultPartsToMerge + if maxSrcParts > len(src) { + maxSrcParts = len(src) + } + minSrcParts := (maxSrcParts + 1) / 2 + if minSrcParts < 2 { + minSrcParts = 2 + } + + // Exhaustive search for parts giving the lowest write amplification when merged. + var pws []*partWrapper + maxM := float64(0) + for i := minSrcParts; i <= maxSrcParts; i++ { + for j := 0; j <= len(src)-i; j++ { + a := src[j : j+i] + if a[0].p.ph.CompressedSizeBytes*uint64(len(a)) < a[len(a)-1].p.ph.CompressedSizeBytes { + // Do not merge parts with too big difference in size, + // since this results in unbalanced merges. + continue + } + outSize := getCompressedSize(a) + if outSize > maxOutBytes { + // There is no need in verifying remaining parts with bigger sizes. 
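+				// src is sorted by CompressedSizeBytes (see sortPartsForOptimalMerge),
+				// so any window of the same length starting further to the right
+				// is at least as big as the current one.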
+ break + } + m := float64(outSize) / float64(a[len(a)-1].p.ph.CompressedSizeBytes) + if m < maxM { + continue + } + maxM = m + pws = a + } + } + + minM := float64(defaultPartsToMerge) / 2 + if minM < minMergeMultiplier { + minM = minMergeMultiplier + } + if maxM < minM { + // There is no sense in merging parts with too small m, + // since this leads to high disk write IO. + return dst + } + return append(dst, pws...) +} + +func sortPartsForOptimalMerge(pws []*partWrapper) { + // Sort src parts by size and backwards timestamp. + // This should improve adjanced points' locality in the merged parts. + sort.Slice(pws, func(i, j int) bool { + a := &pws[i].p.ph + b := &pws[j].p.ph + if a.CompressedSizeBytes == b.CompressedSizeBytes { + return a.MinTimestamp > b.MinTimestamp + } + return a.CompressedSizeBytes < b.CompressedSizeBytes + }) +} + +func getCompressedSize(pws []*partWrapper) uint64 { + n := uint64(0) + for _, pw := range pws { + n += pw.p.ph.CompressedSizeBytes + } + return n +} + +func getUncompressedSize(pws []*partWrapper) uint64 { + n := uint64(0) + for _, pw := range pws { + n += pw.p.ph.UncompressedSizeBytes + } + return n +} + +func getRowsCount(pws []*partWrapper) uint64 { + n := uint64(0) + for _, pw := range pws { + n += pw.p.ph.RowsCount + } + return n +} + +func getBlocksCount(pws []*partWrapper) uint64 { + n := uint64(0) + for _, pw := range pws { + n += pw.p.ph.BlocksCount + } + return n +} + +func shouldUsePageCacheForPartSize(size uint64) bool { + mem := memory.Remaining() / defaultPartsToMerge + return size <= uint64(mem) +} diff --git a/lib/logstorage/datadb_test.go b/lib/logstorage/datadb_test.go new file mode 100644 index 000000000..5f97a9bd8 --- /dev/null +++ b/lib/logstorage/datadb_test.go @@ -0,0 +1,91 @@ +package logstorage + +import ( + "math/rand" + "testing" +) + +func TestAppendPartsToMergeManyParts(t *testing.T) { + // Verify that big number of parts are merged into minimal number of parts + // using minimum merges. 
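+	//
+	// The loop below repeatedly asks appendPartsToMerge for the next merge and
+	// replaces the selected parts with a single merged part, while tracking the
+	// total number of bytes written by those merges. The overhead check then bounds
+	// the resulting write amplification, and the final sizes check bounds the number
+	// of parts left unmerged.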
+ var sizes []uint64 + maxOutSize := uint64(0) + r := rand.New(rand.NewSource(1)) + for i := 0; i < 1024; i++ { + n := uint64(uint32(r.NormFloat64() * 1e9)) + n++ + maxOutSize += n + sizes = append(sizes, n) + } + pws := newTestPartWrappersForSizes(sizes) + + iterationsCount := 0 + sizeMergedTotal := uint64(0) + for { + pms := appendPartsToMerge(nil, pws, maxOutSize) + if len(pms) == 0 { + break + } + m := make(map[*partWrapper]bool) + for _, pw := range pms { + m[pw] = true + } + var pwsNew []*partWrapper + size := uint64(0) + for _, pw := range pws { + if m[pw] { + size += pw.p.ph.CompressedSizeBytes + } else { + pwsNew = append(pwsNew, pw) + } + } + pw := &partWrapper{ + p: &part{ + ph: partHeader{ + CompressedSizeBytes: size, + }, + }, + } + sizeMergedTotal += size + pwsNew = append(pwsNew, pw) + pws = pwsNew + iterationsCount++ + } + sizes = newTestSizesFromPartWrappers(pws) + sizeTotal := uint64(0) + for _, size := range sizes { + sizeTotal += uint64(size) + } + overhead := float64(sizeMergedTotal) / float64(sizeTotal) + if overhead > 2.1 { + t.Fatalf("too big overhead; sizes=%d, iterationsCount=%d, sizeTotal=%d, sizeMergedTotal=%d, overhead=%f", + sizes, iterationsCount, sizeTotal, sizeMergedTotal, overhead) + } + if len(sizes) > 18 { + t.Fatalf("too many sizes %d; sizes=%d, iterationsCount=%d, sizeTotal=%d, sizeMergedTotal=%d, overhead=%f", + len(sizes), sizes, iterationsCount, sizeTotal, sizeMergedTotal, overhead) + } +} + +func newTestSizesFromPartWrappers(pws []*partWrapper) []uint64 { + var sizes []uint64 + for _, pw := range pws { + sizes = append(sizes, pw.p.ph.CompressedSizeBytes) + } + return sizes +} + +func newTestPartWrappersForSizes(sizes []uint64) []*partWrapper { + var pws []*partWrapper + for _, size := range sizes { + pw := &partWrapper{ + p: &part{ + ph: partHeader{ + CompressedSizeBytes: size, + }, + }, + } + pws = append(pws, pw) + } + return pws +} diff --git a/lib/logstorage/encoding.go b/lib/logstorage/encoding.go new file mode 100644 index 000000000..48f05154d --- /dev/null +++ b/lib/logstorage/encoding.go @@ -0,0 +1,314 @@ +package logstorage + +import ( + "fmt" + "sync" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" +) + +// marshalStringsBlock marshals a and appends the result to dst. +// +// The marshaled strings block can be unmarshaled with stringsBlockUnmarshaler. +func marshalStringsBlock(dst []byte, a []string) []byte { + // Encode string lengths + u64s := encoding.GetUint64s(len(a)) + aLens := u64s.A[:0] + for _, s := range a { + aLens = append(aLens, uint64(len(s))) + } + u64s.A = aLens + dst = marshalUint64Block(dst, u64s.A) + encoding.PutUint64s(u64s) + + // Encode strings + bb := bbPool.Get() + b := bb.B + for _, s := range a { + b = append(b, s...) + } + bb.B = b + dst = marshalBytesBlock(dst, bb.B) + bbPool.Put(bb) + + return dst +} + +// stringsBlockUnmarshaler is used for unmarshaling the block returned from marshalStringsBlock() +// +// use getStringsBlockUnmarshaler() for obtaining the unmarshaler from the pool in order to save memory allocations. +type stringsBlockUnmarshaler struct { + // data contains the data for the unmarshaled values + data []byte +} + +func (sbu *stringsBlockUnmarshaler) reset() { + sbu.data = sbu.data[:0] +} + +// unmarshal unmarshals itemsCount strings from src, appends them to dst and returns the result. +// +// The returned strings are valid until sbu.reset() call. 
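+//
+// A typical (hypothetical) call sequence looks like this:
+//
+//	sbu := getStringsBlockUnmarshaler()
+//	values, err := sbu.unmarshal(nil, data, itemsCount)
+//	// use values while sbu is alive, then release it:
+//	putStringsBlockUnmarshaler(sbu)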
+func (sbu *stringsBlockUnmarshaler) unmarshal(dst []string, src []byte, itemsCount uint64) ([]string, error) { + u64s := encoding.GetUint64s(0) + defer encoding.PutUint64s(u64s) + + // Decode string lengths + var tail []byte + var err error + u64s.A, tail, err = unmarshalUint64Block(u64s.A[:0], src, itemsCount) + if err != nil { + return dst, fmt.Errorf("cannot unmarshal string lengths: %w", err) + } + aLens := u64s.A + src = tail + + // Read bytes block into sbu.data + dataLen := len(sbu.data) + sbu.data, tail, err = unmarshalBytesBlock(sbu.data, src) + if err != nil { + return dst, fmt.Errorf("cannot unmarshal bytes block with strings: %w", err) + } + if len(tail) > 0 { + return dst, fmt.Errorf("unexpected non-empty tail after reading bytes block with strings; len(tail)=%d", len(tail)) + } + + // Decode strings from sbu.data into dst + data := sbu.data[dataLen:] + for _, sLen := range aLens { + if uint64(len(data)) < sLen { + return dst, fmt.Errorf("cannot unmarshal a string with the length %d bytes from %d bytes", sLen, len(data)) + } + s := bytesutil.ToUnsafeString(data[:sLen]) + data = data[sLen:] + dst = append(dst, s) + } + + return dst, nil +} + +// marshalUint64Block appends marshaled a to dst and returns the result. +func marshalUint64Block(dst []byte, a []uint64) []byte { + bb := bbPool.Get() + bb.B = marshalUint64Items(bb.B[:0], a) + dst = marshalBytesBlock(dst, bb.B) + bbPool.Put(bb) + return dst +} + +// unmarshalUint64Block appends unmarshaled from src itemsCount uint64 items to dst and returns the result. +func unmarshalUint64Block(dst []uint64, src []byte, itemsCount uint64) ([]uint64, []byte, error) { + bb := bbPool.Get() + defer bbPool.Put(bb) + + // Unmarshal the underlying bytes block + var err error + bb.B, src, err = unmarshalBytesBlock(bb.B[:0], src) + if err != nil { + return dst, src, fmt.Errorf("cannot unmarshal bytes block: %w", err) + } + + // Unmarshal the items from bb. + dst, err = unmarshalUint64Items(dst, bb.B, itemsCount) + if err != nil { + return dst, src, fmt.Errorf("cannot unmarshal %d uint64 items from bytes block of length %d bytes: %w", itemsCount, len(bb.B), err) + } + return dst, src, nil +} + +const ( + uintBlockType8 = 0 + uintBlockType16 = 1 + uintBlockType32 = 2 + uintBlockType64 = 3 +) + +// marshalUint64Items appends the marshaled a items to dst and returns the result. +func marshalUint64Items(dst []byte, a []uint64) []byte { + // Do not marshal len(a), since it is expected that unmarshaler knows it. + nMax := uint64(0) + for _, n := range a { + if n > nMax { + nMax = n + } + } + switch { + case nMax < (1 << 8): + dst = append(dst, uintBlockType8) + for _, n := range a { + dst = append(dst, byte(n)) + } + case nMax < (1 << 16): + dst = append(dst, uintBlockType16) + for _, n := range a { + dst = encoding.MarshalUint16(dst, uint16(n)) + } + case nMax < (1 << 32): + dst = append(dst, uintBlockType32) + for _, n := range a { + dst = encoding.MarshalUint32(dst, uint32(n)) + } + default: + dst = append(dst, uintBlockType64) + for _, n := range a { + dst = encoding.MarshalUint64(dst, uint64(n)) + } + } + return dst +} + +// unmarshalUint64Items appends unmarshaled from src itemsCount uint64 items to dst and returns the result. 
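+//
+// The first byte of src selects the per-item width written by marshalUint64Items
+// (uintBlockType8, uintBlockType16, uintBlockType32 or uintBlockType64). For example,
+// the values [3, 200, 7] all fit into a single byte each, so marshalUint64Items
+// encodes them as the uintBlockType8 byte followed by the bytes 3, 200 and 7
+// (4 bytes in total).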
+func unmarshalUint64Items(dst []uint64, src []byte, itemsCount uint64) ([]uint64, error) { + // Unmarshal block type + if len(src) < 1 { + return dst, fmt.Errorf("cannot unmarshal uint64 block type from empty src") + } + blockType := src[0] + src = src[1:] + + switch blockType { + case uintBlockType8: + // A block with items smaller than 1<<8 bytes + if uint64(len(src)) != itemsCount { + return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), itemsCount) + } + for _, v := range src { + dst = append(dst, uint64(v)) + } + case uintBlockType16: + // A block with items smaller than 1<<16 bytes + if uint64(len(src)) != 2*itemsCount { + return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 2*itemsCount) + } + for len(src) > 0 { + v := encoding.UnmarshalUint16(src) + src = src[2:] + dst = append(dst, uint64(v)) + } + case uintBlockType32: + // A block with items smaller than 1<<32 bytes + if uint64(len(src)) != 4*itemsCount { + return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 4*itemsCount) + } + for len(src) > 0 { + v := encoding.UnmarshalUint32(src) + src = src[4:] + dst = append(dst, uint64(v)) + } + case uintBlockType64: + // A block with items smaller than 1<<64 bytes + if uint64(len(src)) != 8*itemsCount { + return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 8*itemsCount) + } + for len(src) > 0 { + v := encoding.UnmarshalUint64(src) + src = src[8:] + dst = append(dst, v) + } + default: + return dst, fmt.Errorf("unexpected uint64 block type: %d; want 0, 1, 2 or 3", blockType) + } + return dst, nil +} + +const ( + marshalBytesTypePlain = 0 + marshalBytesTypeZSTD = 1 +) + +func marshalBytesBlock(dst, src []byte) []byte { + if len(src) < 128 { + // Marshal the block in plain without compression + dst = append(dst, marshalBytesTypePlain) + dst = append(dst, byte(len(src))) + return append(dst, src...) + } + + // Compress the block + dst = append(dst, marshalBytesTypeZSTD) + bb := bbPool.Get() + bb.B = encoding.CompressZSTDLevel(bb.B[:0], src, 1) + dst = encoding.MarshalVarUint64(dst, uint64(len(bb.B))) + dst = append(dst, bb.B...) + bbPool.Put(bb) + return dst +} + +func unmarshalBytesBlock(dst, src []byte) ([]byte, []byte, error) { + if len(src) < 1 { + return dst, src, fmt.Errorf("cannot unmarshal block type from empty src") + } + blockType := src[0] + src = src[1:] + switch blockType { + case marshalBytesTypePlain: + // Plain block + + // Read block length + if len(src) < 1 { + return dst, src, fmt.Errorf("cannot unmarshal plain block size from empty src") + } + blockLen := int(src[0]) + src = src[1:] + if len(src) < blockLen { + return dst, src, fmt.Errorf("cannot read plain block with the size %d bytes from %b bytes", blockLen, len(src)) + } + + // Copy the block to dst + dst = append(dst, src[:blockLen]...) 
+ src = src[blockLen:] + return dst, src, nil + case marshalBytesTypeZSTD: + // Compressed block + + // Read block length + tail, blockLen, err := encoding.UnmarshalVarUint64(src) + if err != nil { + return dst, src, fmt.Errorf("cannot unmarshal compressed block size: %w", err) + } + src = tail + if uint64(len(src)) < blockLen { + return dst, src, fmt.Errorf("cannot read compressed block with the size %d bytes from %d bytes", blockLen, len(src)) + } + compressedBlock := src[:blockLen] + src = src[blockLen:] + + // Decompress the block + bb := bbPool.Get() + bb.B, err = encoding.DecompressZSTD(bb.B[:0], compressedBlock) + if err != nil { + return dst, src, fmt.Errorf("cannot decompress block: %w", err) + } + + // Copy the decompressed block to dst. + dst = append(dst, bb.B...) + bbPool.Put(bb) + return dst, src, nil + default: + return dst, src, fmt.Errorf("unexpected block type: %d; supported types: 0, 1", blockType) + } +} + +var bbPool bytesutil.ByteBufferPool + +// getStringsBlockUnmarshaler returns stringsBlockUnmarshaler from the pool. +// +// Return back the stringsBlockUnmarshaler to the pool by calling putStringsBlockUnmarshaler(). +func getStringsBlockUnmarshaler() *stringsBlockUnmarshaler { + v := sbuPool.Get() + if v == nil { + return &stringsBlockUnmarshaler{} + } + return v.(*stringsBlockUnmarshaler) +} + +// putStringsBlockUnmarshaler returns back sbu to the pool. +// +// sbu mustn't be used after returning to the pool. +func putStringsBlockUnmarshaler(sbu *stringsBlockUnmarshaler) { + sbu.reset() + sbuPool.Put(sbu) +} + +var sbuPool sync.Pool diff --git a/lib/logstorage/encoding_test.go b/lib/logstorage/encoding_test.go new file mode 100644 index 000000000..3050e531b --- /dev/null +++ b/lib/logstorage/encoding_test.go @@ -0,0 +1,86 @@ +package logstorage + +import ( + "fmt" + "reflect" + "strings" + "testing" +) + +func TestMarshalUnmarshalStringsBlock(t *testing.T) { + f := func(logs string, blockLenExpected int) { + t.Helper() + var a []string + if logs != "" { + a = strings.Split(logs, "\n") + } + data := marshalStringsBlock(nil, a) + if len(data) != blockLenExpected { + t.Fatalf("unexpected block length; got %d; want %d; block=%q", len(data), blockLenExpected, data) + } + sbu := getStringsBlockUnmarshaler() + values, err := sbu.unmarshal(nil, data, uint64(len(a))) + if err != nil { + t.Fatalf("cannot unmarshal strings block: %s", err) + } + if !reflect.DeepEqual(values, a) { + t.Fatalf("unexpected strings after unmarshaling;\ngot\n%q\nwant\n%q", values, a) + } + putStringsBlockUnmarshaler(sbu) + } + f("", 5) + f("foo", 9) + f(`foo +bar +baz +`, 18) + f(` +Apr 28 13:39:06 localhost systemd[1]: Started Network Manager Script Dispatcher Service. +Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts) +Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts... 
+Apr 28 13:40:05 localhost kernel: [35544.823503] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 1 (2437/0 MHz) +Apr 28 13:40:15 localhost kernel: [35554.295612] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 2 (2447/0 MHz) +Apr 28 13:43:37 localhost NetworkManager[1516]: [1651142617.3668] manager: NetworkManager state is now CONNECTED_GLOBAL +Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Activating via systemd: service name='org.freedesktop.nm_dispatcher' unit='dbus-org.freedesktop.nm-dispatcher.service' requested by ':1.13' (uid=0 pid=1516 comm="/usr/sbin/NetworkManager --no-daemon " label="unconfined") +Apr 28 13:43:37 localhost systemd[1]: Starting Network Manager Script Dispatcher Service... +Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] The default IPv4 route is: /org/freedesktop/NetworkManager/ActiveConnection/10 +Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Not a paid data plan: /org/freedesktop/NetworkManager/ActiveConnection/10 +Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Found usable connection: /org/freedesktop/NetworkManager/ActiveConnection/10 +Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Successfully activated service 'org.freedesktop.nm_dispatcher' +Apr 28 13:43:37 localhost systemd[1]: Started Network Manager Script Dispatcher Service. +Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts) +Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts... +Apr 28 13:43:38 localhost whoopsie[2812]: [13:43:38] online +Apr 28 13:45:01 localhost CRON[12181]: (root) CMD (command -v debian-sa1 > /dev/null && debian-sa1 1 1) +Apr 28 13:48:01 localhost kernel: [36020.497806] CPU0: Core temperature above threshold, cpu clock throttled (total events = 22034) +Apr 28 13:48:01 localhost kernel: [36020.497807] CPU2: Core temperature above threshold, cpu clock throttled (total events = 22034) +Apr 28 13:48:01 localhost kernel: [36020.497809] CPU1: Package temperature above threshold, cpu clock throttled (total events = 27400) +Apr 28 13:48:01 localhost kernel: [36020.497810] CPU3: Package temperature above threshold, cpu clock throttled (total events = 27400) +Apr 28 13:48:01 localhost kernel: [36020.497810] CPU2: Package temperature above threshold, cpu clock throttled (total events = 27400) +Apr 28 13:48:01 localhost kernel: [36020.497812] CPU0: Package temperature above threshold, cpu clock throttled (total events = 27400) +Apr 28 13:48:01 localhost kernel: [36020.499855] CPU2: Core temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499855] CPU0: Core temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499856] CPU1: Package temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499857] CPU3: Package temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499858] CPU0: Package temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499859] CPU2: Package temperature/speed normal +`, 951) + + // Generate a string longer than 1<<16 bytes + s := "foo" + for len(s) < (1 << 16) { + s += s + } + s += "\n" + lines := s + f(lines, 36) + lines += s + f(lines, 52) + + // Generate more than 256 strings + lines = "" + for i := 0; i < 1000; i++ { + lines += fmt.Sprintf("line %d\n", i) + } + f(lines, 766) +} diff --git a/lib/logstorage/encoding_timing_test.go b/lib/logstorage/encoding_timing_test.go new file mode 100644 index 000000000..6bb0f21ff --- 
/dev/null +++ b/lib/logstorage/encoding_timing_test.go @@ -0,0 +1,73 @@ +package logstorage + +import ( + "fmt" + "strings" + "testing" +) + +func BenchmarkMarshalStringsBlock(b *testing.B) { + block := strings.Split(benchLogs, "\n") + + b.SetBytes(int64(len(benchLogs))) + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + var buf []byte + for pb.Next() { + buf = marshalStringsBlock(buf[:0], block) + } + }) +} + +func BenchmarkStringsBlockUnmarshaler_Unmarshal(b *testing.B) { + block := strings.Split(benchLogs, "\n") + data := marshalStringsBlock(nil, block) + + b.SetBytes(int64(len(benchLogs))) + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + sbu := getStringsBlockUnmarshaler() + var values []string + for pb.Next() { + var err error + values, err = sbu.unmarshal(values[:0], data, uint64(len(block))) + if err != nil { + panic(fmt.Errorf("unexpected error: %w", err)) + } + sbu.reset() + } + putStringsBlockUnmarshaler(sbu) + }) +} + +const benchLogs = ` +Apr 28 13:39:06 localhost systemd[1]: Started Network Manager Script Dispatcher Service. +Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts) +Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts... +Apr 28 13:40:05 localhost kernel: [35544.823503] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 1 (2437/0 MHz) +Apr 28 13:40:15 localhost kernel: [35554.295612] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 2 (2447/0 MHz) +Apr 28 13:43:37 localhost NetworkManager[1516]: [1651142617.3668] manager: NetworkManager state is now CONNECTED_GLOBAL +Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Activating via systemd: service name='org.freedesktop.nm_dispatcher' unit='dbus-org.freedesktop.nm-dispatcher.service' requested by ':1.13' (uid=0 pid=1516 comm="/usr/sbin/NetworkManager --no-daemon " label="unconfined") +Apr 28 13:43:37 localhost systemd[1]: Starting Network Manager Script Dispatcher Service... +Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] The default IPv4 route is: /org/freedesktop/NetworkManager/ActiveConnection/10 +Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Not a paid data plan: /org/freedesktop/NetworkManager/ActiveConnection/10 +Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Found usable connection: /org/freedesktop/NetworkManager/ActiveConnection/10 +Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Successfully activated service 'org.freedesktop.nm_dispatcher' +Apr 28 13:43:37 localhost systemd[1]: Started Network Manager Script Dispatcher Service. +Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts) +Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts... 
+Apr 28 13:43:38 localhost whoopsie[2812]: [13:43:38] online +Apr 28 13:45:01 localhost CRON[12181]: (root) CMD (command -v debian-sa1 > /dev/null && debian-sa1 1 1) +Apr 28 13:48:01 localhost kernel: [36020.497806] CPU0: Core temperature above threshold, cpu clock throttled (total events = 22034) +Apr 28 13:48:01 localhost kernel: [36020.497807] CPU2: Core temperature above threshold, cpu clock throttled (total events = 22034) +Apr 28 13:48:01 localhost kernel: [36020.497809] CPU1: Package temperature above threshold, cpu clock throttled (total events = 27400) +Apr 28 13:48:01 localhost kernel: [36020.497810] CPU3: Package temperature above threshold, cpu clock throttled (total events = 27400) +Apr 28 13:48:01 localhost kernel: [36020.497810] CPU2: Package temperature above threshold, cpu clock throttled (total events = 27400) +Apr 28 13:48:01 localhost kernel: [36020.497812] CPU0: Package temperature above threshold, cpu clock throttled (total events = 27400) +Apr 28 13:48:01 localhost kernel: [36020.499855] CPU2: Core temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499855] CPU0: Core temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499856] CPU1: Package temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499857] CPU3: Package temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499858] CPU0: Package temperature/speed normal +Apr 28 13:48:01 localhost kernel: [36020.499859] CPU2: Package temperature/speed normal +` diff --git a/lib/logstorage/filenames.go b/lib/logstorage/filenames.go new file mode 100644 index 000000000..cd4ac99c5 --- /dev/null +++ b/lib/logstorage/filenames.go @@ -0,0 +1,22 @@ +package logstorage + +const ( + metaindexFilename = "metaindex.bin" + indexFilename = "index.bin" + columnsHeaderFilename = "columns_header.bin" + timestampsFilename = "timestamps.bin" + fieldValuesFilename = "field_values.bin" + fieldBloomFilename = "field_bloom.bin" + messageValuesFilename = "message_values.bin" + messageBloomFilename = "message_bloom.bin" + + metadataFilename = "metadata.json" + partsFilename = "parts.json" + + streamIDCacheFilename = "stream_id.bin" + + indexdbDirname = "indexdb" + datadbDirname = "datadb" + cacheDirname = "cache" + partitionsDirname = "partitions" +) diff --git a/lib/logstorage/filters.go b/lib/logstorage/filters.go new file mode 100644 index 000000000..55a8a9905 --- /dev/null +++ b/lib/logstorage/filters.go @@ -0,0 +1,3053 @@ +package logstorage + +import ( + "bytes" + "fmt" + "math" + "regexp" + "strconv" + "strings" + "sync" + "unicode/utf8" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +func getFilterBitmap(bitsLen int) *filterBitmap { + v := filterBitmapPool.Get() + if v == nil { + v = &filterBitmap{} + } + bm := v.(*filterBitmap) + bm.init(bitsLen) + return bm +} + +func putFilterBitmap(bm *filterBitmap) { + bm.reset() + filterBitmapPool.Put(bm) +} + +var filterBitmapPool sync.Pool + +type filterBitmap struct { + a []uint64 + bitsLen int +} + +func (bm *filterBitmap) reset() { + bm.resetBits() + bm.a = bm.a[:0] + + bm.bitsLen = 0 +} + +func (bm *filterBitmap) copyFrom(src *filterBitmap) { + bm.reset() + + bm.a = append(bm.a[:0], src.a...) + bm.bitsLen = src.bitsLen +} + +func (bm *filterBitmap) init(bitsLen int) { + a := bm.a + wordsLen := (bitsLen + 63) / 64 + if n := wordsLen - cap(a); n > 0 { + a = append(a[:cap(a)], make([]uint64, n)...) 
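+		// The append above reslices the pooled slice to its full capacity and adds
+		// the missing zero-valued words, so the a[:wordsLen] reslice below yields
+		// exactly wordsLen 64-bit words.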
+ } + a = a[:wordsLen] + bm.a = a + bm.bitsLen = bitsLen +} + +func (bm *filterBitmap) resetBits() { + a := bm.a + for i := range a { + a[i] = 0 + } +} + +func (bm *filterBitmap) setBits() { + a := bm.a + for i := range a { + a[i] = ^uint64(0) + } + tailBits := bm.bitsLen % 64 + if tailBits > 0 && len(a) > 0 { + // Zero bits outside bitsLen at the last word + a[len(a)-1] &= (uint64(1) << tailBits) - 1 + } +} + +func (bm *filterBitmap) isZero() bool { + for _, word := range bm.a { + if word != 0 { + return false + } + } + return true +} + +func (bm *filterBitmap) andNot(x *filterBitmap) { + if bm.bitsLen != x.bitsLen { + logger.Panicf("BUG: cannot merge bitmaps with distinct lengths; %d vs %d", bm.bitsLen, x.bitsLen) + } + a := bm.a + b := x.a + for i := range a { + a[i] &= ^b[i] + } +} + +func (bm *filterBitmap) or(x *filterBitmap) { + if bm.bitsLen != x.bitsLen { + logger.Panicf("BUG: cannot merge bitmaps with distinct lengths; %d vs %d", bm.bitsLen, x.bitsLen) + } + a := bm.a + b := x.a + for i := range a { + a[i] |= b[i] + } +} + +// forEachSetBit calls f for each set bit and clears that bit if f returns false +func (bm *filterBitmap) forEachSetBit(f func(idx int) bool) { + a := bm.a + bitsLen := bm.bitsLen + for i, word := range a { + if word == 0 { + continue + } + for j := 0; j < 64; j++ { + mask := uint64(1) << j + if (word & mask) == 0 { + continue + } + idx := i*64 + j + if idx >= bitsLen { + break + } + if !f(idx) { + a[i] &= ^mask + } + } + } +} + +type filter interface { + // String returns string representation of the filter + String() string + + // updateReferencedColumnNames updates m with the column names referenced by the filter + updateReferencedColumnNames(m map[string]struct{}) + + // apply must update bm according to the filter applied to the given bs block + apply(bs *blockSearch, bm *filterBitmap) +} + +// noopFilter does nothing +type noopFilter struct { +} + +func (nf *noopFilter) String() string { + return "" +} + +func (nf *noopFilter) updateReferencedColumnNames(m map[string]struct{}) { + // nothing to do +} + +func (nf *noopFilter) apply(bs *blockSearch, bm *filterBitmap) { + // nothing to do +} + +// orFilter contains filters joined by OR operator. +// +// It is epxressed as `f1 OR f2 ... OR fN` in LogsQL. +type orFilter struct { + filters []filter +} + +func (of *orFilter) String() string { + filters := of.filters + a := make([]string, len(filters)) + for i, f := range filters { + s := f.String() + a[i] = s + } + return strings.Join(a, " or ") +} + +func (of *orFilter) updateReferencedColumnNames(m map[string]struct{}) { + for _, f := range of.filters { + f.updateReferencedColumnNames(m) + } +} + +func (of *orFilter) apply(bs *blockSearch, bm *filterBitmap) { + bmResult := getFilterBitmap(bm.bitsLen) + bmTmp := getFilterBitmap(bm.bitsLen) + for _, f := range of.filters { + // Minimize the number of rows to check by the filter by checking only + // the rows, which may change the output bm: + // - bm matches them, e.g. the caller wants to get them + // - bmResult doesn't match them, e.g. all the previous OR filters didn't match them + bmTmp.copyFrom(bm) + bmTmp.andNot(bmResult) + if bmTmp.isZero() { + // Shortcut - there is no need in applying the remaining filters, + // since the result already matches all the values from the block. + break + } + f.apply(bs, bmTmp) + bmResult.or(bmTmp) + } + putFilterBitmap(bmTmp) + bm.copyFrom(bmResult) + putFilterBitmap(bmResult) +} + +// andFilter contains filters joined by AND opertor. 
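+//
+// The sub-filters are applied sequentially to the same bitmap, and evaluation
+// stops early once the bitmap becomes empty (see apply below).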
+// +// It is expressed as `f1 AND f2 ... AND fN` in LogsQL. +type andFilter struct { + filters []filter +} + +func (af *andFilter) String() string { + filters := af.filters + a := make([]string, len(filters)) + for i, f := range filters { + s := f.String() + switch f.(type) { + case *orFilter: + s = "(" + s + ")" + } + a[i] = s + } + return strings.Join(a, " ") +} + +func (af *andFilter) updateReferencedColumnNames(m map[string]struct{}) { + for _, f := range af.filters { + f.updateReferencedColumnNames(m) + } +} + +func (af *andFilter) apply(bs *blockSearch, bm *filterBitmap) { + for _, f := range af.filters { + f.apply(bs, bm) + if bm.isZero() { + // Shortcut - there is no need in applying the remaining filters, + // since the result will be zero anyway. + return + } + } +} + +// notFilter negates the filter. +// +// It is expressed as `NOT f` or `!f` in LogsQL. +type notFilter struct { + f filter +} + +func (nf *notFilter) String() string { + s := nf.f.String() + switch nf.f.(type) { + case *andFilter, *orFilter: + s = "(" + s + ")" + } + return "!" + s +} + +func (nf *notFilter) updateReferencedColumnNames(m map[string]struct{}) { + nf.f.updateReferencedColumnNames(m) +} + +func (nf *notFilter) apply(bs *blockSearch, bm *filterBitmap) { + // Minimize the number of rows to check by the filter by applying it + // only to the rows, which match the bm, e.g. they may change the bm result. + bmTmp := getFilterBitmap(bm.bitsLen) + bmTmp.copyFrom(bm) + nf.f.apply(bs, bmTmp) + bm.andNot(bmTmp) + putFilterBitmap(bmTmp) +} + +// streamFilter is the filter for `_stream:{...}` +type streamFilter struct { + // f is the filter to apply + f *StreamFilter + + // tenantIDs is the list of tenantIDs to search for streamIDs. + tenantIDs []TenantID + + // idb is the indexdb to search for streamIDs. + idb *indexdb + + streamIDsOnce sync.Once + streamIDs map[streamID]struct{} +} + +func (sf *streamFilter) String() string { + s := sf.f.String() + if s == "{}" { + return "" + } + return "_stream:" + s +} + +func (sf *streamFilter) getStreamIDs() map[streamID]struct{} { + sf.streamIDsOnce.Do(sf.initStreamIDs) + return sf.streamIDs +} + +func (sf *streamFilter) initStreamIDs() { + streamIDs := sf.idb.searchStreamIDs(sf.tenantIDs, sf.f) + m := make(map[streamID]struct{}, len(streamIDs)) + for i := range streamIDs { + m[streamIDs[i]] = struct{}{} + } + sf.streamIDs = m +} + +func (sf *streamFilter) updateReferencedColumnNames(m map[string]struct{}) { + m["_stream"] = struct{}{} +} + +func (sf *streamFilter) apply(bs *blockSearch, bm *filterBitmap) { + if sf.f.isEmpty() { + return + } + streamIDs := sf.getStreamIDs() + if _, ok := streamIDs[bs.bsw.bh.streamID]; !ok { + bm.resetBits() + return + } +} + +// timeFilter filters by time. +// +// It is expressed as `_time:(start, end]` in LogsQL. 
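+//
+// Whole blocks are pruned using the min/max timestamps from the block header;
+// per-row timestamp checks are performed only for blocks which overlap the
+// time range partially (see apply below).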
+type timeFilter struct { + minTimestamp int64 + maxTimestamp int64 + + stringRepr string +} + +func (tf *timeFilter) String() string { + return "_time:" + tf.stringRepr +} + +func (tf *timeFilter) updateReferencedColumnNames(m map[string]struct{}) { + m["_time"] = struct{}{} +} + +func (tf *timeFilter) apply(bs *blockSearch, bm *filterBitmap) { + minTimestamp := tf.minTimestamp + maxTimestamp := tf.maxTimestamp + + if minTimestamp > maxTimestamp { + bm.resetBits() + return + } + + th := bs.bsw.bh.timestampsHeader + if minTimestamp > th.maxTimestamp || maxTimestamp < th.minTimestamp { + bm.resetBits() + return + } + if minTimestamp <= th.minTimestamp && maxTimestamp >= th.maxTimestamp { + return + } + + timestamps := bs.getTimestamps() + bm.forEachSetBit(func(idx int) bool { + ts := timestamps[idx] + return ts >= minTimestamp && ts <= maxTimestamp + }) +} + +// sequenceFilter matches an ordered sequence of phrases +// +// Example LogsQL: `fieldName:seq(foo, "bar baz")` +type sequenceFilter struct { + fieldName string + phrases []string + + tokensOnce sync.Once + tokens []string + + nonEmptyPhrasesOnce sync.Once + nonEmptyPhrases []string +} + +func (sf *sequenceFilter) String() string { + phrases := sf.phrases + a := make([]string, len(phrases)) + for i, phrase := range phrases { + a[i] = quoteTokenIfNeeded(phrase) + } + return fmt.Sprintf("%sseq(%s)", quoteFieldNameIfNeeded(sf.fieldName), strings.Join(a, ",")) +} + +func (sf *sequenceFilter) getTokens() []string { + sf.tokensOnce.Do(sf.initTokens) + return sf.tokens +} + +func (sf *sequenceFilter) initTokens() { + phrases := sf.getNonEmptyPhrases() + tokens := tokenizeStrings(nil, phrases) + sf.tokens = tokens +} + +func (sf *sequenceFilter) getNonEmptyPhrases() []string { + sf.nonEmptyPhrasesOnce.Do(sf.initNonEmptyPhrases) + return sf.nonEmptyPhrases +} + +func (sf *sequenceFilter) initNonEmptyPhrases() { + phrases := sf.phrases + result := make([]string, 0, len(phrases)) + for _, phrase := range phrases { + if phrase != "" { + result = append(result, phrase) + } + } + sf.nonEmptyPhrases = result +} + +func (sf *sequenceFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[sf.fieldName] = struct{}{} +} + +func (sf *sequenceFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := sf.fieldName + phrases := sf.getNonEmptyPhrases() + + if len(phrases) == 0 { + return + } + + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchSequence(v, phrases) { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + // It matches anything only for empty phrase. 
+ if !matchSequence("", phrases) { + bm.resetBits() + } + return + } + + tokens := sf.getTokens() + + switch ch.valueType { + case valueTypeString: + matchStringBySequence(bs, ch, bm, phrases, tokens) + case valueTypeDict: + matchValuesDictBySequence(bs, ch, bm, phrases) + case valueTypeUint8: + matchUint8BySequence(bs, ch, bm, phrases, tokens) + case valueTypeUint16: + matchUint16BySequence(bs, ch, bm, phrases, tokens) + case valueTypeUint32: + matchUint32BySequence(bs, ch, bm, phrases, tokens) + case valueTypeUint64: + matchUint64BySequence(bs, ch, bm, phrases, tokens) + case valueTypeFloat64: + matchFloat64BySequence(bs, ch, bm, phrases, tokens) + case valueTypeIPv4: + matchIPv4BySequence(bs, ch, bm, phrases, tokens) + case valueTypeTimestampISO8601: + matchTimestampISO8601BySequence(bs, ch, bm, phrases, tokens) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +// exactPrefixFilter matches the exact prefix. +// +// Example LogsQL: `fieldName:exact_prefix("foo bar") +type exactPrefixFilter struct { + fieldName string + prefix string + + tokensOnce sync.Once + tokens []string +} + +func (ef *exactPrefixFilter) String() string { + return fmt.Sprintf("%sexact_prefix(%s)", quoteFieldNameIfNeeded(ef.fieldName), quoteTokenIfNeeded(ef.prefix)) +} + +func (ef *exactPrefixFilter) getTokens() []string { + ef.tokensOnce.Do(ef.initTokens) + return ef.tokens +} + +func (ef *exactPrefixFilter) initTokens() { + ef.tokens = getTokensSkipLast(ef.prefix) +} + +func (ef *exactPrefixFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[ef.fieldName] = struct{}{} +} + +func (ef *exactPrefixFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := ef.fieldName + prefix := ef.prefix + + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchExactPrefix(v, prefix) { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + if !matchExactPrefix("", prefix) { + bm.resetBits() + } + return + } + + tokens := ef.getTokens() + + switch ch.valueType { + case valueTypeString: + matchStringByExactPrefix(bs, ch, bm, prefix, tokens) + case valueTypeDict: + matchValuesDictByExactPrefix(bs, ch, bm, prefix) + case valueTypeUint8: + matchUint8ByExactPrefix(bs, ch, bm, prefix, tokens) + case valueTypeUint16: + matchUint16ByExactPrefix(bs, ch, bm, prefix, tokens) + case valueTypeUint32: + matchUint32ByExactPrefix(bs, ch, bm, prefix, tokens) + case valueTypeUint64: + matchUint64ByExactPrefix(bs, ch, bm, prefix, tokens) + case valueTypeFloat64: + matchFloat64ByExactPrefix(bs, ch, bm, prefix, tokens) + case valueTypeIPv4: + matchIPv4ByExactPrefix(bs, ch, bm, prefix, tokens) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByExactPrefix(bs, ch, bm, prefix, tokens) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +// exactFilter matches the exact value. 
+// +// Example LogsQL: `fieldName:exact("foo bar")` +type exactFilter struct { + fieldName string + value string + + tokensOnce sync.Once + tokens []string +} + +func (ef *exactFilter) String() string { + return fmt.Sprintf("%sexact(%s)", quoteFieldNameIfNeeded(ef.fieldName), quoteTokenIfNeeded(ef.value)) +} + +func (ef *exactFilter) getTokens() []string { + ef.tokensOnce.Do(ef.initTokens) + return ef.tokens +} + +func (ef *exactFilter) initTokens() { + ef.tokens = tokenizeStrings(nil, []string{ef.value}) +} + +func (ef *exactFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[ef.fieldName] = struct{}{} +} + +func (ef *exactFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := ef.fieldName + value := ef.value + + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if value != v { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + // It matches anything only for empty value. + if value != "" { + bm.resetBits() + } + return + } + + tokens := ef.getTokens() + + switch ch.valueType { + case valueTypeString: + matchStringByExactValue(bs, ch, bm, value, tokens) + case valueTypeDict: + matchValuesDictByExactValue(bs, ch, bm, value) + case valueTypeUint8: + matchUint8ByExactValue(bs, ch, bm, value, tokens) + case valueTypeUint16: + matchUint16ByExactValue(bs, ch, bm, value, tokens) + case valueTypeUint32: + matchUint32ByExactValue(bs, ch, bm, value, tokens) + case valueTypeUint64: + matchUint64ByExactValue(bs, ch, bm, value, tokens) + case valueTypeFloat64: + matchFloat64ByExactValue(bs, ch, bm, value, tokens) + case valueTypeIPv4: + matchIPv4ByExactValue(bs, ch, bm, value, tokens) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByExactValue(bs, ch, bm, value, tokens) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +// inFilter matches any exact value from the values map. +// +// Example LogsQL: `fieldName:in("foo", "bar baz")` +type inFilter struct { + fieldName string + values []string + + tokenSetsOnce sync.Once + tokenSets [][]string + + stringValuesOnce sync.Once + stringValues map[string]struct{} + + uint8ValuesOnce sync.Once + uint8Values map[string]struct{} + + uint16ValuesOnce sync.Once + uint16Values map[string]struct{} + + uint32ValuesOnce sync.Once + uint32Values map[string]struct{} + + uint64ValuesOnce sync.Once + uint64Values map[string]struct{} + + float64ValuesOnce sync.Once + float64Values map[string]struct{} + + ipv4ValuesOnce sync.Once + ipv4Values map[string]struct{} + + timestampISO8601ValuesOnce sync.Once + timestampISO8601Values map[string]struct{} +} + +func (af *inFilter) String() string { + values := af.values + a := make([]string, len(values)) + for i, value := range values { + a[i] = quoteTokenIfNeeded(value) + } + return fmt.Sprintf("%sin(%s)", quoteFieldNameIfNeeded(af.fieldName), strings.Join(a, ",")) +} + +func (af *inFilter) getTokenSets() [][]string { + af.tokenSetsOnce.Do(af.initTokenSets) + return af.tokenSets +} + +// It is faster to match every row in the block instead of checking too big number of tokenSets against bloom filter. 
+const maxTokenSetsToInit = 1000 + +func (af *inFilter) initTokenSets() { + values := af.values + tokenSetsLen := len(values) + if tokenSetsLen > maxTokenSetsToInit { + tokenSetsLen = maxTokenSetsToInit + } + tokenSets := make([][]string, 0, tokenSetsLen+1) + for _, v := range values { + tokens := tokenizeStrings(nil, []string{v}) + tokenSets = append(tokenSets, tokens) + if len(tokens) > maxTokenSetsToInit { + break + } + } + af.tokenSets = tokenSets +} + +func (af *inFilter) getStringValues() map[string]struct{} { + af.stringValuesOnce.Do(af.initStringValues) + return af.stringValues +} + +func (af *inFilter) initStringValues() { + values := af.values + m := make(map[string]struct{}, len(values)) + for _, v := range values { + m[v] = struct{}{} + } + af.stringValues = m +} + +func (af *inFilter) getUint8Values() map[string]struct{} { + af.uint8ValuesOnce.Do(af.initUint8Values) + return af.uint8Values +} + +func (af *inFilter) initUint8Values() { + values := af.values + m := make(map[string]struct{}, len(values)) + buf := make([]byte, 0, len(values)*1) + for _, v := range values { + n, ok := tryParseUint64(v) + if !ok || n >= (1<<8) { + continue + } + bufLen := len(buf) + buf = append(buf, byte(n)) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + m[s] = struct{}{} + } + af.uint8Values = m +} + +func (af *inFilter) getUint16Values() map[string]struct{} { + af.uint16ValuesOnce.Do(af.initUint16Values) + return af.uint16Values +} + +func (af *inFilter) initUint16Values() { + values := af.values + m := make(map[string]struct{}, len(values)) + buf := make([]byte, 0, len(values)*2) + for _, v := range values { + n, ok := tryParseUint64(v) + if !ok || n >= (1<<16) { + continue + } + bufLen := len(buf) + buf = encoding.MarshalUint16(buf, uint16(n)) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + m[s] = struct{}{} + } + af.uint16Values = m +} + +func (af *inFilter) getUint32Values() map[string]struct{} { + af.uint32ValuesOnce.Do(af.initUint32Values) + return af.uint32Values +} + +func (af *inFilter) initUint32Values() { + values := af.values + m := make(map[string]struct{}, len(values)) + buf := make([]byte, 0, len(values)*4) + for _, v := range values { + n, ok := tryParseUint64(v) + if !ok || n >= (1<<32) { + continue + } + bufLen := len(buf) + buf = encoding.MarshalUint32(buf, uint32(n)) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + m[s] = struct{}{} + } + af.uint32Values = m +} + +func (af *inFilter) getUint64Values() map[string]struct{} { + af.uint64ValuesOnce.Do(af.initUint64Values) + return af.uint64Values +} + +func (af *inFilter) initUint64Values() { + values := af.values + m := make(map[string]struct{}, len(values)) + buf := make([]byte, 0, len(values)*8) + for _, v := range values { + n, ok := tryParseUint64(v) + if !ok { + continue + } + bufLen := len(buf) + buf = encoding.MarshalUint64(buf, n) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + m[s] = struct{}{} + } + af.uint64Values = m +} + +func (af *inFilter) getFloat64Values() map[string]struct{} { + af.float64ValuesOnce.Do(af.initFloat64Values) + return af.float64Values +} + +func (af *inFilter) initFloat64Values() { + values := af.values + m := make(map[string]struct{}, len(values)) + buf := make([]byte, 0, len(values)*8) + for _, v := range values { + f, ok := tryParseFloat64(v) + if !ok { + continue + } + n := math.Float64bits(f) + bufLen := len(buf) + buf = encoding.MarshalUint64(buf, n) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + m[s] = struct{}{} + } + af.float64Values = m +} + +func (af *inFilter) getIPv4Values() 
map[string]struct{} { + af.ipv4ValuesOnce.Do(af.initIPv4Values) + return af.ipv4Values +} + +func (af *inFilter) initIPv4Values() { + values := af.values + m := make(map[string]struct{}, len(values)) + buf := make([]byte, 0, len(values)*4) + for _, v := range values { + n, ok := tryParseIPv4(v) + if !ok { + continue + } + bufLen := len(buf) + buf = encoding.MarshalUint32(buf, uint32(n)) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + m[s] = struct{}{} + } + af.ipv4Values = m +} + +func (af *inFilter) getTimestampISO8601Values() map[string]struct{} { + af.timestampISO8601ValuesOnce.Do(af.initTimestampISO8601Values) + return af.timestampISO8601Values +} + +func (af *inFilter) initTimestampISO8601Values() { + values := af.values + m := make(map[string]struct{}, len(values)) + buf := make([]byte, 0, len(values)*8) + for _, v := range values { + n, ok := tryParseTimestampISO8601(v) + if !ok { + continue + } + bufLen := len(buf) + buf = encoding.MarshalUint64(buf, n) + s := bytesutil.ToUnsafeString(buf[bufLen:]) + m[s] = struct{}{} + } + af.timestampISO8601Values = m +} + +func (af *inFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[af.fieldName] = struct{}{} +} + +func (af *inFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := af.fieldName + + if len(af.values) == 0 { + bm.resetBits() + return + } + + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + stringValues := af.getStringValues() + if _, ok := stringValues[v]; !ok { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + // It matches anything only for empty phrase. + stringValues := af.getStringValues() + if _, ok := stringValues[""]; !ok { + bm.resetBits() + } + return + } + + tokenSets := af.getTokenSets() + + switch ch.valueType { + case valueTypeString: + stringValues := af.getStringValues() + matchAnyValue(bs, ch, bm, stringValues, tokenSets) + case valueTypeDict: + stringValues := af.getStringValues() + matchValuesDictByAnyValue(bs, ch, bm, stringValues) + case valueTypeUint8: + binValues := af.getUint8Values() + matchAnyValue(bs, ch, bm, binValues, tokenSets) + case valueTypeUint16: + binValues := af.getUint16Values() + matchAnyValue(bs, ch, bm, binValues, tokenSets) + case valueTypeUint32: + binValues := af.getUint32Values() + matchAnyValue(bs, ch, bm, binValues, tokenSets) + case valueTypeUint64: + binValues := af.getUint64Values() + matchAnyValue(bs, ch, bm, binValues, tokenSets) + case valueTypeFloat64: + binValues := af.getFloat64Values() + matchAnyValue(bs, ch, bm, binValues, tokenSets) + case valueTypeIPv4: + binValues := af.getIPv4Values() + matchAnyValue(bs, ch, bm, binValues, tokenSets) + case valueTypeTimestampISO8601: + binValues := af.getTimestampISO8601Values() + matchAnyValue(bs, ch, bm, binValues, tokenSets) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +// ipv4RangeFilter matches the given ipv4 range [minValue..maxValue]. 
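+//
+// Both bounds are inclusive. String values are parsed with tryParseIPv4,
+// so values that are not valid IPv4 addresses never match.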
+//
+// Example LogsQL: `fieldName:ipv4_range(127.0.0.1, 127.0.0.255)`
+type ipv4RangeFilter struct {
+	fieldName string
+	minValue  uint32
+	maxValue  uint32
+}
+
+func (rf *ipv4RangeFilter) String() string {
+	minValue := string(encoding.MarshalUint32(nil, rf.minValue))
+	maxValue := string(encoding.MarshalUint32(nil, rf.maxValue))
+	return fmt.Sprintf("%sipv4_range(%s, %s)", quoteFieldNameIfNeeded(rf.fieldName), toIPv4String(nil, minValue), toIPv4String(nil, maxValue))
+}
+
+func (rf *ipv4RangeFilter) updateReferencedColumnNames(m map[string]struct{}) {
+	m[rf.fieldName] = struct{}{}
+}
+
+func (rf *ipv4RangeFilter) apply(bs *blockSearch, bm *filterBitmap) {
+	fieldName := rf.fieldName
+	minValue := rf.minValue
+	maxValue := rf.maxValue
+
+	if minValue > maxValue {
+		bm.resetBits()
+		return
+	}
+
+	v := bs.csh.getConstColumnValue(fieldName)
+	if v != "" {
+		if !matchIPv4Range(v, minValue, maxValue) {
+			bm.resetBits()
+		}
+		return
+	}
+
+	// Verify whether filter matches other columns
+	ch := bs.csh.getColumnHeader(fieldName)
+	if ch == nil {
+		// Fast path - there are no matching columns.
+		bm.resetBits()
+		return
+	}
+
+	switch ch.valueType {
+	case valueTypeString:
+		matchStringByIPv4Range(bs, ch, bm, minValue, maxValue)
+	case valueTypeDict:
+		matchValuesDictByIPv4Range(bs, ch, bm, minValue, maxValue)
+	case valueTypeUint8:
+		bm.resetBits()
+	case valueTypeUint16:
+		bm.resetBits()
+	case valueTypeUint32:
+		bm.resetBits()
+	case valueTypeUint64:
+		bm.resetBits()
+	case valueTypeFloat64:
+		bm.resetBits()
+	case valueTypeIPv4:
+		matchIPv4ByRange(bs, ch, bm, minValue, maxValue)
+	case valueTypeTimestampISO8601:
+		bm.resetBits()
+	default:
+		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+	}
+}
+
+// stringRangeFilter matches the given string range [minValue..maxValue)
+//
+// Note that minValue is included in the range, while maxValue isn't.
+// This simplifies querying distinct log sets with string_range(A, B), string_range(B, C), etc.
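+//
+// For instance, `string_range("a", "m")` and `string_range("m", "z")` select disjoint
+// sets of values, since "m" belongs only to the second range.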
+// +// Example LogsQL: `fieldName:string_range(minValue, maxValue)` +type stringRangeFilter struct { + fieldName string + minValue string + maxValue string +} + +func (rf *stringRangeFilter) String() string { + return fmt.Sprintf("%sstring_range(%s, %s)", quoteFieldNameIfNeeded(rf.fieldName), quoteTokenIfNeeded(rf.minValue), quoteTokenIfNeeded(rf.maxValue)) +} + +func (rf *stringRangeFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[rf.fieldName] = struct{}{} +} + +func (rf *stringRangeFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := rf.fieldName + minValue := rf.minValue + maxValue := rf.maxValue + + if minValue > maxValue { + bm.resetBits() + return + } + + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchStringRange(v, minValue, maxValue) { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + if !matchStringRange("", minValue, maxValue) { + bm.resetBits() + } + return + } + + switch ch.valueType { + case valueTypeString: + matchStringByStringRange(bs, ch, bm, minValue, maxValue) + case valueTypeDict: + matchValuesDictByStringRange(bs, ch, bm, minValue, maxValue) + case valueTypeUint8: + matchUint8ByStringRange(bs, ch, bm, minValue, maxValue) + case valueTypeUint16: + matchUint16ByStringRange(bs, ch, bm, minValue, maxValue) + case valueTypeUint32: + matchUint32ByStringRange(bs, ch, bm, minValue, maxValue) + case valueTypeUint64: + matchUint64ByStringRange(bs, ch, bm, minValue, maxValue) + case valueTypeFloat64: + matchFloat64ByStringRange(bs, ch, bm, minValue, maxValue) + case valueTypeIPv4: + matchIPv4ByStringRange(bs, ch, bm, minValue, maxValue) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByStringRange(bs, ch, bm, minValue, maxValue) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +// lenRangeFilter matches field values with the length in the given range [minLen, maxLen]. +// +// Example LogsQL: `fieldName:len_range(10, 20)` +type lenRangeFilter struct { + fieldName string + minLen uint64 + maxLen uint64 +} + +func (rf *lenRangeFilter) String() string { + return quoteFieldNameIfNeeded(rf.fieldName) + fmt.Sprintf("len_range(%d,%d)", rf.minLen, rf.maxLen) +} + +func (rf *lenRangeFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[rf.fieldName] = struct{}{} +} + +func (rf *lenRangeFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := rf.fieldName + minLen := rf.minLen + maxLen := rf.maxLen + + if minLen > maxLen { + bm.resetBits() + return + } + + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchLenRange(v, minLen, maxLen) { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. 
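+		// A missing column is matched as an empty value of length 0,
+		// so such rows are kept only when minLen is 0.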
+ if !matchLenRange("", minLen, maxLen) { + bm.resetBits() + } + return + } + + switch ch.valueType { + case valueTypeString: + matchStringByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeDict: + matchValuesDictByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeUint8: + matchUint8ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeUint16: + matchUint16ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeUint32: + matchUint32ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeUint64: + matchUint64ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeFloat64: + matchFloat64ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeIPv4: + matchIPv4ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByLenRange(bm, minLen, maxLen) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +// rangeFilter matches the given range [minValue..maxValue]. +// +// Example LogsQL: `fieldName:range(minValue, maxValue]` +type rangeFilter struct { + fieldName string + minValue float64 + maxValue float64 + + stringRepr string +} + +func (rf *rangeFilter) String() string { + return quoteFieldNameIfNeeded(rf.fieldName) + "range" + rf.stringRepr +} + +func (rf *rangeFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[rf.fieldName] = struct{}{} +} + +func (rf *rangeFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := rf.fieldName + minValue := rf.minValue + maxValue := rf.maxValue + + if minValue > maxValue { + bm.resetBits() + return + } + + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchRange(v, minValue, maxValue) { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + bm.resetBits() + return + } + + switch ch.valueType { + case valueTypeString: + matchStringByRange(bs, ch, bm, minValue, maxValue) + case valueTypeDict: + matchValuesDictByRange(bs, ch, bm, minValue, maxValue) + case valueTypeUint8: + matchUint8ByRange(bs, ch, bm, minValue, maxValue) + case valueTypeUint16: + matchUint16ByRange(bs, ch, bm, minValue, maxValue) + case valueTypeUint32: + matchUint32ByRange(bs, ch, bm, minValue, maxValue) + case valueTypeUint64: + matchUint64ByRange(bs, ch, bm, minValue, maxValue) + case valueTypeFloat64: + matchFloat64ByRange(bs, ch, bm, minValue, maxValue) + case valueTypeIPv4: + bm.resetBits() + case valueTypeTimestampISO8601: + bm.resetBits() + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +// regexpFilter matches the given regexp +// +// Example LogsQL: `fieldName:re("regexp")` +type regexpFilter struct { + fieldName string + re *regexp.Regexp +} + +func (rf *regexpFilter) String() string { + return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(rf.fieldName), rf.re.String()) +} + +func (rf *regexpFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[rf.fieldName] = struct{}{} +} + +func (rf *regexpFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := rf.fieldName + re := rf.re + + // Verify whether filter matches const column + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !re.MatchString(v) { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. 
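+		// A missing column is matched against an empty string,
+		// so such rows are kept only when the regexp matches "".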
+		if !re.MatchString("") {
+			bm.resetBits()
+		}
+		return
+	}
+
+	switch ch.valueType {
+	case valueTypeString:
+		matchStringByRegexp(bs, ch, bm, re)
+	case valueTypeDict:
+		matchValuesDictByRegexp(bs, ch, bm, re)
+	case valueTypeUint8:
+		matchUint8ByRegexp(bs, ch, bm, re)
+	case valueTypeUint16:
+		matchUint16ByRegexp(bs, ch, bm, re)
+	case valueTypeUint32:
+		matchUint32ByRegexp(bs, ch, bm, re)
+	case valueTypeUint64:
+		matchUint64ByRegexp(bs, ch, bm, re)
+	case valueTypeFloat64:
+		matchFloat64ByRegexp(bs, ch, bm, re)
+	case valueTypeIPv4:
+		matchIPv4ByRegexp(bs, ch, bm, re)
+	case valueTypeTimestampISO8601:
+		matchTimestampISO8601ByRegexp(bs, ch, bm, re)
+	default:
+		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+	}
+}
+
+// anyCasePrefixFilter matches the given prefix in lower, upper and mixed case.
+//
+// Example LogsQL: `fieldName:i(prefix*)` or `fieldName:i("some prefix"*)`
+//
+// A special case `fieldName:i(*)` is equivalent to `fieldName:*` and matches non-empty values for the given `fieldName` field.
+type anyCasePrefixFilter struct {
+	fieldName string
+	prefix    string
+
+	tokensOnce sync.Once
+	tokens     []string
+}
+
+func (pf *anyCasePrefixFilter) String() string {
+	if pf.prefix == "" {
+		return quoteFieldNameIfNeeded(pf.fieldName) + "i(*)"
+	}
+	return fmt.Sprintf("%si(%s*)", quoteFieldNameIfNeeded(pf.fieldName), quoteTokenIfNeeded(pf.prefix))
+}
+
+func (pf *anyCasePrefixFilter) getTokens() []string {
+	pf.tokensOnce.Do(pf.initTokens)
+	return pf.tokens
+}
+
+func (pf *anyCasePrefixFilter) initTokens() {
+	pf.tokens = getTokensSkipLast(pf.prefix)
+}
+
+func (pf *anyCasePrefixFilter) updateReferencedColumnNames(m map[string]struct{}) {
+	m[pf.fieldName] = struct{}{}
+}
+
+func (pf *anyCasePrefixFilter) apply(bs *blockSearch, bm *filterBitmap) {
+	fieldName := pf.fieldName
+	prefixLowercase := strings.ToLower(pf.prefix)
+
+	// Verify whether pf matches const column
+	v := bs.csh.getConstColumnValue(fieldName)
+	if v != "" {
+		if !matchAnyCasePrefix(v, prefixLowercase) {
+			bm.resetBits()
+		}
+		return
+	}
+
+	// Verify whether pf matches other columns
+	ch := bs.csh.getColumnHeader(fieldName)
+	if ch == nil {
+		// Fast path - there are no matching columns.
+		bm.resetBits()
+		return
+	}
+
+	tokens := pf.getTokens()
+
+	switch ch.valueType {
+	case valueTypeString:
+		matchStringByAnyCasePrefix(bs, ch, bm, prefixLowercase)
+	case valueTypeDict:
+		matchValuesDictByAnyCasePrefix(bs, ch, bm, prefixLowercase)
+	case valueTypeUint8:
+		matchUint8ByPrefix(bs, ch, bm, prefixLowercase)
+	case valueTypeUint16:
+		matchUint16ByPrefix(bs, ch, bm, prefixLowercase)
+	case valueTypeUint32:
+		matchUint32ByPrefix(bs, ch, bm, prefixLowercase)
+	case valueTypeUint64:
+		matchUint64ByPrefix(bs, ch, bm, prefixLowercase)
+	case valueTypeFloat64:
+		matchFloat64ByPrefix(bs, ch, bm, prefixLowercase, tokens)
+	case valueTypeIPv4:
+		matchIPv4ByPrefix(bs, ch, bm, prefixLowercase, tokens)
+	case valueTypeTimestampISO8601:
+		prefixUppercase := strings.ToUpper(pf.prefix)
+		matchTimestampISO8601ByPrefix(bs, ch, bm, prefixUppercase, tokens)
+	default:
+		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+	}
+}
+
+// prefixFilter matches the given prefix.
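+//
+// The prefix must start at a word boundary. For instance, `fieldName:err*` matches
+// "error code 5" and "some error", but not "stderr", since there "err" is preceded
+// by token characters.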
+// +// Example LogsQL: `fieldName:prefix*` or `fieldName:"some prefix"*` +// +// A special case `fieldName:*` matches non-empty value for the given `fieldName` field +type prefixFilter struct { + fieldName string + prefix string + + tokensOnce sync.Once + tokens []string +} + +func (pf *prefixFilter) String() string { + if pf.prefix == "" { + return quoteFieldNameIfNeeded(pf.fieldName) + "*" + } + return fmt.Sprintf("%s%s*", quoteFieldNameIfNeeded(pf.fieldName), quoteTokenIfNeeded(pf.prefix)) +} + +func (pf *prefixFilter) getTokens() []string { + pf.tokensOnce.Do(pf.initTokens) + return pf.tokens +} + +func (pf *prefixFilter) initTokens() { + pf.tokens = getTokensSkipLast(pf.prefix) +} + +func (pf *prefixFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[pf.fieldName] = struct{}{} +} + +func (pf *prefixFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := pf.fieldName + prefix := pf.prefix + + // Verify whether pf matches const column + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchPrefix(v, prefix) { + bm.resetBits() + } + return + } + + // Verify whether pf matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + bm.resetBits() + return + } + + tokens := pf.getTokens() + + switch ch.valueType { + case valueTypeString: + matchStringByPrefix(bs, ch, bm, prefix, tokens) + case valueTypeDict: + matchValuesDictByPrefix(bs, ch, bm, prefix) + case valueTypeUint8: + matchUint8ByPrefix(bs, ch, bm, prefix) + case valueTypeUint16: + matchUint16ByPrefix(bs, ch, bm, prefix) + case valueTypeUint32: + matchUint32ByPrefix(bs, ch, bm, prefix) + case valueTypeUint64: + matchUint64ByPrefix(bs, ch, bm, prefix) + case valueTypeFloat64: + matchFloat64ByPrefix(bs, ch, bm, prefix, tokens) + case valueTypeIPv4: + matchIPv4ByPrefix(bs, ch, bm, prefix, tokens) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByPrefix(bs, ch, bm, prefix, tokens) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +// anyCasePhraseFilter filters field entries by case-insensitive phrase match. +// +// An example LogsQL query: `fieldName:i(word)` or `fieldName:i("word1 ... wordN")` +type anyCasePhraseFilter struct { + fieldName string + phrase string + + tokensOnce sync.Once + tokens []string +} + +func (pf *anyCasePhraseFilter) String() string { + return fmt.Sprintf("%si(%s)", quoteFieldNameIfNeeded(pf.fieldName), quoteTokenIfNeeded(pf.phrase)) +} + +func (pf *anyCasePhraseFilter) getTokens() []string { + pf.tokensOnce.Do(pf.initTokens) + return pf.tokens +} + +func (pf *anyCasePhraseFilter) initTokens() { + pf.tokens = tokenizeStrings(nil, []string{pf.phrase}) +} + +func (pf *anyCasePhraseFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[pf.fieldName] = struct{}{} +} + +func (pf *anyCasePhraseFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := pf.fieldName + phraseLowercase := strings.ToLower(pf.phrase) + + // Verify whether pf matches const column + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchAnyCasePhrase(v, phraseLowercase) { + bm.resetBits() + } + return + } + + // Verify whether pf matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + // It matches anything only for empty phrase. 
+ if len(phraseLowercase) > 0 { + bm.resetBits() + } + return + } + + tokens := pf.getTokens() + + switch ch.valueType { + case valueTypeString: + matchStringByAnyCasePhrase(bs, ch, bm, phraseLowercase) + case valueTypeDict: + matchValuesDictByAnyCasePhrase(bs, ch, bm, phraseLowercase) + case valueTypeUint8: + matchUint8ByExactValue(bs, ch, bm, phraseLowercase, tokens) + case valueTypeUint16: + matchUint16ByExactValue(bs, ch, bm, phraseLowercase, tokens) + case valueTypeUint32: + matchUint32ByExactValue(bs, ch, bm, phraseLowercase, tokens) + case valueTypeUint64: + matchUint64ByExactValue(bs, ch, bm, phraseLowercase, tokens) + case valueTypeFloat64: + matchFloat64ByPhrase(bs, ch, bm, phraseLowercase, tokens) + case valueTypeIPv4: + matchIPv4ByPhrase(bs, ch, bm, phraseLowercase, tokens) + case valueTypeTimestampISO8601: + phraseUppercase := strings.ToUpper(pf.phrase) + matchTimestampISO8601ByPhrase(bs, ch, bm, phraseUppercase, tokens) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +// phraseFilter filters field entries by phrase match (aka full text search). +// +// A phrase consists of any number of words with delimiters between them. +// +// An empty phrase matches only an empty string. +// A single-word phrase is the simplest LogsQL query: `fieldName:word` +// +// Multi-word phrase is expressed as `fieldName:"word1 ... wordN"` in LogsQL. +// +// A special case `fieldName:""` matches any value without `fieldName` field. +type phraseFilter struct { + fieldName string + phrase string + + tokensOnce sync.Once + tokens []string +} + +func (pf *phraseFilter) String() string { + return quoteFieldNameIfNeeded(pf.fieldName) + quoteTokenIfNeeded(pf.phrase) +} + +func (pf *phraseFilter) getTokens() []string { + pf.tokensOnce.Do(pf.initTokens) + return pf.tokens +} + +func (pf *phraseFilter) initTokens() { + pf.tokens = tokenizeStrings(nil, []string{pf.phrase}) +} + +func (pf *phraseFilter) updateReferencedColumnNames(m map[string]struct{}) { + m[pf.fieldName] = struct{}{} +} + +func (pf *phraseFilter) apply(bs *blockSearch, bm *filterBitmap) { + fieldName := pf.fieldName + phrase := pf.phrase + + // Verify whether pf matches const column + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchPhrase(v, phrase) { + bm.resetBits() + } + return + } + + // Verify whether pf matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + // It matches anything only for empty phrase. 
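+		// This is how the special case `fieldName:""` matches rows without the fieldName field.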
+		if len(phrase) > 0 {
+			bm.resetBits()
+		}
+		return
+	}
+
+	tokens := pf.getTokens()
+
+	switch ch.valueType {
+	case valueTypeString:
+		matchStringByPhrase(bs, ch, bm, phrase, tokens)
+	case valueTypeDict:
+		matchValuesDictByPhrase(bs, ch, bm, phrase)
+	case valueTypeUint8:
+		matchUint8ByExactValue(bs, ch, bm, phrase, tokens)
+	case valueTypeUint16:
+		matchUint16ByExactValue(bs, ch, bm, phrase, tokens)
+	case valueTypeUint32:
+		matchUint32ByExactValue(bs, ch, bm, phrase, tokens)
+	case valueTypeUint64:
+		matchUint64ByExactValue(bs, ch, bm, phrase, tokens)
+	case valueTypeFloat64:
+		matchFloat64ByPhrase(bs, ch, bm, phrase, tokens)
+	case valueTypeIPv4:
+		matchIPv4ByPhrase(bs, ch, bm, phrase, tokens)
+	case valueTypeTimestampISO8601:
+		matchTimestampISO8601ByPhrase(bs, ch, bm, phrase, tokens)
+	default:
+		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
+	}
+}
+
+func matchTimestampISO8601ByLenRange(bm *filterBitmap, minLen, maxLen uint64) {
+	if minLen > uint64(len(iso8601Timestamp)) || maxLen < uint64(len(iso8601Timestamp)) {
+		bm.resetBits()
+		return
+	}
+}
+
+func matchTimestampISO8601ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) {
+	if minValue > "9" || maxValue < "0" {
+		bm.resetBits()
+		return
+	}
+
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toTimestampISO8601StringExt(bs, bb, v)
+		return matchStringRange(s, minValue, maxValue)
+	})
+	bbPool.Put(bb)
+}
+
+func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toTimestampISO8601StringExt(bs, bb, v)
+		return re.MatchString(s)
+	})
+	bbPool.Put(bb)
+}
+
+func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+	if prefix == "" {
+		// Fast path - all the timestamp values match an empty prefix aka `*`
+		return
+	}
+	// There is no sense in trying to parse the prefix, since it may contain an incomplete timestamp.
+	// We cannot compare the binary representation of the timestamp and need to convert
+	// the timestamp to string before searching for the prefix there.
+	if !matchBloomFilterAllTokens(bs, ch, tokens) {
+		bm.resetBits()
+		return
+	}
+
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toTimestampISO8601StringExt(bs, bb, v)
+		return matchPrefix(s, prefix)
+	})
+	bbPool.Put(bb)
+}
+
+func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) {
+	if len(phrases) == 1 {
+		matchTimestampISO8601ByPhrase(bs, ch, bm, phrases[0], tokens)
+		return
+	}
+	if !matchBloomFilterAllTokens(bs, ch, tokens) {
+		bm.resetBits()
+		return
+	}
+
+	// Slow path - the phrases contain an incomplete timestamp. Search over the string representation of the timestamp.
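+	// For instance, a sequence such as ("2023-06-20", "10:20:30") can only be checked
+	// against the textual form produced by toTimestampISO8601StringExt below.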
+ bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toTimestampISO8601StringExt(bs, bb, v) + return matchSequence(s, phrases) + }) + bbPool.Put(bb) +} + +func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if prefix == "" { + return + } + if prefix < "0" || prefix > "9" || !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toTimestampISO8601StringExt(bs, bb, v) + return matchExactPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string, tokens []string) { + n, ok := tryParseTimestampISO8601(value) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = encoding.MarshalUint64(bb.B, n) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) { + _, ok := tryParseTimestampISO8601(phrase) + if ok { + // Fast path - the phrase contains complete timestamp, so we can use exact search + matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens) + return + } + + // Slow path - the phrase contains incomplete timestamp. Search over string representation of the timestamp. + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toTimestampISO8601StringExt(bs, bb, v) + return matchPhrase(s, phrase) + }) + bbPool.Put(bb) +} + +func matchIPv4ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) { + if minValue > "9" || maxValue < "0" { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toIPv4StringExt(bs, bb, v) + return matchStringRange(s, minValue, maxValue) + }) + bbPool.Put(bb) +} + +func matchIPv4ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) { + if minLen > uint64(len("255.255.255.255")) || maxLen < uint64(len("0.0.0.0")) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toIPv4StringExt(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchIPv4ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue uint32) { + if ch.minValue > uint64(maxValue) || ch.maxValue < uint64(minValue) { + bm.resetBits() + return + } + + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 4 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v)) + } + b := bytesutil.ToUnsafeBytes(v) + n := encoding.UnmarshalUint32(b) + return n >= minValue && n <= maxValue + }) +} + +func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) { + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toIPv4StringExt(bs, bb, v) + return re.MatchString(s) + }) + bbPool.Put(bb) +} + +func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if prefix == "" { + // Fast path - all the ipv4 values match an empty prefix aka `*` + return + } + // There is no sense in trying to parse prefix, since it may contain incomplete ip. 
+	// We cannot compare the binary representation of the ip address and need to convert
+	// the ip to string before searching for the prefix there.
+	if !matchBloomFilterAllTokens(bs, ch, tokens) {
+		bm.resetBits()
+		return
+	}
+
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toIPv4StringExt(bs, bb, v)
+		return matchPrefix(s, prefix)
+	})
+	bbPool.Put(bb)
+}
+
+func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) {
+	if len(phrases) == 1 {
+		matchIPv4ByPhrase(bs, ch, bm, phrases[0], tokens)
+		return
+	}
+	if !matchBloomFilterAllTokens(bs, ch, tokens) {
+		bm.resetBits()
+		return
+	}
+
+	// Slow path - phrases contain parts of IP address. For example, `1.23` should match `1.23.4.5` and `4.1.23.54`.
+	// We cannot compare the binary representation of the ip address and need to convert
+	// the ip to string before searching for the phrases there.
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toIPv4StringExt(bs, bb, v)
+		return matchSequence(s, phrases)
+	})
+	bbPool.Put(bb)
+}
+
+func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) {
+	if prefix == "" {
+		return
+	}
+	if prefix < "0" || prefix > "9" || len(tokens) > 3 || !matchBloomFilterAllTokens(bs, ch, tokens) {
+		bm.resetBits()
+		return
+	}
+
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toIPv4StringExt(bs, bb, v)
+		return matchExactPrefix(s, prefix)
+	})
+	bbPool.Put(bb)
+}
+
+func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string, tokens []string) {
+	n, ok := tryParseIPv4(value)
+	if !ok || uint64(n) < ch.minValue || uint64(n) > ch.maxValue {
+		bm.resetBits()
+		return
+	}
+	bb := bbPool.Get()
+	bb.B = encoding.MarshalUint32(bb.B, n)
+	matchBinaryValue(bs, ch, bm, bb.B, tokens)
+	bbPool.Put(bb)
+}
+
+func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) {
+	_, ok := tryParseIPv4(phrase)
+	if ok {
+		// Fast path - phrase contains the full IP address, so we can use exact matching
+		matchIPv4ByExactValue(bs, ch, bm, phrase, tokens)
+		return
+	}
+
+	// Slow path - the phrase may contain a part of IP address. For example, `1.23` should match `1.23.4.5` and `4.1.23.54`.
+	// We cannot compare the binary representation of the ip address and need to convert
+	// the ip to string before searching for the phrase there.
+ if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toIPv4StringExt(bs, bb, v) + return matchPhrase(s, phrase) + }) + bbPool.Put(bb) +} + +func matchFloat64ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) { + if minValue > "9" || maxValue < "+" { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return matchStringRange(s, minValue, maxValue) + }) + bbPool.Put(bb) +} + +func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) { + if minLen > 24 || maxLen == 0 { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchFloat64ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) { + if minValue > math.Float64frombits(ch.maxValue) || maxValue < math.Float64frombits(ch.minValue) { + bm.resetBits() + return + } + + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 8 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v)) + } + b := bytesutil.ToUnsafeBytes(v) + n := encoding.UnmarshalUint64(b) + f := math.Float64frombits(n) + return f >= minValue && f <= maxValue + }) +} + +func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) { + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return re.MatchString(s) + }) + bbPool.Put(bb) +} + +func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if prefix == "" { + // Fast path - all the float64 values match an empty prefix aka `*` + return + } + // The prefix may contain a part of the floating-point number. + // For example, `foo:12*` must match `12`, `123.456` and `-0.123`. + // This means we cannot search in binary representation of floating-point numbers. + // Instead, we need searching for the whole prefix in string representation + // of floating-point numbers :( + _, ok := tryParseFloat64(prefix) + if !ok && prefix != "." && prefix != "+" && prefix != "-" && !strings.HasPrefix(prefix, "e") && !strings.HasPrefix(prefix, "E") { + bm.resetBits() + return + } + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + // The phrase may contain a part of the floating-point number. + // For example, `foo:"123"` must match `123`, `123.456` and `-0.123`. + // This means we cannot search in binary representation of floating-point numbers. 
+ // Instead, we need searching for the whole phrase in string representation + // of floating-point numbers :( + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return matchSequence(s, phrases) + }) + bbPool.Put(bb) +} + +func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if prefix == "" { + // An empty prefix matches all the values + return + } + if len(tokens) > 2 || !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return matchExactPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string, tokens []string) { + f, ok := tryParseFloat64(value) + if !ok || f < math.Float64frombits(ch.minValue) || f > math.Float64frombits(ch.maxValue) { + bm.resetBits() + return + } + n := math.Float64bits(f) + bb := bbPool.Get() + bb.B = encoding.MarshalUint64(bb.B, n) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) { + // The phrase may contain a part of the floating-point number. + // For example, `foo:"123"` must match `123`, `123.456` and `-0.123`. + // This means we cannot search in binary representation of floating-point numbers. + // Instead, we need searching for the whole phrase in string representation + // of floating-point numbers :( + _, ok := tryParseFloat64(phrase) + if !ok && phrase != "." && phrase != "+" && phrase != "-" { + bm.resetBits() + return + } + if n := strings.IndexByte(phrase, '.'); n > 0 && n < len(phrase)-1 { + // Fast path - the phrase contains the exact floating-point number, so we can use exact search + matchFloat64ByExactValue(bs, ch, bm, phrase, tokens) + return + } + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return matchPhrase(s, phrase) + }) + bbPool.Put(bb) +} + +func matchValuesDictByIPv4Range(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue uint32) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchIPv4Range(v, minValue, maxValue) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchStringRange(v, minValue, maxValue) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchLenRange(v, minLen, maxLen) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchRange(v, minValue, maxValue) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func 
matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if re.MatchString(v) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefixLowercase string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchAnyCasePrefix(v, prefixLowercase) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phraseLowercase string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchAnyCasePhrase(v, phraseLowercase) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchPrefix(v, prefix) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases []string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchSequence(v, phrases) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchExactPrefix(v, prefix) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if v == value { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, values map[string]struct{}) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if _, ok := values[v]; ok { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchPhrase(v, phrase) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *filterBitmap, encodedValues []byte) { + if len(encodedValues) == 0 { + // Fast path - the phrase is missing in the valuesDict + bm.resetBits() + return + } + // Slow path - iterate over values + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 1 { + logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(v)) + } + n := bytes.IndexByte(encodedValues, v[0]) + return n >= 0 + }) +} + +func matchStringByIPv4Range(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue uint32) { + visitValues(bs, ch, bm, func(v string) bool { + return matchIPv4Range(v, minValue, maxValue) + }) +} + +func matchStringByStringRange(bs *blockSearch, ch *columnHeader, bm 
*filterBitmap, minValue, maxValue string) { + visitValues(bs, ch, bm, func(v string) bool { + return matchStringRange(v, minValue, maxValue) + }) +} + +func matchStringByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) { + visitValues(bs, ch, bm, func(v string) bool { + return matchLenRange(v, minLen, maxLen) + }) +} + +func matchStringByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) { + visitValues(bs, ch, bm, func(v string) bool { + return matchRange(v, minValue, maxValue) + }) +} + +func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) { + visitValues(bs, ch, bm, func(v string) bool { + return re.MatchString(v) + }) +} + +func matchStringByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefixLowercase string) { + visitValues(bs, ch, bm, func(v string) bool { + return matchAnyCasePrefix(v, prefixLowercase) + }) +} + +func matchStringByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phraseLowercase string) { + visitValues(bs, ch, bm, func(v string) bool { + return matchAnyCasePhrase(v, phraseLowercase) + }) +} + +func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return matchPrefix(v, prefix) + }) +} + +func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases []string, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return matchSequence(v, phrases) + }) +} + +func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return matchExactPrefix(v, prefix) + }) +} + +func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, value string, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return v == value + }) +} + +func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return matchPhrase(v, phrase) + }) +} + +func matchUint8ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) { + if minValue > "9" || maxValue < "0" { + bm.resetBits() + return + } + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint8String(bs, bb, v) + return matchStringRange(s, minValue, maxValue) + }) + bbPool.Put(bb) +} + +func matchUint16ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) { + if minValue > "9" || maxValue < "0" { + bm.resetBits() + return + } + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint16String(bs, bb, v) + return matchStringRange(s, minValue, maxValue) + }) + bbPool.Put(bb) +} + +func matchUint32ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) { + if minValue > "9" || maxValue < "0" { + bm.resetBits() + return + } + bb := bbPool.Get() + visitValues(bs, 
ch, bm, func(v string) bool { + s := toUint32String(bs, bb, v) + return matchStringRange(s, minValue, maxValue) + }) + bbPool.Put(bb) +} + +func matchUint64ByStringRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue string) { + if minValue > "9" || maxValue < "0" { + bm.resetBits() + return + } + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint64String(bs, bb, v) + return matchStringRange(s, minValue, maxValue) + }) + bbPool.Put(bb) +} + +func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool { + bb := bbPool.Get() + defer bbPool.Put(bb) + + bb.B = strconv.AppendUint(bb.B[:0], ch.minValue, 10) + s := bytesutil.ToUnsafeString(bb.B) + if maxLen < uint64(len(s)) { + return false + } + bb.B = strconv.AppendUint(bb.B[:0], ch.maxValue, 10) + s = bytesutil.ToUnsafeString(bb.B) + return minLen <= uint64(len(s)) +} + +func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) { + if !matchMinMaxValueLen(ch, minLen, maxLen) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint8String(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) { + if !matchMinMaxValueLen(ch, minLen, maxLen) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint16String(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) { + if !matchMinMaxValueLen(ch, minLen, maxLen) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint32String(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minLen, maxLen uint64) { + if !matchMinMaxValueLen(ch, minLen, maxLen) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint64String(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchUint8ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) { + minValueUint, maxValueUint := toUint64Range(minValue, maxValue) + if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue { + bm.resetBits() + return + } + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 1 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v)) + } + n := uint64(v[0]) + return n >= minValueUint && n <= maxValueUint + }) + bbPool.Put(bb) +} + +func matchUint16ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) { + minValueUint, maxValueUint := toUint64Range(minValue, maxValue) + if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue { + bm.resetBits() + return + } + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 2 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v)) + } + b := bytesutil.ToUnsafeBytes(v) + n := uint64(encoding.UnmarshalUint16(b)) + return n >= minValueUint && n <= maxValueUint + }) 
+	bbPool.Put(bb)
+}
+
+func matchUint32ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) {
+	minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
+	if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue {
+		bm.resetBits()
+		return
+	}
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		if len(v) != 4 {
+			logger.Panicf("FATAL: %s: unexpected length for binary representation of uint32 number: got %d; want 4", bs.partPath(), len(v))
+		}
+		b := bytesutil.ToUnsafeBytes(v)
+		n := uint64(encoding.UnmarshalUint32(b))
+		return n >= minValueUint && n <= maxValueUint
+	})
+	bbPool.Put(bb)
+}
+
+func matchUint64ByRange(bs *blockSearch, ch *columnHeader, bm *filterBitmap, minValue, maxValue float64) {
+	minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
+	if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue {
+		bm.resetBits()
+		return
+	}
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		if len(v) != 8 {
+			logger.Panicf("FATAL: %s: unexpected length for binary representation of uint64 number: got %d; want 8", bs.partPath(), len(v))
+		}
+		b := bytesutil.ToUnsafeBytes(v)
+		n := encoding.UnmarshalUint64(b)
+		return n >= minValueUint && n <= maxValueUint
+	})
+	bbPool.Put(bb)
+}
+
+func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toUint8String(bs, bb, v)
+		return re.MatchString(s)
+	})
+	bbPool.Put(bb)
+}
+
+func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toUint16String(bs, bb, v)
+		return re.MatchString(s)
+	})
+	bbPool.Put(bb)
+}
+
+func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toUint32String(bs, bb, v)
+		return re.MatchString(s)
+	})
+	bbPool.Put(bb)
+}
+
+func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *filterBitmap, re *regexp.Regexp) {
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toUint64String(bs, bb, v)
+		return re.MatchString(s)
+	})
+	bbPool.Put(bb)
+}
+
+func matchUint8ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) {
+	if prefix == "" {
+		// Fast path - all the uint8 values match an empty prefix aka `*`
+		return
+	}
+	// The prefix may contain a part of the number.
+	// For example, `foo:12*` must match `12` and `123`.
+	// This means we cannot search in binary representation of numbers.
+	// Instead, we need searching for the whole prefix in string representation of numbers :(
+	n, ok := tryParseUint64(prefix)
+	if !ok || n > ch.maxValue {
+		bm.resetBits()
+		return
+	}
+	// There is no need in matching against bloom filters, since tokens is empty.
+	bb := bbPool.Get()
+	visitValues(bs, ch, bm, func(v string) bool {
+		s := toUint8String(bs, bb, v)
+		return matchPrefix(s, prefix)
+	})
+	bbPool.Put(bb)
+}
+
+func matchUint16ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) {
+	if prefix == "" {
+		// Fast path - all the uint16 values match an empty prefix aka `*`
+		return
+	}
+	// The prefix may contain a part of the number.
+	// For example, `foo:12*` must match `12` and `123`.
+	// This means we cannot search in binary representation of numbers.
+ // Instead, we need searching for the whole prefix in string representation of numbers :( + n, ok := tryParseUint64(prefix) + if !ok || n > ch.maxValue { + bm.resetBits() + return + } + // There is no need in matching against bloom filters, since tokens is empty. + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint16String(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchUint32ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) { + if prefix == "" { + // Fast path - all the uint32 values match an empty prefix aka `*` + return + } + // The prefix may contain a part of the number. + // For example, `foo:12*` must match `12` and `123`. + // This means we cannot search in binary representation of numbers. + // Instead, we need searching for the whole prefix in string representation of numbers :( + n, ok := tryParseUint64(prefix) + if !ok || n > ch.maxValue { + bm.resetBits() + return + } + // There is no need in matching against bloom filters, since tokens is empty. + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint32String(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchUint64ByPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string) { + if prefix == "" { + // Fast path - all the uint64 values match an empty prefix aka `*` + return + } + // The prefix may contain a part of the number. + // For example, `foo:12*` must match `12` and `123`. + // This means we cannot search in binary representation of numbers. + // Instead, we need searching for the whole prefix in string representation of numbers :( + n, ok := tryParseUint64(prefix) + if !ok || n > ch.maxValue { + bm.resetBits() + return + } + // There is no need in matching against bloom filters, since tokens is empty. 
+ bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint64String(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) { + if len(phrases) > 1 { + bm.resetBits() + return + } + matchUint8ByExactValue(bs, ch, bm, phrases[0], tokens) +} + +func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) { + if len(phrases) > 1 { + bm.resetBits() + return + } + matchUint16ByExactValue(bs, ch, bm, phrases[0], tokens) +} + +func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) { + if len(phrases) > 1 { + bm.resetBits() + return + } + matchUint32ByExactValue(bs, ch, bm, phrases[0], tokens) +} + +func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrases, tokens []string) { + if len(phrases) > 1 { + bm.resetBits() + return + } + matchUint64ByExactValue(bs, ch, bm, phrases[0], tokens) +} + +func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) { + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint8String(bs, bb, v) + return matchExactPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) { + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint16String(bs, bb, v) + return matchExactPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) { + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint32String(bs, bb, v) + return matchExactPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) { + if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) { + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint64String(bs, bb, v) + return matchExactPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchMinMaxExactPrefix(ch *columnHeader, bm *filterBitmap, prefix string, tokens []string) bool { + if prefix == "" { + // An empty prefix matches all the values + return false + } + if len(tokens) > 0 { + // Non-empty tokens means that the prefix contains at least two tokens. + // Multiple tokens cannot match any uint value. 
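+		// For instance, an exact_prefix of "12 34" contains two tokens,
+		// while no single uint value contains more than one token.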
+ bm.resetBits() + return false + } + n, ok := tryParseUint64(prefix) + if !ok || n > ch.maxValue { + bm.resetBits() + return false + } + return true +} + +func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) { + n, ok := tryParseUint64(phrase) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = append(bb.B, byte(n)) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) { + n, ok := tryParseUint64(phrase) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = encoding.MarshalUint16(bb.B, uint16(n)) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) { + n, ok := tryParseUint64(phrase) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = encoding.MarshalUint32(bb.B, uint32(n)) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, phrase string, tokens []string) { + n, ok := tryParseUint64(phrase) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = encoding.MarshalUint64(bb.B, n) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, binValue []byte, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return v == string(binValue) + }) +} + +func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *filterBitmap, values map[string]struct{}, tokenSets [][]string) { + if !matchBloomFilterAnyTokenSet(bs, ch, tokenSets) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + _, ok := values[v] + return ok + }) +} + +func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, tokenSets [][]string) bool { + if len(tokenSets) == 0 { + return false + } + if len(tokenSets) > maxTokenSetsToInit || uint64(len(tokenSets)) > 10*bs.bsw.bh.rowsCount { + // It is faster to match every row in the block against all the values + // instead of using bloom filter for too big number of tokenSets. 
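+		// Editor's note (illustrative; not part of the original patch): every tokenSet
+		// needs its own bloom-filter lookup, so for e.g. an `in(...)` filter with
+		// thousands of values probed against a block of a few hundred rows it is
+		// cheaper to skip the bloom filter and let matchAnyValue scan the rows
+		// directly; returning true here keeps the bitmap intact so that scan can run.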
+ return true + } + bf := bs.getBloomFilterForColumn(ch) + for _, tokens := range tokenSets { + if bf.containsAll(tokens) { + return true + } + } + return false +} + +func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool { + if len(tokens) == 0 { + return true + } + bf := bs.getBloomFilterForColumn(ch) + return bf.containsAll(tokens) +} + +func visitValues(bs *blockSearch, ch *columnHeader, bm *filterBitmap, f func(value string) bool) { + if bm.isZero() { + // Fast path - nothing to visit + return + } + values := bs.getValuesForColumn(ch) + bm.forEachSetBit(func(idx int) bool { + return f(values[idx]) + }) +} + +func matchAnyCasePrefix(s, prefixLowercase string) bool { + sLowercase := strings.ToLower(s) + return matchPrefix(sLowercase, prefixLowercase) +} + +func matchPrefix(s, prefix string) bool { + if len(prefix) == 0 { + return len(s) > 0 + } + r, _ := utf8.DecodeRuneInString(prefix) + startsWithToken := isTokenRune(r) + offset := 0 + for { + n := strings.Index(s[offset:], prefix) + if n < 0 { + return false + } + offset += n + // Make sure that the found phrase contains non-token chars at the beginning + if startsWithToken && offset > 0 { + r, _ := utf8.DecodeLastRuneInString(s[:offset]) + if r == utf8.RuneError || isTokenRune(r) { + offset++ + continue + } + } + return true + } +} + +func matchIPv4Range(s string, minValue, maxValue uint32) bool { + n, ok := tryParseIPv4(s) + if !ok { + return false + } + return n >= minValue && n <= maxValue +} + +func matchStringRange(s, minValue, maxValue string) bool { + return s >= minValue && s < maxValue +} + +func matchLenRange(s string, minLen, maxLen uint64) bool { + sLen := uint64(utf8.RuneCountInString(s)) + return sLen >= minLen && sLen <= maxLen +} + +func matchRange(s string, minValue, maxValue float64) bool { + f, ok := tryParseFloat64(s) + if !ok { + return false + } + return f >= minValue && f <= maxValue +} + +func matchSequence(s string, phrases []string) bool { + for _, phrase := range phrases { + n := getPhrasePos(s, phrase) + if n < 0 { + return false + } + s = s[n+len(phrase):] + } + return true +} + +func matchAnyCasePhrase(s, phraseLowercase string) bool { + sLowercase := strings.ToLower(s) + return matchPhrase(sLowercase, phraseLowercase) +} + +func matchExactPrefix(s, prefix string) bool { + return strings.HasPrefix(s, prefix) +} + +func matchPhrase(s, phrase string) bool { + if len(phrase) == 0 { + return len(s) == 0 + } + n := getPhrasePos(s, phrase) + return n >= 0 +} + +func getPhrasePos(s, phrase string) int { + r, _ := utf8.DecodeRuneInString(phrase) + startsWithToken := isTokenRune(r) + r, _ = utf8.DecodeLastRuneInString(phrase) + endsWithToken := isTokenRune(r) + pos := 0 + for { + n := strings.Index(s[pos:], phrase) + if n < 0 { + return -1 + } + pos += n + // Make sure that the found phrase contains non-token chars at the beginning and at the end + if startsWithToken && pos > 0 { + r, _ := utf8.DecodeLastRuneInString(s[:pos]) + if r == utf8.RuneError || isTokenRune(r) { + pos++ + continue + } + } + if endsWithToken && pos+len(phrase) < len(s) { + r, _ := utf8.DecodeRuneInString(s[pos+len(phrase):]) + if r == utf8.RuneError || isTokenRune(r) { + pos++ + continue + } + } + return pos + } +} + +type stringBucket struct { + a []string +} + +func (sb *stringBucket) reset() { + a := sb.a + for i := range a { + a[i] = "" + } + sb.a = a[:0] +} + +func getStringBucket() *stringBucket { + v := stringBucketPool.Get() + if v == nil { + return &stringBucket{} + } + return v.(*stringBucket) +} 
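
Editor's note: the token-boundary handling in matchPhrase/getPhrasePos and matchPrefix above is easy to misread, so here is a minimal sketch of the observable difference. It is not part of the patch; it assumes it is compiled in package logstorage next to the functions it calls, and the expected results mirror cases exercised in filters_test.go below.

package logstorage

import "testing"

// TestPhraseVsPrefixSketch is an illustrative sketch only: matchPhrase requires the
// phrase to be bounded by non-token chars (or string edges) on both sides, while
// matchPrefix only requires that the match does not start in the middle of a token.
func TestPhraseVsPrefixSketch(t *testing.T) {
	if !matchPhrase("a foo bar", "foo") {
		t.Fatalf("`foo` is a whole token in `a foo bar`, so the phrase must match")
	}
	if matchPhrase("foobar", "foo") {
		t.Fatalf("`foo` is only the beginning of the `foobar` token, so the phrase must not match")
	}
	if !matchPrefix("foobar", "foo") {
		t.Fatalf("`foo` starts the `foobar` token, so the prefix must match")
	}
	if matchPrefix("afoobar", "foo") {
		t.Fatalf("`foo` starts in the middle of the `afoobar` token, so the prefix must not match")
	}
}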
+ +func putStringBucket(sb *stringBucket) { + sb.reset() + stringBucketPool.Put(sb) +} + +var stringBucketPool sync.Pool + +func getTokensSkipLast(s string) []string { + for { + r, runeSize := utf8.DecodeLastRuneInString(s) + if !isTokenRune(r) { + break + } + s = s[:len(s)-runeSize] + } + return tokenizeStrings(nil, []string{s}) +} + +func toUint64Range(minValue, maxValue float64) (uint64, uint64) { + minValue = math.Ceil(minValue) + maxValue = math.Floor(maxValue) + return toUint64Clamp(minValue), toUint64Clamp(maxValue) +} + +func toUint64Clamp(f float64) uint64 { + if f < 0 { + return 0 + } + if f > (1<<64)-1 { + return (1 << 64) - 1 + } + return uint64(f) +} + +func quoteFieldNameIfNeeded(s string) string { + if s == "_msg" || s == "" { + return "" + } + return quoteTokenIfNeeded(s) + ":" +} + +func toUint8String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { + if len(v) != 1 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v)) + } + n := uint64(v[0]) + bb.B = strconv.AppendUint(bb.B[:0], n, 10) + return bytesutil.ToUnsafeString(bb.B) +} + +func toUint16String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { + if len(v) != 2 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v)) + } + b := bytesutil.ToUnsafeBytes(v) + n := uint64(encoding.UnmarshalUint16(b)) + bb.B = strconv.AppendUint(bb.B[:0], n, 10) + return bytesutil.ToUnsafeString(bb.B) +} + +func toUint32String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { + if len(v) != 4 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of uint32 number: got %d; want 4", bs.partPath(), len(v)) + } + b := bytesutil.ToUnsafeBytes(v) + n := uint64(encoding.UnmarshalUint32(b)) + bb.B = strconv.AppendUint(bb.B[:0], n, 10) + return bytesutil.ToUnsafeString(bb.B) +} + +func toUint64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { + if len(v) != 8 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of uint64 number: got %d; want 8", bs.partPath(), len(v)) + } + b := bytesutil.ToUnsafeBytes(v) + n := encoding.UnmarshalUint64(b) + bb.B = strconv.AppendUint(bb.B[:0], n, 10) + return bytesutil.ToUnsafeString(bb.B) +} + +func toFloat64StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { + if len(v) != 8 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v)) + } + bb.B = toFloat64String(bb.B[:0], v) + return bytesutil.ToUnsafeString(bb.B) +} + +func toIPv4StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { + if len(v) != 4 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v)) + } + bb.B = toIPv4String(bb.B[:0], v) + return bytesutil.ToUnsafeString(bb.B) +} + +func toTimestampISO8601StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string { + if len(v) != 8 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of ISO8601 timestamp: got %d; want 8", bs.partPath(), len(v)) + } + bb.B = toTimestampISO8601String(bb.B[:0], v) + return bytesutil.ToUnsafeString(bb.B) +} diff --git a/lib/logstorage/filters_test.go b/lib/logstorage/filters_test.go new file mode 100644 index 000000000..cf7d6e782 --- /dev/null +++ b/lib/logstorage/filters_test.go @@ -0,0 +1,9296 @@ +package logstorage + 
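+// Editor's note (illustrative; not part of the original patch) on toUint64Range and
+// toUint64Clamp defined at the end of filters.go above: the float range is shrunk to
+// the integers it actually covers and then clamped to the uint64 domain, e.g.
+//
+//	toUint64Range(1.2, 10.8) == (2, 10)    // ceil(1.2)=2, floor(10.8)=10
+//	toUint64Clamp(-5)        == 0          // negative values clamp to 0
+//	toUint64Clamp(1e30)      == (1<<64)-1  // overflow clamps to the max uint64
+//
+// presumably so that a float-valued range filter can also be applied to
+// uint8/uint16/uint32/uint64 columns.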
+import ( + "fmt" + "math" + "reflect" + "regexp" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" +) + +func TestMatchLenRange(t *testing.T) { + f := func(s string, minLen, maxLen uint64, resultExpected bool) { + t.Helper() + result := matchLenRange(s, minLen, maxLen) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + f("", 0, 0, true) + f("", 0, 1, true) + f("", 1, 1, false) + + f("abc", 0, 2, false) + f("abc", 0, 3, true) + f("abc", 0, 4, true) + f("abc", 3, 4, true) + f("abc", 4, 4, false) + f("abc", 4, 2, false) + + f("ФЫВА", 3, 3, false) + f("ФЫВА", 4, 4, true) + f("ФЫВА", 5, 5, false) + f("ФЫВА", 0, 10, true) +} + +func TestMatchPhrase(t *testing.T) { + f := func(s, phrase string, resultExpected bool) { + t.Helper() + result := matchPhrase(s, phrase) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + f("", "", true) + f("foo", "", false) + f("", "foo", false) + f("foo", "foo", true) + f("foo bar", "foo", true) + f("foo bar", "bar", true) + f("a foo bar", "foo", true) + f("a foo bar", "fo", false) + f("a foo bar", "oo", false) + f("foobar", "foo", false) + f("foobar", "bar", false) + f("foobar", "oob", false) + f("afoobar foo", "foo", true) + f("раз два (три!)", "три", true) + f("", "foo bar", false) + f("foo bar", "foo bar", true) + f("(foo bar)", "foo bar", true) + f("afoo bar", "foo bar", false) + f("afoo bar", "afoo ba", false) + f("foo bar! baz", "foo bar!", true) + f("a.foo bar! baz", ".foo bar! ", true) + f("foo bar! baz", "foo bar! b", false) + f("255.255.255.255", "5", false) + f("255.255.255.255", "55", false) + f("255.255.255.255", "255", true) + f("255.255.255.255", "5.255", false) + f("255.255.255.255", "255.25", false) + f("255.255.255.255", "255.255", true) +} + +func TestMatchPrefix(t *testing.T) { + f := func(s, prefix string, resultExpected bool) { + t.Helper() + result := matchPrefix(s, prefix) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + f("", "", false) + f("foo", "", true) + f("", "foo", false) + f("foo", "foo", true) + f("foo bar", "foo", true) + f("foo bar", "bar", true) + f("a foo bar", "foo", true) + f("a foo bar", "fo", true) + f("a foo bar", "oo", false) + f("foobar", "foo", true) + f("foobar", "bar", false) + f("foobar", "oob", false) + f("afoobar foo", "foo", true) + f("раз два (три!)", "три", true) + f("", "foo bar", false) + f("foo bar", "foo bar", true) + f("(foo bar)", "foo bar", true) + f("afoo bar", "foo bar", false) + f("afoo bar", "afoo ba", true) + f("foo bar! baz", "foo bar!", true) + f("a.foo bar! baz", ".foo bar! ", true) + f("foo bar! baz", "foo bar! 
b", true) + f("255.255.255.255", "5", false) + f("255.255.255.255", "55", false) + f("255.255.255.255", "255", true) + f("255.255.255.255", "5.255", false) + f("255.255.255.255", "255.25", true) + f("255.255.255.255", "255.255", true) +} + +func TestMatchSequence(t *testing.T) { + f := func(s string, phrases []string, resultExpected bool) { + t.Helper() + result := matchSequence(s, phrases) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + f("", []string{""}, true) + f("foo", []string{""}, true) + f("", []string{"foo"}, false) + f("foo", []string{"foo"}, true) + f("foo bar", []string{"foo"}, true) + f("foo bar", []string{"bar"}, true) + f("foo bar", []string{"foo bar"}, true) + f("foo bar", []string{"foo", "bar"}, true) + f("foo bar", []string{"foo", " bar"}, true) + f("foo bar", []string{"foo ", "bar"}, true) + f("foo bar", []string{"foo ", " bar"}, false) + f("foo bar", []string{"bar", "foo"}, false) +} + +func TestMatchStringRange(t *testing.T) { + f := func(s, minValue, maxValue string, resultExpected bool) { + t.Helper() + result := matchStringRange(s, minValue, maxValue) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + f("foo", "a", "b", false) + f("foo", "a", "foa", false) + f("foo", "a", "foz", true) + f("foo", "foo", "foo", false) + f("foo", "foo", "fooa", true) + f("foo", "fooa", "foo", false) +} + +func TestMatchIPv4Range(t *testing.T) { + f := func(s string, minValue, maxValue uint32, resultExpected bool) { + t.Helper() + result := matchIPv4Range(s, minValue, maxValue) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + // Invalid IP + f("", 0, 1000, false) + f("123", 0, 1000, false) + + // range mismatch + f("0.0.0.1", 2, 100, false) + f("127.0.0.1", 0x6f000000, 0x7f000000, false) + + // range match + f("0.0.0.1", 1, 1, true) + f("0.0.0.1", 0, 100, true) + f("127.0.0.1", 0x7f000000, 0x7f000001, true) +} + +func TestFilterBitmap(t *testing.T) { + for i := 0; i < 100; i++ { + bm := getFilterBitmap(i) + if bm.bitsLen != i { + t.Fatalf("unexpected bits length: %d; want %d", bm.bitsLen, i) + } + + // Make sure that all the bits are set. 
+ nextIdx := 0 + bm.forEachSetBit(func(idx int) bool { + if idx >= i { + t.Fatalf("index must be smaller than %d", i) + } + if idx != nextIdx { + t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx) + } + nextIdx++ + return true + }) + + // Clear a part of bits + bm.forEachSetBit(func(idx int) bool { + return idx%2 != 0 + }) + nextIdx = 1 + bm.forEachSetBit(func(idx int) bool { + if idx != nextIdx { + t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx) + } + nextIdx += 2 + return true + }) + + // Clear all the bits + bm.forEachSetBit(func(idx int) bool { + return false + }) + bitsCount := 0 + bm.forEachSetBit(func(idx int) bool { + bitsCount++ + return true + }) + if bitsCount != 0 { + t.Fatalf("unexpected non-zero number of set bits remained: %d", bitsCount) + } + + putFilterBitmap(bm) + } +} + +func TestComplexFilters(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a", + "a foobar abcdef", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // (foobar AND NOT baz AND (abcdef OR xyz)) + f := &andFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "foobar", + }, + ¬Filter{ + f: &phraseFilter{ + fieldName: "foo", + phrase: "baz", + }, + }, + &orFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "abcdef", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "xyz", + }, + }, + }, + }, + } + testFilterMatchForColumns(t, columns, f, "foo", []int{6}) + + // (foobaz AND NOT baz AND (abcdef OR xyz)) + f = &andFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "foobaz", + }, + ¬Filter{ + f: &phraseFilter{ + fieldName: "foo", + phrase: "baz", + }, + }, + &orFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "abcdef", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "xyz", + }, + }, + }, + }, + } + testFilterMatchForColumns(t, columns, f, "foo", nil) + + // (foobaz AND NOT baz AND (abcdef OR xyz OR a)) + f = &andFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "foobar", + }, + ¬Filter{ + f: &phraseFilter{ + fieldName: "foo", + phrase: "baz", + }, + }, + &orFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "abcdef", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "xyz", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + }, + }, + }, + } + testFilterMatchForColumns(t, columns, f, "foo", []int{1, 6}) + + // (foobaz AND NOT qwert AND (abcdef OR xyz OR a)) + f = &andFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "foobar", + }, + ¬Filter{ + f: &phraseFilter{ + fieldName: "foo", + phrase: "qwert", + }, + }, + &orFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "abcdef", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "xyz", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + }, + }, + }, + } + testFilterMatchForColumns(t, columns, f, "foo", []int{1, 3, 6}) +} + +func TestOrFilter(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a", + "a foobar abcdef", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // non-empty union + of := &orFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "23", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + 
}, + }, + } + testFilterMatchForColumns(t, columns, of, "foo", []int{2, 6, 9}) + + // reverse non-empty union + of = &orFilter{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "23", + }, + }, + } + testFilterMatchForColumns(t, columns, of, "foo", []int{2, 6, 9}) + + // first empty result, second non-empty result + of = &orFilter{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "xabc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "23", + }, + }, + } + testFilterMatchForColumns(t, columns, of, "foo", []int{9}) + + // first non-empty result, second empty result + of = &orFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "23", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "xabc", + }, + }, + } + testFilterMatchForColumns(t, columns, of, "foo", []int{9}) + + // first match all + of = &orFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "23", + }, + }, + } + testFilterMatchForColumns(t, columns, of, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // second match all + of = &orFilter{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "23", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + }, + } + testFilterMatchForColumns(t, columns, of, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // both empty results + of = &orFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "x23", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "xabc", + }, + }, + } + testFilterMatchForColumns(t, columns, of, "foo", nil) +} + +func TestAndFilter(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "", + "a foobar abcdef", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // non-empty intersection + af := &andFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + }, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{2, 6}) + + // reverse non-empty intersection + af = &andFilter{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + }, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{2, 6}) + + // the first filter mismatch + af = &andFilter{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "bc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + }, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + // the last filter mismatch + af = &andFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "abc", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "foo", + }, + }, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + // empty intersection + af = &andFilter{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "foo", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + }, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + // reverse empty intersection + af = &andFilter{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "foo", + }, + }, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) +} + +func TestNotFilter(t 
*testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "", + "a foobar", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + nf := ¬Filter{ + f: &phraseFilter{ + fieldName: "foo", + phrase: "", + }, + } + testFilterMatchForColumns(t, columns, nf, "foo", []int{0, 1, 2, 3, 4, 6, 7, 8, 9}) + + nf = ¬Filter{ + f: &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + } + testFilterMatchForColumns(t, columns, nf, "foo", []int{5}) + + nf = ¬Filter{ + f: &phraseFilter{ + fieldName: "non-existing-field", + phrase: "foobar", + }, + } + testFilterMatchForColumns(t, columns, nf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + nf = ¬Filter{ + f: &prefixFilter{ + fieldName: "non-existing-field", + prefix: "", + }, + } + testFilterMatchForColumns(t, columns, nf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + nf = ¬Filter{ + f: &prefixFilter{ + fieldName: "foo", + prefix: "", + }, + } + testFilterMatchForColumns(t, columns, nf, "foo", []int{5}) + + // mismatch + nf = ¬Filter{ + f: &phraseFilter{ + fieldName: "non-existing-field", + phrase: "", + }, + } + testFilterMatchForColumns(t, columns, nf, "foo", nil) +} + +func TestTimeFilter(t *testing.T) { + timestamps := []int64{ + 1, + 9, + 123, + 456, + 789, + } + + // match + tf := &timeFilter{ + minTimestamp: -10, + maxTimestamp: 1, + } + testFilterMatchForTimestamps(t, timestamps, tf, []int{0}) + + tf = &timeFilter{ + minTimestamp: -10, + maxTimestamp: 10, + } + testFilterMatchForTimestamps(t, timestamps, tf, []int{0, 1}) + + tf = &timeFilter{ + minTimestamp: 1, + maxTimestamp: 1, + } + testFilterMatchForTimestamps(t, timestamps, tf, []int{0}) + + tf = &timeFilter{ + minTimestamp: 2, + maxTimestamp: 456, + } + testFilterMatchForTimestamps(t, timestamps, tf, []int{1, 2, 3}) + + tf = &timeFilter{ + minTimestamp: 2, + maxTimestamp: 457, + } + testFilterMatchForTimestamps(t, timestamps, tf, []int{1, 2, 3}) + + tf = &timeFilter{ + minTimestamp: 120, + maxTimestamp: 788, + } + testFilterMatchForTimestamps(t, timestamps, tf, []int{2, 3}) + + tf = &timeFilter{ + minTimestamp: 120, + maxTimestamp: 789, + } + testFilterMatchForTimestamps(t, timestamps, tf, []int{2, 3, 4}) + + tf = &timeFilter{ + minTimestamp: 120, + maxTimestamp: 10000, + } + testFilterMatchForTimestamps(t, timestamps, tf, []int{2, 3, 4}) + + tf = &timeFilter{ + minTimestamp: 789, + maxTimestamp: 1000, + } + testFilterMatchForTimestamps(t, timestamps, tf, []int{4}) + + // mismatch + tf = &timeFilter{ + minTimestamp: -1000, + maxTimestamp: 0, + } + testFilterMatchForTimestamps(t, timestamps, tf, nil) + + tf = &timeFilter{ + minTimestamp: 790, + maxTimestamp: 1000, + } + testFilterMatchForTimestamps(t, timestamps, tf, nil) +} + +func TestStreamFilter(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "", + "a foobar", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // Match + f := &exactFilter{ + fieldName: "job", + value: "foobar", + } + testFilterMatchForColumns(t, columns, f, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // Mismatch + f = &exactFilter{ + fieldName: "job", + value: "abc", + } + testFilterMatchForColumns(t, columns, f, "foo", nil) +} + +func TestSequenceFilter(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + 
}, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"abc"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"def"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"abc def"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"abc ", "", "def", ""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"ab"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"abc", "abc"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"abc", "def", "foo"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + "abc def", + "abc def", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"abc", " def"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"abc ", ""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{"", ""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"abc def ", "foobar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "non-existing column", + phrases: []string{"x", "yz"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "baz foobar", + "abc", + "afdf foobar baz", + "fddf foobarbaz", + "afoobarbaz", + "foobar", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"foobar", "baz"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{3}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"baz", "aaaa"}, + } + testFilterMatchForColumns(t, 
columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "non-existing column", + phrases: []string{"foobar", "aaaa"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a bb foo", + "bb a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a afoobarbaz", + "a foobar bb", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"a", "bb"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 6}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"НГКШ", " "}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{8}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"!,", "(!1)"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{9}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"aa a", "bcdasqq"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"@", "!!!!"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"12"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{1, 5}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"", "bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"1234"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"1234", "567"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "256", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"12"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{1, 5}) + + sf = 
&sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"", "bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"1234"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"1234", "567"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "65536", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"12"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{1, 5}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"", "bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"1234"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"1234", "567"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "12345678901", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"12"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{1, 5}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = 
&sequenceFilter{ + fieldName: "foo", + phrases: []string{"", "bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"1234"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"1234", "567"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"-", "65536"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{3}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"1234.", "5678901"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{4}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"", "5678901"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{4}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"", ""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"65536", "-"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"5678901", "1234"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"12345678901234567890"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "1.0.127.6", + "55.55.55.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "foo", + phrases: []string{"127.0.0.1"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{2, 4, 5, 7}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"127", "1"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{2, 4, 5, 7}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"127.0.0"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{2, 4, 5, 7}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"2.3", ".4"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, 
"foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + // mismatch + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"5"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"127.", "1", "1", "345"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"27.0"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + + sf = &sequenceFilter{ + fieldName: "foo", + phrases: []string{"255.255.255.255"}, + } + testFilterMatchForColumns(t, columns, sf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + sf := &sequenceFilter{ + fieldName: "_msg", + phrases: []string{"2006-01-02T15:04:05.005Z"}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", []int{4}) + + sf = &sequenceFilter{ + fieldName: "_msg", + phrases: []string{"2006-01", "04:05."}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + sf = &sequenceFilter{ + fieldName: "_msg", + phrases: []string{"2006", "002Z"}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", []int{1}) + + sf = &sequenceFilter{ + fieldName: "_msg", + phrases: []string{}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + sf = &sequenceFilter{ + fieldName: "_msg", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + sf = &sequenceFilter{ + fieldName: "non-existing-column", + phrases: []string{""}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mimatch + sf = &sequenceFilter{ + fieldName: "_msg", + phrases: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", nil) + + sf = &sequenceFilter{ + fieldName: "_msg", + phrases: []string{"002Z", "2006"}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", nil) + + sf = &sequenceFilter{ + fieldName: "_msg", + phrases: []string{"2006-04-02T15:04:05.005Z", "2023"}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", nil) + + sf = &sequenceFilter{ + fieldName: "_msg", + phrases: []string{"06"}, + } + testFilterMatchForColumns(t, columns, sf, "_msg", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "foobar", + "abc", + "afdf foobar baz", + "fddf foobarbaz", + "foobarbaz", + "foobar", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "foobar", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5, 6}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "baz", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "non-existing column", + prefix: "foobar", + } + testFilterMatchForColumns(t, columns, ef, 
"foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "aa fddf foobarbaz", + "a afoobarbaz", + "a foobar baz", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "aa ", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{2, 4}) + + ef = &exactPrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "aa b", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "fobar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "non-existing-column", + prefix: "aa", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 5}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "999", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "7", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "467", + "5", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 5}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "999", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "7", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "65536", + "5", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 5}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + 
prefix: "99999", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "7", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "123456789012", + "5", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 5}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "1234567890123", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "7", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 4}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "1234.567", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{4}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "-65536", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{3}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "6511", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.2", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.55.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "127.0.", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{2, 4, 5, 7}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "255", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:06.004Z", + "2006-01-02T15:04:06.005Z", + "2006-01-02T15:04:07.006Z", + "2006-01-02T15:04:10.007Z", + "2006-01-02T15:04:12.008Z", + "2006-01-02T15:04:15.009Z", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "_msg", + prefix: "2006-01-02T15:04:05", 
+ } + testFilterMatchForColumns(t, columns, ef, "_msg", []int{0, 1, 2}) + + ef = &exactPrefixFilter{ + fieldName: "_msg", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mimatch + ef = &exactPrefixFilter{ + fieldName: "_msg", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, ef, "_msg", nil) + + ef = &exactPrefixFilter{ + fieldName: "_msg", + prefix: "0", + } + testFilterMatchForColumns(t, columns, ef, "_msg", nil) + }) +} + +func TestExactPrefixFilter(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "abc def", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "abc d", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0}) + + ef = &exactPrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "xabc", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "non-existing column", + prefix: "abc", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + "abc def", + "abc def", + }, + }, + } + + // match + ef := &exactPrefixFilter{ + fieldName: "foo", + prefix: "abc def", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "ab", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2}) + + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2}) + + ef = &exactPrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2}) + + // mismatch + ef = &exactPrefixFilter{ + fieldName: "foo", + prefix: "foobar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactPrefixFilter{ + fieldName: "non-existing column", + prefix: "x", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + +} + +func TestExactFilter(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "abc def", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0}) + + ef = &exactFilter{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "abc", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "non-existing column", + value: "abc", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + "abc def", + "abc def", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "abc def", + } + testFilterMatchForColumns(t, columns, ef, 
"foo", []int{0, 1, 2}) + + ef = &exactFilter{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "foobar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "non-existing column", + value: "x", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "foobar", + "abc", + "afdf foobar baz", + "fddf foobarbaz", + "afoobarbaz", + "foobar", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "foobar", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 6}) + + ef = &exactFilter{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "baz", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "non-existing column", + value: "foobar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a afoobarbaz", + "a foobar baz", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "aa abc a", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{2}) + + ef = &exactFilter{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "aa a", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "fooaaazz a", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "12", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5}) + + ef = &exactFilter{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "33", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "256", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "12", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5}) + + ef = &exactFilter{ + fieldName: 
"non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "33", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "65536", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "12", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5}) + + ef = &exactFilter{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "33", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "12345678901", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "12", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{1, 5}) + + ef = &exactFilter{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "33", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "1234", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0}) + + ef = &exactFilter{ + fieldName: "foo", + value: "1234.5678901", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{4}) + + ef = &exactFilter{ + fieldName: "foo", + value: "-65536", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{3}) + + ef = &exactFilter{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "65536", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "123", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + 
fieldName: "foo", + value: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.55.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "foo", + value: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{2, 4, 5, 7}) + + ef = &exactFilter{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + // mismatch + ef = &exactFilter{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "127.0", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + + ef = &exactFilter{ + fieldName: "foo", + value: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, ef, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + ef := &exactFilter{ + fieldName: "_msg", + value: "2006-01-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, ef, "_msg", []int{4}) + + ef = &exactFilter{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + ef = &exactFilter{ + fieldName: "_msg", + value: "bar", + } + testFilterMatchForColumns(t, columns, ef, "_msg", nil) + + ef = &exactFilter{ + fieldName: "_msg", + value: "", + } + testFilterMatchForColumns(t, columns, ef, "_msg", nil) + + ef = &exactFilter{ + fieldName: "_msg", + value: "2006-03-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, ef, "_msg", nil) + }) +} + +func TestInFilter(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + }, + }, + { + name: "other column", + values: []string{ + "asdfdsf", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"abc def", "abc", "foobar"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0}) + + af = &inFilter{ + fieldName: "other column", + values: []string{"asdfdsf", ""}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{"", "foo"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"abc", "def"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"", "abc"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "other column", + values: []string{"sd"}, + } + 
testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "non-existing column", + values: []string{"abc", "def"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + "abc def", + "abc def", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"aaaa", "abc def", "foobar"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{"", "abc"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"abc def ", "foobar"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "non-existing column", + values: []string{"x"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "foobar", + "abc", + "afdf foobar baz", + "fddf foobarbaz", + "afoobarbaz", + "foobar", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"foobar", "aaaa", "abc", "baz"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 6}) + + af = &inFilter{ + fieldName: "foo", + values: []string{"bbbb", "", "aaaa"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"bar", "aaaa"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "non-existing column", + values: []string{"foobar", "aaaa"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a afoobarbaz", + "a foobar", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"a foobar", "aa abc a"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 6}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"aa a"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + 
"1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"12", "32"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 5}) + + af = &inFilter{ + fieldName: "foo", + values: []string{"0"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{3, 4}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"33"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"1234"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "256", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"12", "32"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 5}) + + af = &inFilter{ + fieldName: "foo", + values: []string{"0"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{3, 4}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"33"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"123456"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "65536", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"12", "32"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 5}) + + af = &inFilter{ + fieldName: "foo", + values: []string{"0"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{3, 4}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"33"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"12345678901"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + 
"0", + "12", + "12345678901", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"12", "32"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{1, 2, 5}) + + af = &inFilter{ + fieldName: "foo", + values: []string{"0"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{3, 4}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"33"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"1234", "1", "foobar", "123211"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 5}) + + af = &inFilter{ + fieldName: "foo", + values: []string{"1234.5678901"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{4}) + + af = &inFilter{ + fieldName: "foo", + values: []string{"-65536"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{3}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"65536"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"123"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"12345678901234567890"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.55.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "foo", + values: []string{"127.0.0.1", "24.54.1.2", "127.0.4.2"}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{2, 4, 5, 6, 7}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + // mismatch + af = &inFilter{ + fieldName: "foo", + values: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{""}, + } + 
testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"5"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + + af = &inFilter{ + fieldName: "foo", + values: []string{"255.255.255.255"}, + } + testFilterMatchForColumns(t, columns, af, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + af := &inFilter{ + fieldName: "_msg", + values: []string{"2006-01-02T15:04:05.005Z", "foobar"}, + } + testFilterMatchForColumns(t, columns, af, "_msg", []int{4}) + + af = &inFilter{ + fieldName: "non-existing-column", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + af = &inFilter{ + fieldName: "_msg", + values: []string{"bar"}, + } + testFilterMatchForColumns(t, columns, af, "_msg", nil) + + af = &inFilter{ + fieldName: "_msg", + values: []string{}, + } + testFilterMatchForColumns(t, columns, af, "_msg", nil) + + af = &inFilter{ + fieldName: "_msg", + values: []string{""}, + } + testFilterMatchForColumns(t, columns, af, "_msg", nil) + + af = &inFilter{ + fieldName: "_msg", + values: []string{"2006-04-02T15:04:05.005Z"}, + } + testFilterMatchForColumns(t, columns, af, "_msg", nil) + }) +} + +func TestRegexpFilter(t *testing.T) { + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "127.0.0.1", + "127.0.0.1", + "127.0.0.1", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("0.0"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile(`^127\.0\.0\.1$`), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + rf = &regexpFilter{ + fieldName: "non-existing-column", + re: regexp.MustCompile("foo.+bar|"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + // mismatch + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("foo.+bar"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &regexpFilter{ + fieldName: "non-existing-column", + re: regexp.MustCompile("foo.+bar"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "127.0.0.1", + "Abc", + "127.255.255.255", + "10.4", + "foo 127.0.0.1", + "127.0.0.1 bar", + "127.0.0.1", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar|^$"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 5, 6}) + + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("27.0"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 5, 6, 7}) + + // mismatch + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("bar.+foo"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "A FOO", + "a 10", + "127.0.0.1", + "20", + "15.5", + "-5", + "a fooBaR", + "a 127.0.0.1 dfff", + "a ТЕСТЙЦУК НГКШ ",
+ "a !!,23.(!1)", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("(?i)foo|йцу"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 6, 8}) + + // mismatch + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("qwe.+rty|^$"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 7, 8}) + + // mismatch + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65535", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 7, 8}) + + // mismatch + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65536", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 7, 8}) + + // mismatch + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12345678901", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 7, 8}) + + // mismatch + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "123456.78901", + "-0.2", + "2", + "-334", + "4", + "5", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2, 5, 6, 7, 8}) + + // mismatch + rf = &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "foo", + re: regexp.MustCompile("127.0.[40].(1|2)"), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 4, 5, 6, 7}) + + // mismatch + rf = &regexpFilter{
+ fieldName: "foo", + re: regexp.MustCompile("foo|bar|834"), + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + rf := &regexpFilter{ + fieldName: "_msg", + re: regexp.MustCompile("2006-[0-9]{2}-.+?(2|5)Z"), + } + testFilterMatchForColumns(t, columns, rf, "_msg", []int{1, 4}) + + // mismatch + rf = &regexpFilter{ + fieldName: "_msg", + re: regexp.MustCompile("^01|04$"), + } + testFilterMatchForColumns(t, columns, rf, "_msg", nil) + }) +} + +func TestStringRangeFilter(t *testing.T) { + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "127.0.0.1", + "127.0.0.1", + "127.0.0.1", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "foo", + minValue: "127.0.0.1", + maxValue: "255.", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "127.0.0.1", + maxValue: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "", + maxValue: "127.0.0.0", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "non-existing-column", + minValue: "1", + maxValue: "2", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "127.0.0.2", + maxValue: "", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "127.0.0.1", + "Abc", + "127.255.255.255", + "10.4", + "foo 127.0.0.1", + "127.0.0.1 bar", + "127.0.0.1", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "foo", + minValue: "127.0.0.0", + maxValue: "128.0.0.0", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 3, 6, 7}) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "127", + maxValue: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 7}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "0", + maxValue: "10", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "127.0.0.2", + maxValue: "127.127.0.0", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "128.0.0.0", + maxValue: "127.0.0.0", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "A FOO", + "a 10", + "127.0.0.1", + "20", + "15.5", + "-5", + "a fooBaR", + "a 127.0.0.1 dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "foo", + minValue: "127.0.0.1", + maxValue: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 4}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "0", + maxValue: "10", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf 
= &stringRangeFilter{ + fieldName: "foo", + minValue: "255.255.255.255", + maxValue: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "foo", + minValue: "33", + maxValue: "5", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "a", + maxValue: "b", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "100", + maxValue: "101", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "5", + maxValue: "33", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65535", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "foo", + minValue: "33", + maxValue: "5", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "a", + maxValue: "b", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "100", + maxValue: "101", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "5", + maxValue: "33", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65536", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "foo", + minValue: "33", + maxValue: "5", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "a", + maxValue: "b", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "100", + maxValue: "101", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "5", + maxValue: "33", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12345678901", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "foo", + minValue: "33", + maxValue: "5", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "a", + maxValue: "b", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "100", + maxValue: "101", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "5", + maxValue: "33", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + 
"123", + "12", + "32", + "0", + "0", + "123456.78901", + "-0.2", + "2", + "-334", + "4", + "5", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "foo", + minValue: "33", + maxValue: "5", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{9, 10}) + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "-0", + maxValue: "-1", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{6}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "a", + maxValue: "b", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "100", + maxValue: "101", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "5", + maxValue: "33", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "foo", + minValue: "127.0.0", + maxValue: "128.0.0.0", + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 4, 5, 6, 7}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "a", + maxValue: "b", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "128.0.0.0", + maxValue: "129.0.0.0", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "255.0.0.0", + maxValue: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &stringRangeFilter{ + fieldName: "foo", + minValue: "128.0.0.0", + maxValue: "", + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2005-01-02T15:04:05.001Z", + "2006-02-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2026-01-02T15:04:05.005Z", + "2026-01-02T15:04:05.006Z", + "2026-01-02T15:04:05.007Z", + "2026-01-02T15:04:05.008Z", + "2026-01-02T15:04:05.009Z", + }, + }, + } + + // match + rf := &stringRangeFilter{ + fieldName: "_msg", + minValue: "2006-01-02", + maxValue: "2006-01-03", + } + testFilterMatchForColumns(t, columns, rf, "_msg", []int{2, 3}) + + rf = &stringRangeFilter{ + fieldName: "_msg", + minValue: "", + maxValue: "2006", + } + testFilterMatchForColumns(t, columns, rf, "_msg", []int{0}) + + // mismatch + rf = &stringRangeFilter{ + fieldName: "_msg", + minValue: "3", + maxValue: "4", + } + testFilterMatchForColumns(t, columns, rf, "_msg", nil) + + rf = &stringRangeFilter{ + fieldName: "_msg", + minValue: "a", + maxValue: "b", + } + testFilterMatchForColumns(t, columns, rf, "_msg", nil) + + rf = &stringRangeFilter{ + fieldName: "_msg", + minValue: "2006-01-03", + maxValue: "2006-01-02", + } + testFilterMatchForColumns(t, columns, rf, "_msg", nil) + }) +} + +func TestIPv4RangeFilter(t *testing.T) { + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "127.0.0.1", + "127.0.0.1", + "127.0.0.1", + }, + }, + } + + // match + rf := &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 0x80000000, + } + testFilterMatchForColumns(t, 
columns, rf, "foo", []int{0, 1, 2}) + + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0x7f000001, + maxValue: 0x7f000001, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + // mismatch + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 0x7f000000, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &ipv4RangeFilter{ + fieldName: "non-existing-column", + minValue: 0, + maxValue: 20000, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0x80000000, + maxValue: 0, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "127.0.0.1", + "Abc", + "127.255.255.255", + "10.4", + "foo 127.0.0.1", + "127.0.0.1 bar", + "127.0.0.1", + }, + }, + } + + // match + rf := &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0x7f000000, + maxValue: 0x80000000, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 3, 7}) + + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 0x7f000001, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 7}) + + // mismatch + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 1000, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0x7f000002, + maxValue: 0x7f7f0000, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0x80000000, + maxValue: 0x7f000000, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "A FOO", + "a 10", + "127.0.0.1", + "20", + "15.5", + "-5", + "a fooBaR", + "a 127.0.0.1 dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + rf := &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0x7f000000, + maxValue: 0xffffffff, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2}) + + // mismatch + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 10000, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0xffffffff, + maxValue: 0x7f000000, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // mismatch + rf := &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 0xffffffff, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65535", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // mismatch + rf := &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 0xffffffff, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65536", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // mismatch + rf := &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 0xffffffff, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + 
t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12345678901", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // mismatch + rf := &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 0xffffffff, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "123456.78901", + "-0.2", + "2", + "-334", + "4", + "5", + }, + }, + } + + // mismatch + rf := &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 0xffffffff, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + rf := &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 0x08000000, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 11}) + + // mismatch + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0x80000000, + maxValue: 0x90000000, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0xff000000, + maxValue: 0xffffffff, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &ipv4RangeFilter{ + fieldName: "foo", + minValue: 0x08000000, + maxValue: 0, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // mismatch + rf := &ipv4RangeFilter{ + fieldName: "_msg", + minValue: 0, + maxValue: 0xffffffff, + } + testFilterMatchForColumns(t, columns, rf, "_msg", nil) + }) +} + +func TestLenRangeFilter(t *testing.T) { + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "10", + "10", + "10", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "foo", + minLen: 2, + maxLen: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + rf = &lenRangeFilter{ + fieldName: "non-existing-column", + minLen: 0, + maxLen: 10, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + // mismatch + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 3, + maxLen: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &lenRangeFilter{ + fieldName: "non-existing-column", + minLen: 10, + maxLen: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "10", + "Abc", + "20", + "10.5", + "10 AFoobarbaz", + "foobar", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "foo", + minLen: 2, + maxLen: 3, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 2, 3}) + + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 0, + maxLen: 1, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0}) + 
+ // mismatch + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 20, + maxLen: 30, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "A FOO", + "a 10", + "10", + "20", + "15.5", + "-5", + "a fooBaR", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "foo", + minLen: 2, + maxLen: 3, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 5}) + + // mismatch + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 100, + maxLen: 200, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "foo", + minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 6}) + + // mismatch + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 0, + maxLen: 0, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 10, + maxLen: 10, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "256", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "foo", + minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 6}) + + // mismatch + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 0, + maxLen: 0, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 10, + maxLen: 10, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "65536", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "foo", + minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 6}) + + // mismatch + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 0, + maxLen: 0, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 10, + maxLen: 10, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123456789012", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "foo", + minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 6}) + + // mismatch + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 0, + maxLen: 0, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 20, + maxLen: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "123456.78901", + "-0.2", + "2", + "-334", + "4", + "5", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "foo", + 
minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 2}) + + // mismatch + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 100, + maxLen: 200, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "foo", + minLen: 3, + maxLen: 7, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 11}) + + // mismatch + rf = &lenRangeFilter{ + fieldName: "foo", + minLen: 20, + maxLen: 30, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + rf := &lenRangeFilter{ + fieldName: "_msg", + minLen: 10, + maxLen: 30, + } + testFilterMatchForColumns(t, columns, rf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + rf = &lenRangeFilter{ + fieldName: "_msg", + minLen: 10, + maxLen: 11, + } + testFilterMatchForColumns(t, columns, rf, "_msg", nil) + }) +} + +func TestRangeFilter(t *testing.T) { + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "10", + "10", + "10", + }, + }, + } + + // match + rf := &rangeFilter{ + fieldName: "foo", + minValue: -10, + maxValue: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 10, + maxValue: 10, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 10, + maxValue: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{0, 1, 2}) + + // mismatch + rf = &rangeFilter{ + fieldName: "foo", + minValue: -10, + maxValue: 9.99, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 20, + maxValue: -10, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 10.1, + maxValue: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "non-existing-column", + minValue: 10, + maxValue: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 11, + maxValue: 10, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "10", + "Abc", + "20", + "10.5", + "10 AFoobarbaz", + "foobar", + }, + }, + } + + // match + rf := &rangeFilter{ + fieldName: "foo", + minValue: -10, + maxValue: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 3, 4}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 10, + maxValue: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{1, 3, 4}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 10.1, + maxValue: 19.9, + } + testFilterMatchForColumns(t, 
columns, rf, "foo", []int{4}) + + // mismatch + rf = &rangeFilter{ + fieldName: "foo", + minValue: -11, + maxValue: 0, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 11, + maxValue: 19, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 20.1, + maxValue: 100, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 20, + maxValue: 10, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "A FOO", + "a 10", + "10", + "20", + "15.5", + "-5", + "a fooBaR", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + rf := &rangeFilter{ + fieldName: "foo", + minValue: -100, + maxValue: 100, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 4, 5}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 10, + maxValue: 20, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{2, 3, 4}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: -5, + maxValue: -5, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{5}) + + // mismatch + rf = &rangeFilter{ + fieldName: "foo", + minValue: -10, + maxValue: -5.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 20.1, + maxValue: 100, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 20, + maxValue: 10, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &rangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 3, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 2.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{6, 7}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + maxValue: 2.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7}) + + // mismatch + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + maxValue: -0.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 0.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 2.9, + maxValue: 0.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65535", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &rangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 3, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 2.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{6, 7}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + maxValue: 2.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7}) + + // mismatch + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + 
maxValue: -0.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 0.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 2.9, + maxValue: 0.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65536", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &rangeFilter{ + fieldName: "foo", + minValue: 0, + maxValue: 3, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 2.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{6, 7}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + maxValue: 2.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7}) + + // mismatch + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + maxValue: -0.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 0.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 2.9, + maxValue: 0.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12345678901", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + rf := &rangeFilter{ + fieldName: "foo", + minValue: math.Inf(-1), + maxValue: 3, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 2.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{6, 7}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + maxValue: 2.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 1000, + maxValue: math.Inf(1), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{5}) + + // mismatch + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + maxValue: -0.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 0.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 2.9, + maxValue: 0.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "123456.78901", + "-0.2", + "2", + "-334", + "4", + "5", + }, + }, + } + + // match + rf := &rangeFilter{ + fieldName: "foo", + minValue: math.Inf(-1), + maxValue: 3, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 7, 8}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 2.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{7}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + maxValue: 1.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{3, 4, 6, 8}) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 1000, + maxValue: math.Inf(1), + } + testFilterMatchForColumns(t, columns, rf, "foo", []int{5}) + + // 
mismatch + rf = &rangeFilter{ + fieldName: "foo", + minValue: -1e18, + maxValue: -334.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 0.1, + maxValue: 0.9, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + + rf = &rangeFilter{ + fieldName: "foo", + minValue: 2.9, + maxValue: 0.1, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // range filter always mismatches ipv4 + rf := &rangeFilter{ + fieldName: "foo", + minValue: -100, + maxValue: 100, + } + testFilterMatchForColumns(t, columns, rf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // range filter always mismatches timestamp + rf := &rangeFilter{ + fieldName: "_msg", + minValue: -100, + maxValue: 100, + } + testFilterMatchForColumns(t, columns, rf, "_msg", nil) + }) +} + +func TestAnyCasePrefixFilter(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "aBc DEf", + }, + }, + { + name: "other column", + values: []string{ + "aSDfdsf", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "ABC", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "ab", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePrefixFilter{ + fieldName: "other column", + prefix: "asdfdSF", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "bc", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "other column", + prefix: "sd", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing column", + prefix: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "other-column", + values: []string{ + "x", + "X", + "X", + }, 
+ }, + { + name: "foo", + values: []string{ + "abc def", + "ABC DEF", + "AbC Def", + }, + }, + { + name: "_msg", + values: []string{ + "1 2 3", + "1 2 3", + "1 2 3", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "Abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "AB", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "abc de", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: " de", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePrefixFilter{ + fieldName: "other-column", + prefix: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: " 2 ", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "abc def ", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "other-column", + prefix: "foo", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing column", + prefix: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "foo", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "fOObar", + "Abc", + "aFDf FooBar baz", + "fddf FOObarBAZ", + "AFoobarbaz", + "foobar", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "FooBar", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 4, 6}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 3, 4, 5, 6}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "ba", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing column", + prefix: "foobar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "A FOO", + "a fOoBar", + "aA aBC A", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a afoobarbaz", + "a fooBaR", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + pf := 
&anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "a", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "нГк", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{8}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "aa a", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "!,", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{9}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "aa ax", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "qwe rty abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "@", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 5}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65535", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "123456", + } + 
testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "12345678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "0.0002", + "-320001", + "4", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "1234.5678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "56789", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "-6553", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &anyCasePrefixFilter{ 
+ fieldName: "foo", + prefix: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "7344.8943", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "-1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "+1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "23", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "678", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8, 9}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "127.0.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "2.3.", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8}) + + // mismatch + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "8", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "127.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "27.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "foo", + prefix: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + 
"2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + pf := &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "2006-01-02t15:04:05.005z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{4}) + + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "2006-01-0", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "002", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{1}) + + // mimatch + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "2006-03-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "06", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it has different string representation of the timestamp + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "2006-01-02T16:04:05.005+01:00", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it contains too many digits for millisecond part + pf = &anyCasePrefixFilter{ + fieldName: "_msg", + prefix: "2006-01-02T15:04:05.00500Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &anyCasePrefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + }) +} + +func TestPrefixFilter(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + }, + }, + { + name: "other column", + values: []string{ + "asdfdsf", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "ab", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &prefixFilter{ + fieldName: "other column", + prefix: "asdfdsf", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "bc", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "other column", + prefix: "sd", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing column", + prefix: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing column", + prefix: "", + } + testFilterMatchForColumns(t, 
columns, pf, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "other-column", + values: []string{ + "x", + "x", + "x", + }, + }, + { + name: "foo", + values: []string{ + "abc def", + "abc def", + "abc def", + }, + }, + { + name: "_msg", + values: []string{ + "1 2 3", + "1 2 3", + "1 2 3", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "ab", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "abc de", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: " de", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &prefixFilter{ + fieldName: "other-column", + prefix: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &prefixFilter{ + fieldName: "_msg", + prefix: " 2 ", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "abc def ", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "other-column", + prefix: "foo", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing column", + prefix: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "_msg", + prefix: "foo", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "foobar", + "abc", + "afdf foobar baz", + "fddf foobarbaz", + "afoobarbaz", + "foobar", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "foobar", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 4, 6}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 3, 4, 5, 6}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "ba", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing column", + prefix: "foobar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a afoobarbaz", + "a foobar", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + 
+ // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "a", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "НГК", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{8}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "aa a", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "!,", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{9}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "aa ax", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "qwe rty abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "@", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 5}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65535", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "123456", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + 
testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "12345678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "1234.5678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "56789", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "-6553", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: 
"7344.8943", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "-1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "+1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "23", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "678", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "foo", + prefix: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8, 9}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "127.0.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "2.3.", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8}) + + // mismatch + pf = &prefixFilter{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "8", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "127.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "27.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "foo", + prefix: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &prefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + pf := &prefixFilter{ + fieldName: "_msg", + prefix: "2006-01-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{4}) + + pf = &prefixFilter{ + fieldName: "_msg", + prefix: "", + } + testFilterMatchForColumns(t, columns, 
pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &prefixFilter{ + fieldName: "_msg", + prefix: "2006-01-0", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &prefixFilter{ + fieldName: "_msg", + prefix: "002", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{1}) + + // mimatch + pf = &prefixFilter{ + fieldName: "_msg", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &prefixFilter{ + fieldName: "_msg", + prefix: "2006-03-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &prefixFilter{ + fieldName: "_msg", + prefix: "06", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it has different string representation of the timestamp + pf = &prefixFilter{ + fieldName: "_msg", + prefix: "2006-01-02T16:04:05.005+01:00", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it contains too many digits for millisecond part + pf = &prefixFilter{ + fieldName: "_msg", + prefix: "2006-01-02T15:04:05.00500Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &prefixFilter{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + }) +} + +func TestAnyCasePhraseFilter(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "aBc DEf", + }, + }, + { + name: "other column", + values: []string{ + "aSDfdsF", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "Abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePhraseFilter{ + fieldName: "other column", + phrase: "ASdfdsf", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "ab", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "other column", + phrase: "sd", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing column", + phrase: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "other-column", + values: []string{ + "X", + "x", + "x", + }, + }, + { + name: "foo", + values: []string{ + "aBC def", + "abc DEf", + "Abc deF", + }, + }, + { + name: "_msg", + values: []string{ + "1 2 3", + "1 2 3", + "1 2 3", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: " 
def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePhraseFilter{ + fieldName: "other-column", + phrase: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: " 2 ", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "abc def ", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "other-column", + phrase: "foo", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing column", + phrase: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "foo", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "fooBar", + "ABc", + "afdf foobar BAz", + "fddf fOObARbaz", + "AfooBarbaz", + "foobar", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "FoobAr", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 6}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "baZ", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing column", + phrase: "foobar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "A Foobar", + "aA aBC a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a aFOObarbaz", + "a foobar", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "A", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "НгкШ", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{8}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "!,", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{9}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "aa a", + } + 
testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "@", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "12", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 5}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65535", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "123456", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "bar", + } + 
testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "12345678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "1234.5678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "5678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "-65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "-1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "+1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "123", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "5678", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", 
+ phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.55.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "127", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "127.0.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "2.3", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + // mismatch + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "5", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "127.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "27.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "foo", + phrase: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + pf := &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "2006-01-02t15:04:05.005z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{4}) + + pf = &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "2006-01", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "002Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{1}) + + pf = &anyCasePhraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mimatch + pf = &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf 
= &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "2006-03-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "06", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it has different string representation of the timestamp + pf = &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "2006-01-02T16:04:05.005+01:00", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it contains too many digits for millisecond part + pf = &anyCasePhraseFilter{ + fieldName: "_msg", + phrase: "2006-01-02T15:04:05.00500Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + }) +} + +func TestPhraseFilter(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + }, + }, + { + name: "other column", + values: []string{ + "asdfdsf", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &phraseFilter{ + fieldName: "other column", + phrase: "asdfdsf", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + // mismatch + pf = &phraseFilter{ + fieldName: "foo", + phrase: "ab", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "other column", + phrase: "sd", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "non-existing column", + phrase: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "other-column", + values: []string{ + "x", + "x", + "x", + }, + }, + { + name: "foo", + values: []string{ + "abc def", + "abc def", + "abc def", + }, + }, + { + name: "_msg", + values: []string{ + "1 2 3", + "1 2 3", + "1 2 3", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: " def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &phraseFilter{ + fieldName: "other-column", + phrase: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &phraseFilter{ + fieldName: "_msg", + phrase: " 2 ", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + // mismatch + pf = &phraseFilter{ + 
fieldName: "foo", + phrase: "abc def ", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "other-column", + phrase: "foo", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "non-existing column", + phrase: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "_msg", + phrase: "foo", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "foobar", + "abc", + "afdf foobar baz", + "fddf foobarbaz", + "afoobarbaz", + "foobar", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "foobar", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 6}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "baz", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6}) + + // mismatch + pf = &phraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "non-existing column", + phrase: "foobar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a afoobarbaz", + "a foobar", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "a", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "НГКШ", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{8}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "!,", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{9}) + + // mismatch + pf = &phraseFilter{ + fieldName: "foo", + phrase: "aa a", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "@", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "12", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 5}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "0", + } + 
testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + pf = &phraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65535", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + pf = &phraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "123456", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + pf = &phraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "12345678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: 
"", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + pf = &phraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "1234.5678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "5678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "-65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + pf = &phraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "-1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "+1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "123", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "5678", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.55.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "foo", + phrase: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "127", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "127.0.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "2.3", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &phraseFilter{ + fieldName: "foo", + 
phrase: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + // mismatch + pf = &phraseFilter{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "5", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "127.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "27.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &phraseFilter{ + fieldName: "foo", + phrase: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + pf := &phraseFilter{ + fieldName: "_msg", + phrase: "2006-01-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{4}) + + pf = &phraseFilter{ + fieldName: "_msg", + phrase: "2006-01", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &phraseFilter{ + fieldName: "_msg", + phrase: "002Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{1}) + + pf = &phraseFilter{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mimatch + pf = &phraseFilter{ + fieldName: "_msg", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &phraseFilter{ + fieldName: "_msg", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &phraseFilter{ + fieldName: "_msg", + phrase: "2006-03-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &phraseFilter{ + fieldName: "_msg", + phrase: "06", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it has different string representation of the timestamp + pf = &phraseFilter{ + fieldName: "_msg", + phrase: "2006-01-02T16:04:05.005+01:00", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it contains too many digits for millisecond part + pf = &phraseFilter{ + fieldName: "_msg", + phrase: "2006-01-02T15:04:05.00500Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + }) +} + +func testFilterMatchForTimestamps(t *testing.T, timestamps []int64, f filter, expectedRowIdxs []int) { + t.Helper() + + // Create the test storage + const storagePath = "testFilterMatchForTimestamps" + cfg := &StorageConfig{} + s := MustOpenStorage(storagePath, cfg) + + // Generate rows + getValue := func(rowIdx int) string { + return fmt.Sprintf("some value for row %d", rowIdx) + } + tenantID := TenantID{ + AccountID: 123, + ProjectID: 456, + } + generateRowsFromTimestamps(s, tenantID, 
timestamps, getValue) + + expectedResults := make([]string, len(expectedRowIdxs)) + expectedTimestamps := make([]int64, len(expectedRowIdxs)) + for i, idx := range expectedRowIdxs { + expectedResults[i] = getValue(idx) + expectedTimestamps[i] = timestamps[idx] + } + + testFilterMatchForStorage(t, s, tenantID, f, "_msg", expectedResults, expectedTimestamps) + + // Close and delete the test storage + s.MustClose() + fs.MustRemoveAll(storagePath) +} + +func testFilterMatchForColumns(t *testing.T, columns []column, f filter, resultColumnName string, expectedRowIdxs []int) { + t.Helper() + + // Create the test storage + const storagePath = "testFilterMatchForColumns" + cfg := &StorageConfig{} + s := MustOpenStorage(storagePath, cfg) + + // Generate rows + tenantID := TenantID{ + AccountID: 123, + ProjectID: 456, + } + generateRowsFromColumns(s, tenantID, columns) + + var values []string + for _, c := range columns { + if c.name == resultColumnName { + values = c.values + break + } + } + expectedResults := make([]string, len(expectedRowIdxs)) + expectedTimestamps := make([]int64, len(expectedRowIdxs)) + for i, idx := range expectedRowIdxs { + expectedResults[i] = values[idx] + expectedTimestamps[i] = int64(idx) * 1e9 + } + + testFilterMatchForStorage(t, s, tenantID, f, resultColumnName, expectedResults, expectedTimestamps) + + // Close and delete the test storage + s.MustClose() + fs.MustRemoveAll(storagePath) +} + +func testFilterMatchForStorage(t *testing.T, s *Storage, tenantID TenantID, f filter, resultColumnName string, expectedResults []string, expectedTimestamps []int64) { + t.Helper() + + so := &genericSearchOptions{ + tenantIDs: []TenantID{tenantID}, + filter: f, + resultColumnNames: []string{resultColumnName}, + } + workersCount := 3 + s.search(workersCount, so, nil, func(workerID uint, br *blockResult) { + // Verify tenantID + if !br.streamID.tenantID.equal(&tenantID) { + t.Fatalf("unexpected tenantID in blockResult; got %s; want %s", &br.streamID.tenantID, &tenantID) + } + + // Verify columns + if len(br.cs) != 1 { + t.Fatalf("unexpected number of columns in blockResult; got %d; want 1", len(br.cs)) + } + results := br.getColumnValues(0) + if !reflect.DeepEqual(results, expectedResults) { + t.Fatalf("unexpected results matched;\ngot\n%q\nwant\n%q", results, expectedResults) + } + + // Verify timestamps + if br.timestamps == nil { + br.timestamps = []int64{} + } + if !reflect.DeepEqual(br.timestamps, expectedTimestamps) { + t.Fatalf("unexpected timestamps;\ngot\n%d\nwant\n%d", br.timestamps, expectedTimestamps) + } + }) +} + +func generateRowsFromColumns(s *Storage, tenantID TenantID, columns []column) { + streamTags := []string{ + "job", + "instance", + } + lr := GetLogRows(streamTags, nil) + var fields []Field + for i := range columns[0].values { + // Add stream tags + fields = append(fields[:0], Field{ + Name: "job", + Value: "foobar", + }, Field{ + Name: "instance", + Value: "host1:234", + }) + // Add other columns + for j := range columns { + fields = append(fields, Field{ + Name: columns[j].name, + Value: columns[j].values[i], + }) + } + timestamp := int64(i) * 1e9 + lr.MustAdd(tenantID, timestamp, fields) + } + s.MustAddRows(lr) + PutLogRows(lr) +} + +func generateRowsFromTimestamps(s *Storage, tenantID TenantID, timestamps []int64, getValue func(rowIdx int) string) { + lr := GetLogRows(nil, nil) + var fields []Field + for i, timestamp := range timestamps { + fields = append(fields[:0], Field{ + Name: "_msg", + Value: getValue(i), + }) + lr.MustAdd(tenantID, timestamp, fields) 
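+		// Note (editor): fields may be reused on the next iteration - lr.MustAdd is expected to copy the data it needs.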
+ } + s.MustAddRows(lr) + PutLogRows(lr) +} diff --git a/lib/logstorage/hash128.go b/lib/logstorage/hash128.go new file mode 100644 index 000000000..68540f894 --- /dev/null +++ b/lib/logstorage/hash128.go @@ -0,0 +1,38 @@ +package logstorage + +import ( + "sync" + + "github.com/cespare/xxhash/v2" +) + +func hash128(data []byte) u128 { + h := getHasher() + _, _ = h.Write(data) + hi := h.Sum64() + _, _ = h.Write(magicSuffixForHash) + lo := h.Sum64() + putHasher(h) + + return u128{ + hi: hi, + lo: lo, + } +} + +var magicSuffixForHash = []byte("magic!") + +func getHasher() *xxhash.Digest { + v := hasherPool.Get() + if v == nil { + return xxhash.New() + } + return v.(*xxhash.Digest) +} + +func putHasher(h *xxhash.Digest) { + h.Reset() + hasherPool.Put(h) +} + +var hasherPool sync.Pool diff --git a/lib/logstorage/hash128_test.go b/lib/logstorage/hash128_test.go new file mode 100644 index 000000000..05439d9fb --- /dev/null +++ b/lib/logstorage/hash128_test.go @@ -0,0 +1,24 @@ +package logstorage + +import ( + "testing" +) + +func TestHash128(t *testing.T) { + f := func(data string, hashExpected u128) { + t.Helper() + h := hash128([]byte(data)) + if !h.equal(&hashExpected) { + t.Fatalf("unexpected hash; got %s; want %s", &h, &hashExpected) + } + } + f("", u128{ + hi: 17241709254077376921, + lo: 13138662262368978769, + }) + + f("abc", u128{ + hi: 4952883123889572249, + lo: 3255951525518405514, + }) +} diff --git a/lib/logstorage/hash128_timing_test.go b/lib/logstorage/hash128_timing_test.go new file mode 100644 index 000000000..7cdccb963 --- /dev/null +++ b/lib/logstorage/hash128_timing_test.go @@ -0,0 +1,29 @@ +package logstorage + +import ( + "fmt" + "sync/atomic" + "testing" +) + +func BenchmarkHash128(b *testing.B) { + a := make([][]byte, 100) + for i := range a { + a[i] = []byte(fmt.Sprintf("some string %d", i)) + } + b.ReportAllocs() + b.SetBytes(int64(len(a))) + b.RunParallel(func(pb *testing.PB) { + var n uint64 + for pb.Next() { + for _, b := range a { + h := hash128(b) + n += h.hi + n += h.lo + } + } + atomic.AddUint64(&GlobalSinkU64, n) + }) +} + +var GlobalSinkU64 uint64 diff --git a/lib/logstorage/index_block_header.go b/lib/logstorage/index_block_header.go new file mode 100644 index 000000000..c0654b10b --- /dev/null +++ b/lib/logstorage/index_block_header.go @@ -0,0 +1,164 @@ +package logstorage + +import ( + "fmt" + "io" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// indexBlockHeader contains index information about multiple blocks. +// +// It allows locating the block by streamID and/or by time range. +type indexBlockHeader struct { + // streamID is the minimum streamID covered by the indexBlockHeader + streamID streamID + + // minTimestamp is the mimumum timestamp seen across blocks covered by the indexBlockHeader + minTimestamp int64 + + // maxTimestamp is the maximum timestamp seen across blocks covered by the indexBlockHeader + maxTimestamp int64 + + // indexBlockOffset is an offset of the linked index block at indexFilename + indexBlockOffset uint64 + + // indexBlockSize is the size of the linked index block at indexFilename + indexBlockSize uint64 +} + +// reset resets ih for subsequent re-use. 
+func (ih *indexBlockHeader) reset() { + ih.streamID.reset() + ih.minTimestamp = 0 + ih.maxTimestamp = 0 + ih.indexBlockOffset = 0 + ih.indexBlockSize = 0 +} + +// mustWriteIndexBlock writes data with the given additional args to sw and updates ih accordingly. +func (ih *indexBlockHeader) mustWriteIndexBlock(data []byte, sidFirst streamID, minTimestamp, maxTimestamp int64, sw *streamWriters) { + ih.streamID = sidFirst + ih.minTimestamp = minTimestamp + ih.maxTimestamp = maxTimestamp + + bb := longTermBufPool.Get() + bb.B = encoding.CompressZSTDLevel(bb.B[:0], data, 1) + ih.indexBlockOffset = sw.indexWriter.bytesWritten + ih.indexBlockSize = uint64(len(bb.B)) + sw.indexWriter.MustWrite(bb.B) + longTermBufPool.Put(bb) +} + +// mustReadNextIndexBlock reads the next index block associated with ih from src, appends it to dst and returns the result. +func (ih *indexBlockHeader) mustReadNextIndexBlock(dst []byte, sr *streamReaders) []byte { + indexReader := &sr.indexReader + + indexBlockSize := ih.indexBlockSize + if indexBlockSize > maxIndexBlockSize { + logger.Panicf("FATAL: %s: indexBlockHeader.indexBlockSize=%d cannot exceed %d bytes", indexReader.Path(), indexBlockSize, maxIndexBlockSize) + } + if ih.indexBlockOffset != indexReader.bytesRead { + logger.Panicf("FATAL: %s: indexBlockHeader.indexBlockOffset=%d must equal %d", indexReader.Path(), ih.indexBlockOffset, indexReader.bytesRead) + } + bbCompressed := longTermBufPool.Get() + bbCompressed.B = bytesutil.ResizeNoCopyMayOverallocate(bbCompressed.B, int(indexBlockSize)) + indexReader.MustReadFull(bbCompressed.B) + + // Decompress bbCompressed to dst + var err error + dst, err = encoding.DecompressZSTD(dst, bbCompressed.B) + longTermBufPool.Put(bbCompressed) + if err != nil { + logger.Panicf("FATAL: %s: cannot decompress indexBlock read at offset %d with size %d: %s", indexReader.Path(), ih.indexBlockOffset, indexBlockSize, err) + } + return dst +} + +// marshal appends marshaled ih to dst and returns the result. +func (ih *indexBlockHeader) marshal(dst []byte) []byte { + dst = ih.streamID.marshal(dst) + dst = encoding.MarshalUint64(dst, uint64(ih.minTimestamp)) + dst = encoding.MarshalUint64(dst, uint64(ih.maxTimestamp)) + dst = encoding.MarshalUint64(dst, ih.indexBlockOffset) + dst = encoding.MarshalUint64(dst, ih.indexBlockSize) + return dst +} + +// unmarshal unmarshals ih from src and returns the tail left. +func (ih *indexBlockHeader) unmarshal(src []byte) ([]byte, error) { + srcOrig := src + + // unmarshal ih.streamID + tail, err := ih.streamID.unmarshal(src) + if err != nil { + return srcOrig, fmt.Errorf("cannot unmarshal streamID: %w", err) + } + src = tail + + // unmarshal the rest of indexBlockHeader fields + if len(src) < 32 { + return srcOrig, fmt.Errorf("cannot unmarshal indexBlockHeader from %d bytes; need at least 32 bytes", len(src)) + } + ih.minTimestamp = int64(encoding.UnmarshalUint64(src)) + ih.maxTimestamp = int64(encoding.UnmarshalUint64(src[8:])) + ih.indexBlockOffset = encoding.UnmarshalUint64(src[16:]) + ih.indexBlockSize = encoding.UnmarshalUint64(src[24:]) + + return src[32:], nil +} + +// mustReadIndexBlockHeaders reads indexBlockHeader entries from r, appends them to dst and returns the result.
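+// The whole stream is read into memory, ZSTD-decompressed and then unmarshaled entry by entry.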
+func mustReadIndexBlockHeaders(dst []indexBlockHeader, r *readerWithStats) []indexBlockHeader { + data, err := io.ReadAll(r) + if err != nil { + logger.Panicf("FATAL: cannot read indexBlockHeader entries from %s: %s", r.Path(), err) + } + + bb := longTermBufPool.Get() + bb.B, err = encoding.DecompressZSTD(bb.B[:0], data) + if err != nil { + logger.Panicf("FATAL: cannot decompress indexBlockHeader entries from %s: %s", r.Path(), err) + } + dst, err = unmarshalIndexBlockHeaders(dst, bb.B) + if len(bb.B) < 1024*1024 { + longTermBufPool.Put(bb) + } + if err != nil { + logger.Panicf("FATAL: cannot parse indexBlockHeader entries from %s: %s", r.Path(), err) + } + return dst +} + +// unmarshalIndexBlockHeaders appends unmarshaled from src indexBlockHeader entries to dst and returns the result. +func unmarshalIndexBlockHeaders(dst []indexBlockHeader, src []byte) ([]indexBlockHeader, error) { + dstOrig := dst + for len(src) > 0 { + if len(dst) < cap(dst) { + dst = dst[:len(dst)+1] + } else { + dst = append(dst, indexBlockHeader{}) + } + ih := &dst[len(dst)-1] + tail, err := ih.unmarshal(src) + if err != nil { + return dstOrig, fmt.Errorf("cannot unmarshal indexBlockHeader %d: %w", len(dst)-len(dstOrig), err) + } + src = tail + } + if err := validateIndexBlockHeaders(dst[len(dstOrig):]); err != nil { + return dstOrig, err + } + return dst, nil +} + +func validateIndexBlockHeaders(ihs []indexBlockHeader) error { + for i := 1; i < len(ihs); i++ { + if ihs[i].streamID.less(&ihs[i-1].streamID) { + return fmt.Errorf("unexpected indexBlockHeader with smaller streamID=%s after bigger streamID=%s", &ihs[i].streamID, &ihs[i-1].streamID) + } + } + return nil +} diff --git a/lib/logstorage/index_block_header_test.go b/lib/logstorage/index_block_header_test.go new file mode 100644 index 000000000..b72137456 --- /dev/null +++ b/lib/logstorage/index_block_header_test.go @@ -0,0 +1,138 @@ +package logstorage + +import ( + "reflect" + "testing" +) + +func TestIndexBlockHeaderMarshalUnmarshal(t *testing.T) { + f := func(ih *indexBlockHeader, marshaledLen int) { + t.Helper() + data := ih.marshal(nil) + if len(data) != marshaledLen { + t.Fatalf("unexpected marshaled length of indexBlockHeader; got %d; want %d", len(data), marshaledLen) + } + var ih2 indexBlockHeader + tail, err := ih2.unmarshal(data) + if err != nil { + t.Fatalf("cannot unmarshal indexBlockHeader: %s", err) + } + if len(tail) > 0 { + t.Fatalf("unexpected non-empty tail left after unmarshaling indexBlockHeader: %X", tail) + } + if !reflect.DeepEqual(ih, &ih2) { + t.Fatalf("unexpected unmarshaled indexBlockHeader\ngot\n%v\nwant\n%v", &ih2, ih) + } + } + f(&indexBlockHeader{}, 56) + f(&indexBlockHeader{ + streamID: streamID{ + tenantID: TenantID{ + AccountID: 123, + ProjectID: 456, + }, + id: u128{ + hi: 214, + lo: 2111, + }, + }, + minTimestamp: 1234, + maxTimestamp: 898943, + indexBlockOffset: 234, + indexBlockSize: 898, + }, 56) +} + +func TestIndexBlockHeaderUnmarshalFailure(t *testing.T) { + f := func(data []byte) { + t.Helper() + dataOrig := append([]byte{}, data...) 
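+		// dataOrig is compared against the returned tail below, since unmarshal must leave the input unchanged on error.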
+ var ih indexBlockHeader + tail, err := ih.unmarshal(data) + if err == nil { + t.Fatalf("expecting non-nil error") + } + if string(tail) != string(dataOrig) { + t.Fatalf("unexpected tail; got %q; want %q", tail, dataOrig) + } + } + f(nil) + f([]byte("foo")) + + ih := &indexBlockHeader{ + streamID: streamID{ + tenantID: TenantID{ + AccountID: 123, + ProjectID: 456, + }, + id: u128{ + hi: 214, + lo: 2111, + }, + }, + minTimestamp: 1234, + maxTimestamp: 898943, + indexBlockOffset: 234, + indexBlockSize: 898, + } + data := ih.marshal(nil) + for len(data) > 0 { + data = data[:len(data)-1] + f(data) + } +} + +func TestIndexBlockHeaderReset(t *testing.T) { + ih := &indexBlockHeader{ + streamID: streamID{ + tenantID: TenantID{ + AccountID: 123, + ProjectID: 456, + }, + id: u128{ + hi: 214, + lo: 2111, + }, + }, + minTimestamp: 1234, + maxTimestamp: 898943, + indexBlockOffset: 234, + indexBlockSize: 898, + } + ih.reset() + ihZero := &indexBlockHeader{} + if !reflect.DeepEqual(ih, ihZero) { + t.Fatalf("unexpected non-zero indexBlockHeader after reset: %v", ih) + } +} + +func TestMarshalUnmarshalIndexBlockHeaders(t *testing.T) { + f := func(ihs []indexBlockHeader, marshaledLen int) { + t.Helper() + var data []byte + for i := range ihs { + data = ihs[i].marshal(data) + } + if len(data) != marshaledLen { + t.Fatalf("unexpected marshaled length for indexBlockHeader entries; got %d; want %d", len(data), marshaledLen) + } + ihs2, err := unmarshalIndexBlockHeaders(nil, data) + if err != nil { + t.Fatalf("cannot unmarshal indexBlockHeader entries: %s", err) + } + if !reflect.DeepEqual(ihs, ihs2) { + t.Fatalf("unexpected indexBlockHeader entries after unmarshaling\ngot\n%v\nwant\n%v", ihs2, ihs) + } + } + f(nil, 0) + f([]indexBlockHeader{{}}, 56) + f([]indexBlockHeader{ + { + indexBlockOffset: 234, + indexBlockSize: 5432, + }, + { + minTimestamp: -123, + }, + }, 112) +} diff --git a/lib/logstorage/indexdb.go b/lib/logstorage/indexdb.go new file mode 100644 index 000000000..4e69a9cdf --- /dev/null +++ b/lib/logstorage/indexdb.go @@ -0,0 +1,900 @@ +package logstorage + +import ( + "bytes" + "fmt" + "io" + "sort" + "sync" + "sync/atomic" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil" +) + +const ( + // (tenantID:streamID) entries have this prefix + // + // These entries are used for detecting whether the given stream is already registered + nsPrefixStreamID = 0 + + // (tenantID:streamID -> streamTagsCanonical) entries have this prefix + nsPrefixStreamIDToStreamTags = 1 + + // (tenantID:name:value => streamIDs) entries have this prefix + nsPrefixTagToStreamIDs = 2 +) + +// IndexdbStats contains indexdb stats +type IndexdbStats struct { + // StreamsCreatedTotal is the number of log streams created since the indexdb initialization. + StreamsCreatedTotal uint64 +} + +type indexdb struct { + // streamsCreatedTotal is the number of log streams created since the indexdb intialization. + streamsCreatedTotal uint64 + + // path is the path to indexdb + path string + + // partitionName is the name of the partition for the indexdb. 
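+	// It also becomes a part of the streamFilterCache key - see marshalStreamFilterCacheKey().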
+ partitionName string + + // tb is the storage for indexdb + tb *mergeset.Table + + // indexSearchPool is a pool of indexSearch struct for the given indexdb + indexSearchPool sync.Pool + + // the generation of the streamFilterCache. + // It is updated each time new item is added to tb. + streamFilterCacheGeneration uint32 + + // s is the storage where indexdb belongs to. + s *Storage +} + +func mustCreateIndexdb(path string) { + fs.MustMkdirFailIfExist(path) +} + +func mustOpenIndexdb(path, partitionName string, s *Storage) *indexdb { + idb := &indexdb{ + path: path, + partitionName: partitionName, + s: s, + } + isReadOnly := uint32(0) + idb.tb = mergeset.MustOpenTable(path, idb.invalidateStreamFilterCache, mergeTagToStreamIDsRows, &isReadOnly) + return idb +} + +func mustCloseIndexdb(idb *indexdb) { + idb.tb.MustClose() + idb.tb = nil + idb.s = nil + idb.partitionName = "" + idb.path = "" +} + +func (idb *indexdb) debugFlush() { + idb.tb.DebugFlush() +} + +func (idb *indexdb) updateStats(d *IndexdbStats) { + d.StreamsCreatedTotal += atomic.LoadUint64(&idb.streamsCreatedTotal) +} + +func (idb *indexdb) appendStreamTagsByStreamID(dst []byte, sid *streamID) []byte { + is := idb.getIndexSearch() + defer idb.putIndexSearch(is) + + ts := &is.ts + kb := &is.kb + + kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixStreamIDToStreamTags, sid.tenantID) + kb.B = sid.id.marshal(kb.B) + + if err := ts.FirstItemWithPrefix(kb.B); err != nil { + if err == io.EOF { + return dst + } + logger.Panicf("FATAL: unexpected error when searching for StreamTags by streamID=%s in indexdb: %s", sid, err) + } + data := ts.Item[len(kb.B):] + dst = append(dst, data...) + return dst +} + +// hasStreamID returns true if streamID exists in idb +func (idb *indexdb) hasStreamID(sid *streamID) bool { + is := idb.getIndexSearch() + defer idb.putIndexSearch(is) + + ts := &is.ts + kb := &is.kb + + kb.B = marshalCommonPrefix(kb.B, nsPrefixStreamID, sid.tenantID) + kb.B = sid.id.marshal(kb.B) + + if err := ts.FirstItemWithPrefix(kb.B); err != nil { + if err == io.EOF { + return false + } + logger.Panicf("FATAL: unexpected error when searching for streamID=%s in indexdb: %s", sid, err) + } + return len(kb.B) == len(ts.Item) +} + +type indexSearch struct { + idb *indexdb + ts mergeset.TableSearch + kb bytesutil.ByteBuffer +} + +func (idb *indexdb) getIndexSearch() *indexSearch { + v := idb.indexSearchPool.Get() + if v == nil { + v = &indexSearch{ + idb: idb, + } + } + is := v.(*indexSearch) + is.ts.Init(idb.tb) + return is +} + +func (idb *indexdb) putIndexSearch(is *indexSearch) { + is.idb = nil + is.ts.MustClose() + is.kb.Reset() + + idb.indexSearchPool.Put(is) +} + +// searchStreamIDs returns streamIDs for the given tenantIDs and the given stream filters +func (idb *indexdb) searchStreamIDs(tenantIDs []TenantID, sf *StreamFilter) []streamID { + // Try obtaining streamIDs from cache + streamIDs, ok := idb.loadStreamIDsFromCache(tenantIDs, sf) + if ok { + // Fast path - streamIDs found in the cache. + return streamIDs + } + + // Slow path - collect streamIDs from indexdb. + + // Collect streamIDs for all the specified tenantIDs. + is := idb.getIndexSearch() + m := make(map[streamID]struct{}) + for _, tenantID := range tenantIDs { + for _, asf := range sf.orFilters { + is.updateStreamIDs(m, tenantID, asf) + } + } + idb.putIndexSearch(is) + + // Convert the collected streamIDs from m to sorted slice. 
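+	// Sorting makes the returned streamIDs deterministic across calls.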
+ streamIDs = make([]streamID, 0, len(m)) + for streamID := range m { + streamIDs = append(streamIDs, streamID) + } + sortStreamIDs(streamIDs) + + // Store the collected streamIDs to cache. + idb.storeStreamIDsToCache(tenantIDs, sf, streamIDs) + + return streamIDs +} + +func sortStreamIDs(streamIDs []streamID) { + sort.Slice(streamIDs, func(i, j int) bool { + return streamIDs[i].less(&streamIDs[j]) + }) +} + +func (is *indexSearch) updateStreamIDs(dst map[streamID]struct{}, tenantID TenantID, asf *andStreamFilter) { + var m map[u128]struct{} + for _, tf := range asf.tagFilters { + ids := is.getStreamIDsForTagFilter(tenantID, tf) + if len(ids) == 0 { + // There is no need in checking the remaining filters, + // since the result will be empty in any case. + return + } + if m == nil { + m = ids + } else { + for id := range m { + if _, ok := ids[id]; !ok { + delete(m, id) + } + } + } + } + + var sid streamID + for id := range m { + sid.tenantID = tenantID + sid.id = id + dst[sid] = struct{}{} + } +} + +func (is *indexSearch) getStreamIDsForTagFilter(tenantID TenantID, tf *streamTagFilter) map[u128]struct{} { + switch tf.op { + case "=": + if tf.value == "" { + // (field="") + return is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName) + } + // (field="value") + return is.getStreamIDsForNonEmptyTagValue(tenantID, tf.tagName, tf.value) + case "!=": + if tf.value == "" { + // (field!="") + return is.getStreamIDsForTagName(tenantID, tf.tagName) + } + // (field!="value") => (all and not field="value") + ids := is.getStreamIDsForTenant(tenantID) + idsForTag := is.getStreamIDsForNonEmptyTagValue(tenantID, tf.tagName, tf.value) + for id := range idsForTag { + delete(ids, id) + } + return ids + case "=~": + re := tf.getRegexp() + if re.MatchString("") { + // (field=~"|re") => (field="" or field=~"re") + ids := is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName) + idsForRe := is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re) + for id := range idsForRe { + ids[id] = struct{}{} + } + return ids + } + return is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re) + case "!~": + re := tf.getRegexp() + if re.MatchString("") { + // (field!~"|re") => (field!="" and not field=~"re") + ids := is.getStreamIDsForTagName(tenantID, tf.tagName) + if len(ids) == 0 { + return ids + } + idsForRe := is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re) + for id := range idsForRe { + delete(ids, id) + } + return ids + } + // (field!~"re") => (all and not field=~"re") + ids := is.getStreamIDsForTenant(tenantID) + idsForRe := is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re) + for id := range idsForRe { + delete(ids, id) + } + return ids + default: + logger.Panicf("BUG: unexpected operation in stream tag filter: %q", tf.op) + return nil + } +} + +func (is *indexSearch) getStreamIDsForNonEmptyTagValue(tenantID TenantID, tagName, tagValue string) map[u128]struct{} { + ids := make(map[u128]struct{}) + var sp tagToStreamIDsRowParser + + ts := &is.ts + kb := &is.kb + kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToStreamIDs, tenantID) + kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagName)) + kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagValue)) + prefix := kb.B + ts.Seek(prefix) + for ts.NextItem() { + item := ts.Item + if !bytes.HasPrefix(item, prefix) { + break + } + tail := item[len(prefix):] + sp.UpdateStreamIDs(ids, tail) + } + if err := ts.Error(); err != nil { + logger.Panicf("FATAL: unexpected error: %s", err) + } + + return ids +} + +func (is *indexSearch) 
getStreamIDsForEmptyTagValue(tenantID TenantID, tagName string) map[u128]struct{} { + ids := is.getStreamIDsForTenant(tenantID) + idsForTag := is.getStreamIDsForTagName(tenantID, tagName) + for id := range idsForTag { + delete(ids, id) + } + return ids +} + +func (is *indexSearch) getStreamIDsForTenant(tenantID TenantID) map[u128]struct{} { + ids := make(map[u128]struct{}) + ts := &is.ts + kb := &is.kb + kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixStreamID, tenantID) + prefix := kb.B + ts.Seek(prefix) + var id u128 + for ts.NextItem() { + item := ts.Item + if !bytes.HasPrefix(item, prefix) { + break + } + tail, err := id.unmarshal(item[len(prefix):]) + if err != nil { + logger.Panicf("FATAL: cannot unmarshal streamID from (tenantID:streamID) entry: %s", err) + } + if len(tail) > 0 { + logger.Panicf("FATAL: unexpected non-empty tail left after unmarshaling streamID from (tenantID:streamID); tail len=%d", len(tail)) + } + ids[id] = struct{}{} + } + if err := ts.Error(); err != nil { + logger.Panicf("FATAL: unexpected error: %s", err) + } + + return ids +} + +func (is *indexSearch) getStreamIDsForTagName(tenantID TenantID, tagName string) map[u128]struct{} { + ids := make(map[u128]struct{}) + var sp tagToStreamIDsRowParser + + ts := &is.ts + kb := &is.kb + kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToStreamIDs, tenantID) + kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagName)) + prefix := kb.B + ts.Seek(prefix) + for ts.NextItem() { + item := ts.Item + if !bytes.HasPrefix(item, prefix) { + break + } + tail := item[len(prefix):] + n := bytes.IndexByte(tail, tagSeparatorChar) + if n < 0 { + logger.Panicf("FATAL: cannot find the end of tag value") + } + tail = tail[n+1:] + sp.UpdateStreamIDs(ids, tail) + } + if err := ts.Error(); err != nil { + logger.Panicf("FATAL: unexpected error: %s", err) + } + + return ids +} + +func (is *indexSearch) getStreamIDsForTagRegexp(tenantID TenantID, tagName string, re *regexutil.PromRegex) map[u128]struct{} { + ids := make(map[u128]struct{}) + var sp tagToStreamIDsRowParser + var tagValue, prevMatchingTagValue []byte + var err error + + ts := &is.ts + kb := &is.kb + kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToStreamIDs, tenantID) + kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagName)) + prefix := kb.B + ts.Seek(prefix) + for ts.NextItem() { + item := ts.Item + if !bytes.HasPrefix(item, prefix) { + break + } + tail := item[len(prefix):] + tail, tagValue, err = unmarshalTagValue(tagValue[:0], tail) + if err != nil { + logger.Panicf("FATAL: cannot unmarshal tag value: %s", err) + } + if !bytes.Equal(tagValue, prevMatchingTagValue) { + if !re.MatchString(bytesutil.ToUnsafeString(tagValue)) { + continue + } + prevMatchingTagValue = append(prevMatchingTagValue[:0], tagValue...) + } + sp.UpdateStreamIDs(ids, tail) + } + if err := ts.Error(); err != nil { + logger.Panicf("FATAL: unexpected error: %s", err) + } + + return ids +} + +func (idb *indexdb) mustRegisterStream(streamID *streamID, streamTagsCanonical []byte) { + st := GetStreamTags() + mustUnmarshalStreamTags(st, streamTagsCanonical) + tenantID := streamID.tenantID + + bi := getBatchItems() + buf := bi.buf[:0] + items := bi.items[:0] + + // Register tenantID:streamID entry. + bufLen := len(buf) + buf = marshalCommonPrefix(buf, nsPrefixStreamID, tenantID) + buf = streamID.id.marshal(buf) + items = append(items, buf[bufLen:]) + + // Register tenantID:streamID -> streamTagsCanonical entry. 
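+	// This entry allows restoring the stream tags by streamID - see appendStreamTagsByStreamID().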
+ bufLen = len(buf) + buf = marshalCommonPrefix(buf, nsPrefixStreamIDToStreamTags, tenantID) + buf = streamID.id.marshal(buf) + buf = append(buf, streamTagsCanonical...) + items = append(items, buf[bufLen:]) + + // Register tenantID:name:value -> streamIDs entries. + tags := st.tags + for i := range tags { + bufLen = len(buf) + buf = marshalCommonPrefix(buf, nsPrefixTagToStreamIDs, tenantID) + buf = tags[i].indexdbMarshal(buf) + buf = streamID.id.marshal(buf) + items = append(items, buf[bufLen:]) + } + PutStreamTags(st) + + // Add items to the storage + idb.tb.AddItems(items) + + bi.buf = buf + bi.items = items + putBatchItems(bi) + + atomic.AddUint64(&idb.streamsCreatedTotal, 1) +} + +func (idb *indexdb) invalidateStreamFilterCache() { + // This function must be fast, since it is called each + // time new indexdb entry is added. + atomic.AddUint32(&idb.streamFilterCacheGeneration, 1) +} + +func (idb *indexdb) marshalStreamFilterCacheKey(dst []byte, tenantIDs []TenantID, sf *StreamFilter) []byte { + dst = encoding.MarshalUint32(dst, idb.streamFilterCacheGeneration) + dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(idb.partitionName)) + dst = encoding.MarshalVarUint64(dst, uint64(len(tenantIDs))) + for i := range tenantIDs { + dst = tenantIDs[i].marshal(dst) + } + dst = sf.marshalForCacheKey(dst) + return dst +} + +func (idb *indexdb) loadStreamIDsFromCache(tenantIDs []TenantID, sf *StreamFilter) ([]streamID, bool) { + bb := bbPool.Get() + bb.B = idb.marshalStreamFilterCacheKey(bb.B[:0], tenantIDs, sf) + data := idb.s.streamFilterCache.GetBig(nil, bb.B) + bbPool.Put(bb) + if len(data) == 0 { + // Cache miss + return nil, false + } + // Cache hit - unpack streamIDs from data. + tail, n, err := encoding.UnmarshalVarUint64(data) + if err != nil { + logger.Panicf("BUG: unexpected error when unmarshaling the number of streamIDs from cache: %s", err) + } + src := tail + streamIDs := make([]streamID, n) + for i := uint64(0); i < n; i++ { + tail, err = streamIDs[i].unmarshal(src) + if err != nil { + logger.Panicf("BUG: unexpected error when unmarshaling streamID #%d: %s", i, err) + } + src = tail + } + if len(src) > 0 { + logger.Panicf("BUG: unexpected non-empty tail left with len=%d", len(src)) + } + return streamIDs, true +} + +func (idb *indexdb) storeStreamIDsToCache(tenantIDs []TenantID, sf *StreamFilter, streamIDs []streamID) { + // marshal streamIDs + var b []byte + b = encoding.MarshalVarUint64(b, uint64(len(streamIDs))) + for i := 0; i < len(streamIDs); i++ { + b = streamIDs[i].marshal(b) + } + + // Store marshaled streamIDs to cache. + bb := bbPool.Get() + bb.B = idb.marshalStreamFilterCacheKey(bb.B[:0], tenantIDs, sf) + idb.s.streamFilterCache.SetBig(bb.B, b) + bbPool.Put(bb) +} + +type batchItems struct { + buf []byte + + items [][]byte +} + +func (bi *batchItems) reset() { + bi.buf = bi.buf[:0] + + items := bi.items + for i := range items { + items[i] = nil + } + bi.items = items[:0] +} + +func getBatchItems() *batchItems { + v := batchItemsPool.Get() + if v == nil { + return &batchItems{} + } + return v.(*batchItems) +} + +func putBatchItems(bi *batchItems) { + bi.reset() + batchItemsPool.Put(bi) +} + +var batchItemsPool sync.Pool + +func mergeTagToStreamIDsRows(data []byte, items []mergeset.Item) ([]byte, []mergeset.Item) { + // Perform quick checks whether items contain rows starting from nsPrefixTagToStreamIDs + // based on the fact that items are sorted. + if len(items) <= 2 { + // The first and the last row must remain unchanged. 
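+		// They are written as-is in order to preserve the sort order of adjacent blocks (see the loop below).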
+ return data, items + } + firstItem := items[0].Bytes(data) + if len(firstItem) > 0 && firstItem[0] > nsPrefixTagToStreamIDs { + return data, items + } + lastItem := items[len(items)-1].Bytes(data) + if len(lastItem) > 0 && lastItem[0] < nsPrefixTagToStreamIDs { + return data, items + } + + // items contain at least one row starting from nsPrefixTagToStreamIDs. Merge rows with common tag. + tsm := getTagToStreamIDsRowsMerger() + tsm.dataCopy = append(tsm.dataCopy[:0], data...) + tsm.itemsCopy = append(tsm.itemsCopy[:0], items...) + sp := &tsm.sp + spPrev := &tsm.spPrev + dstData := data[:0] + dstItems := items[:0] + for i, it := range items { + item := it.Bytes(data) + if len(item) == 0 || item[0] != nsPrefixTagToStreamIDs || i == 0 || i == len(items)-1 { + // Write rows not starting with nsPrefixTagToStreamIDs as-is. + // Additionally write the first and the last row as-is in order to preserve + // sort order for adjacent blocks. + dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev) + dstData = append(dstData, item...) + dstItems = append(dstItems, mergeset.Item{ + Start: uint32(len(dstData) - len(item)), + End: uint32(len(dstData)), + }) + continue + } + if err := sp.Init(item); err != nil { + logger.Panicf("FATAL: cannot parse row during merge: %s", err) + } + if sp.StreamIDsLen() >= maxStreamIDsPerRow { + dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev) + dstData = append(dstData, item...) + dstItems = append(dstItems, mergeset.Item{ + Start: uint32(len(dstData) - len(item)), + End: uint32(len(dstData)), + }) + continue + } + if !sp.EqualPrefix(spPrev) { + dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev) + } + sp.ParseStreamIDs() + tsm.pendingStreamIDs = append(tsm.pendingStreamIDs, sp.StreamIDs...) + spPrev, sp = sp, spPrev + if len(tsm.pendingStreamIDs) >= maxStreamIDsPerRow { + dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev) + } + } + if len(tsm.pendingStreamIDs) > 0 { + logger.Panicf("BUG: tsm.pendingStreamIDs must be empty at this point; got %d items", len(tsm.pendingStreamIDs)) + } + if !checkItemsSorted(dstData, dstItems) { + // Items could become unsorted if initial items contain duplicate streamIDs: + // + // item1: 1, 1, 5 + // item2: 1, 4 + // + // Items could become the following after the merge: + // + // item1: 1, 5 + // item2: 1, 4 + // + // i.e. item1 > item2 + // + // Leave the original items unmerged, so they can be merged next time. + // This case should be quite rare - if multiple data points are simultaneously inserted + // into the same new time series from multiple concurrent goroutines. + dstData = append(dstData[:0], tsm.dataCopy...) + dstItems = append(dstItems[:0], tsm.itemsCopy...) + if !checkItemsSorted(dstData, dstItems) { + logger.Panicf("BUG: the original items weren't sorted; items=%q", dstItems) + } + } + putTagToStreamIDsRowsMerger(tsm) + return dstData, dstItems +} + +// maxStreamIDsPerRow limits the number of streamIDs in tenantID:name:value -> streamIDs row. +// +// This reduces overhead on index and metaindex in lib/mergeset. 
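+// Rows that already hold at least maxStreamIDsPerRow streamIDs are written as-is during the merge - see mergeTagToStreamIDsRows().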
+const maxStreamIDsPerRow = 32 + +type u128Sorter []u128 + +func (s u128Sorter) Len() int { return len(s) } +func (s u128Sorter) Less(i, j int) bool { + return s[i].less(&s[j]) +} +func (s u128Sorter) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +type tagToStreamIDsRowsMerger struct { + pendingStreamIDs u128Sorter + sp tagToStreamIDsRowParser + spPrev tagToStreamIDsRowParser + + itemsCopy []mergeset.Item + dataCopy []byte +} + +func (tsm *tagToStreamIDsRowsMerger) Reset() { + tsm.pendingStreamIDs = tsm.pendingStreamIDs[:0] + tsm.sp.Reset() + tsm.spPrev.Reset() + + tsm.itemsCopy = tsm.itemsCopy[:0] + tsm.dataCopy = tsm.dataCopy[:0] +} + +func (tsm *tagToStreamIDsRowsMerger) flushPendingStreamIDs(dstData []byte, dstItems []mergeset.Item, sp *tagToStreamIDsRowParser) ([]byte, []mergeset.Item) { + if len(tsm.pendingStreamIDs) == 0 { + // Nothing to flush + return dstData, dstItems + } + // Use sort.Sort instead of sort.Slice in order to reduce memory allocations. + sort.Sort(&tsm.pendingStreamIDs) + tsm.pendingStreamIDs = removeDuplicateStreamIDs(tsm.pendingStreamIDs) + + // Marshal pendingStreamIDs + dstDataLen := len(dstData) + dstData = sp.MarshalPrefix(dstData) + pendingStreamIDs := tsm.pendingStreamIDs + for i := range pendingStreamIDs { + dstData = pendingStreamIDs[i].marshal(dstData) + } + dstItems = append(dstItems, mergeset.Item{ + Start: uint32(dstDataLen), + End: uint32(len(dstData)), + }) + tsm.pendingStreamIDs = tsm.pendingStreamIDs[:0] + return dstData, dstItems +} + +func removeDuplicateStreamIDs(sortedStreamIDs []u128) []u128 { + if len(sortedStreamIDs) < 2 { + return sortedStreamIDs + } + hasDuplicates := false + for i := 1; i < len(sortedStreamIDs); i++ { + if sortedStreamIDs[i-1] == sortedStreamIDs[i] { + hasDuplicates = true + break + } + } + if !hasDuplicates { + return sortedStreamIDs + } + dstStreamIDs := sortedStreamIDs[:1] + for i := 1; i < len(sortedStreamIDs); i++ { + if sortedStreamIDs[i-1] == sortedStreamIDs[i] { + continue + } + dstStreamIDs = append(dstStreamIDs, sortedStreamIDs[i]) + } + return dstStreamIDs +} + +func getTagToStreamIDsRowsMerger() *tagToStreamIDsRowsMerger { + v := tsmPool.Get() + if v == nil { + return &tagToStreamIDsRowsMerger{} + } + return v.(*tagToStreamIDsRowsMerger) +} + +func putTagToStreamIDsRowsMerger(tsm *tagToStreamIDsRowsMerger) { + tsm.Reset() + tsmPool.Put(tsm) +} + +var tsmPool sync.Pool + +type tagToStreamIDsRowParser struct { + // TenantID contains TenantID of the parsed row + TenantID TenantID + + // StreamIDs contains parsed StreamIDs after ParseStreamIDs call + StreamIDs []u128 + + // streamIDsParsed is set to true after ParseStreamIDs call + streamIDsParsed bool + + // Tag contains parsed tag after Init call + Tag streamTag + + // tail contains the remaining unparsed streamIDs + tail []byte +} + +func (sp *tagToStreamIDsRowParser) Reset() { + sp.TenantID.Reset() + sp.StreamIDs = sp.StreamIDs[:0] + sp.streamIDsParsed = false + sp.Tag.reset() + sp.tail = nil +} + +// Init initializes sp from b, which should contain encoded tenantID:name:value -> streamIDs row. +// +// b cannot be re-used until Reset call. +// +// ParseStreamIDs() must be called later for obtaining sp.StreamIDs from the given tail. 
+func (sp *tagToStreamIDsRowParser) Init(b []byte) error { + tail, nsPrefix, err := unmarshalCommonPrefix(&sp.TenantID, b) + if err != nil { + return fmt.Errorf("invalid tenantID:name:value -> streamIDs row %q: %w", b, err) + } + if nsPrefix != nsPrefixTagToStreamIDs { + return fmt.Errorf("invalid prefix for tenantID:name:value -> streamIDs row %q; got %d; want %d", b, nsPrefix, nsPrefixTagToStreamIDs) + } + tail, err = sp.Tag.indexdbUnmarshal(tail) + if err != nil { + return fmt.Errorf("cannot unmarshal tag from tenantID:name:value -> streamIDs row %q: %w", b, err) + } + if err = sp.InitOnlyTail(tail); err != nil { + return fmt.Errorf("cannot initialize tail from tenantID:name:value -> streamIDs row %q: %w", b, err) + } + return nil +} + +// MarshalPrefix marshals row prefix without tail to dst. +func (sp *tagToStreamIDsRowParser) MarshalPrefix(dst []byte) []byte { + dst = marshalCommonPrefix(dst, nsPrefixTagToStreamIDs, sp.TenantID) + dst = sp.Tag.indexdbMarshal(dst) + return dst +} + +// InitOnlyTail initializes sp.tail from tail, which must contain streamIDs. +// +// tail cannot be re-used until Reset call. +// +// ParseStreamIDs() must be called later for obtaining sp.StreamIDs from the given tail. +func (sp *tagToStreamIDsRowParser) InitOnlyTail(tail []byte) error { + if len(tail) == 0 { + return fmt.Errorf("missing streamID in the tenantID:name:value -> streamIDs row") + } + if len(tail)%16 != 0 { + return fmt.Errorf("invalid tail length in the tenantID:name:value -> streamIDs row; got %d bytes; must be multiple of 16 bytes", len(tail)) + } + sp.tail = tail + sp.streamIDsParsed = false + return nil +} + +// EqualPrefix returns true if prefixes for sp and x are equal. +// +// Prefix contains (tenantID:name:value) +func (sp *tagToStreamIDsRowParser) EqualPrefix(x *tagToStreamIDsRowParser) bool { + if !sp.TenantID.equal(&x.TenantID) { + return false + } + if !sp.Tag.equal(&x.Tag) { + return false + } + return true +} + +// StreamIDsLen returns the number of StreamIDs in the sp.tail +func (sp *tagToStreamIDsRowParser) StreamIDsLen() int { + return len(sp.tail) / 16 +} + +// ParseStreamIDs parses StreamIDs from sp.tail into sp.StreamIDs. +func (sp *tagToStreamIDsRowParser) ParseStreamIDs() { + if sp.streamIDsParsed { + return + } + tail := sp.tail + n := len(tail) / 16 + streamIDs := sp.StreamIDs[:0] + if n <= cap(streamIDs) { + streamIDs = streamIDs[:n] + } else { + streamIDs = append(streamIDs[:cap(streamIDs)], make([]u128, n-cap(streamIDs))...) 
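+		// streamIDs now holds exactly n entries; every entry is overwritten by unmarshal in the loop below.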
+ } + sp.StreamIDs = streamIDs + for i := 0; i < n; i++ { + var err error + tail, err = streamIDs[i].unmarshal(tail) + if err != nil { + logger.Panicf("FATAL: cannot unmarshal streamID: %s", err) + } + } + sp.streamIDsParsed = true +} + +func (sp *tagToStreamIDsRowParser) UpdateStreamIDs(ids map[u128]struct{}, tail []byte) { + sp.Reset() + if err := sp.InitOnlyTail(tail); err != nil { + logger.Panicf("FATAL: cannot parse '(date, tag) -> streamIDs' row: %s", err) + } + sp.ParseStreamIDs() + for _, id := range sp.StreamIDs { + ids[id] = struct{}{} + } +} + +// commonPrefixLen is the length of common prefix for indexdb rows +// 1 byte for ns* prefix + 8 bytes for tenantID +const commonPrefixLen = 1 + 8 + +func marshalCommonPrefix(dst []byte, nsPrefix byte, tenantID TenantID) []byte { + dst = append(dst, nsPrefix) + dst = tenantID.marshal(dst) + return dst +} + +func unmarshalCommonPrefix(dstTenantID *TenantID, src []byte) ([]byte, byte, error) { + if len(src) < commonPrefixLen { + return nil, 0, fmt.Errorf("cannot unmarshal common prefix from %d bytes; need at least %d bytes; data=%X", len(src), commonPrefixLen, src) + } + prefix := src[0] + src = src[1:] + tail, err := dstTenantID.unmarshal(src) + if err != nil { + return nil, 0, fmt.Errorf("cannot unmarshal tenantID: %s", err) + } + return tail, prefix, nil +} + +func checkItemsSorted(data []byte, items []mergeset.Item) bool { + if len(items) == 0 { + return true + } + prevItem := items[0].String(data) + for _, it := range items[1:] { + currItem := it.String(data) + if prevItem > currItem { + return false + } + prevItem = currItem + } + return true +} diff --git a/lib/logstorage/indexdb_test.go b/lib/logstorage/indexdb_test.go new file mode 100644 index 000000000..02e0951f0 --- /dev/null +++ b/lib/logstorage/indexdb_test.go @@ -0,0 +1,253 @@ +package logstorage + +import ( + "fmt" + "reflect" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" +) + +func TestStorageSearchStreamIDs(t *testing.T) { + const path = "TestStorageSearchStreamIDs" + const partitionName = "foobar" + s := newTestStorage() + mustCreateIndexdb(path) + idb := mustOpenIndexdb(path, partitionName, s) + + tenantID := TenantID{ + AccountID: 123, + ProjectID: 567, + } + getStreamIDForTags := func(tags map[string]string) (streamID, []byte) { + st := GetStreamTags() + for k, v := range tags { + st.Add(k, v) + } + streamTagsCanonical := st.MarshalCanonical(nil) + PutStreamTags(st) + id := hash128(streamTagsCanonical) + sid := streamID{ + tenantID: tenantID, + id: id, + } + return sid, streamTagsCanonical + } + + // Create indexdb entries + const jobsCount = 7 + const instancesCount = 5 + for i := 0; i < jobsCount; i++ { + for j := 0; j < instancesCount; j++ { + sid, streamTagsCanonical := getStreamIDForTags(map[string]string{ + "job": fmt.Sprintf("job-%d", i), + "instance": fmt.Sprintf("instance-%d", j), + }) + idb.mustRegisterStream(&sid, streamTagsCanonical) + } + } + idb.debugFlush() + + f := func(streamFilter string, expectedStreamIDs []streamID) { + t.Helper() + sf := mustNewStreamFilter(streamFilter) + if expectedStreamIDs == nil { + expectedStreamIDs = []streamID{} + } + sortStreamIDs(expectedStreamIDs) + for i := 0; i < 3; i++ { + streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf) + if !reflect.DeepEqual(streamIDs, expectedStreamIDs) { + t.Fatalf("unexpected streamIDs on iteration %d; got %v; want %v", i, streamIDs, expectedStreamIDs) + } + } + } + + t.Run("missing-tenant-id", func(t *testing.T) { + tenantID := TenantID{ + AccountID: 1, + 
ProjectID: 2, + } + sf := mustNewStreamFilter(`{job="job-0",instance="instance-0"}`) + for i := 0; i < 3; i++ { + streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf) + if len(streamIDs) > 0 { + t.Fatalf("unexpected non-empty streamIDs on iteration %d: %d", i, len(streamIDs)) + } + } + }) + t.Run("missing-job", func(t *testing.T) { + f(`{job="non-existing-job",instance="instance-0"}`, nil) + }) + t.Run("missing-job-re", func(t *testing.T) { + f(`{job=~"non-existing-job|",instance="instance-0"}`, nil) + }) + t.Run("missing-job-negative-re", func(t *testing.T) { + f(`{job!~"job.+",instance="instance-0"}`, nil) + }) + t.Run("empty-job", func(t *testing.T) { + f(`{job="",instance="instance-0"}`, nil) + }) + t.Run("missing-instance", func(t *testing.T) { + f(`{job="job-0",instance="non-existing-instance"}`, nil) + }) + t.Run("missing-instance-re", func(t *testing.T) { + f(`{job="job-0",instance=~"non-existing-instance|"}`, nil) + }) + t.Run("missing-instance-negative-re", func(t *testing.T) { + f(`{job="job-0",instance!~"instance.+"}`, nil) + }) + t.Run("empty-instance", func(t *testing.T) { + f(`{job="job-0",instance=""}`, nil) + }) + t.Run("non-existing-tag", func(t *testing.T) { + f(`{job="job-0",instance="instance-0",non_existing_tag="foobar"}`, nil) + }) + t.Run("non-existing-non-empty-tag", func(t *testing.T) { + f(`{job="job-0",instance="instance-0",non_existing_tag!=""}`, nil) + }) + t.Run("non-existing-tag-re", func(t *testing.T) { + f(`{job="job-0",instance="instance-0",non_existing_tag=~"foo.+"}`, nil) + }) + t.Run("non-existing-non-empty-tag-re", func(t *testing.T) { + f(`{job="job-0",instance="instance-0",non_existing_tag!~""}`, nil) + }) + + t.Run("match-job-instance", func(t *testing.T) { + sid, _ := getStreamIDForTags(map[string]string{ + "instance": "instance-0", + "job": "job-0", + }) + f(`{job="job-0",instance="instance-0"}`, []streamID{sid}) + }) + t.Run("match-non-existing-tag", func(t *testing.T) { + sid, _ := getStreamIDForTags(map[string]string{ + "instance": "instance-0", + "job": "job-0", + }) + f(`{job="job-0",instance="instance-0",non_existing_tag=~"foo|"}`, []streamID{sid}) + }) + t.Run("match-job", func(t *testing.T) { + var streamIDs []streamID + for i := 0; i < instancesCount; i++ { + sid, _ := getStreamIDForTags(map[string]string{ + "instance": fmt.Sprintf("instance-%d", i), + "job": "job-0", + }) + streamIDs = append(streamIDs, sid) + } + f(`{job="job-0"}`, streamIDs) + }) + t.Run("match-instance", func(t *testing.T) { + var streamIDs []streamID + for i := 0; i < jobsCount; i++ { + sid, _ := getStreamIDForTags(map[string]string{ + "instance": "instance-1", + "job": fmt.Sprintf("job-%d", i), + }) + streamIDs = append(streamIDs, sid) + } + f(`{instance="instance-1"}`, streamIDs) + }) + t.Run("match-re", func(t *testing.T) { + var streamIDs []streamID + for _, instanceID := range []int{3, 1} { + for _, jobID := range []int{0, 2} { + sid, _ := getStreamIDForTags(map[string]string{ + "instance": fmt.Sprintf("instance-%d", instanceID), + "job": fmt.Sprintf("job-%d", jobID), + }) + streamIDs = append(streamIDs, sid) + } + } + f(`{job=~"job-(0|2)",instance=~"instance-[13]"}`, streamIDs) + }) + t.Run("match-re-empty-match", func(t *testing.T) { + var streamIDs []streamID + for _, instanceID := range []int{3, 1} { + for _, jobID := range []int{0, 2} { + sid, _ := getStreamIDForTags(map[string]string{ + "instance": fmt.Sprintf("instance-%d", instanceID), + "job": fmt.Sprintf("job-%d", jobID), + }) + streamIDs = append(streamIDs, sid) + } + } + 
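+		// Note (editor): the empty alternative in the job regexp also matches streams without the job tag; every registered test stream has it, so the result is expected to match the match-re case above.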
f(`{job=~"job-(0|2)|",instance=~"instance-[13]"}`, streamIDs) + }) + t.Run("match-negative-re", func(t *testing.T) { + var instanceIDs []int + for i := 0; i < instancesCount; i++ { + if i != 0 && i != 1 { + instanceIDs = append(instanceIDs, i) + } + } + var jobIDs []int + for i := 0; i < jobsCount; i++ { + if i > 2 { + jobIDs = append(jobIDs, i) + } + } + var streamIDs []streamID + for _, instanceID := range instanceIDs { + for _, jobID := range jobIDs { + sid, _ := getStreamIDForTags(map[string]string{ + "instance": fmt.Sprintf("instance-%d", instanceID), + "job": fmt.Sprintf("job-%d", jobID), + }) + streamIDs = append(streamIDs, sid) + } + } + f(`{job!~"job-[0-2]",instance!~"instance-(0|1)"}`, streamIDs) + }) + t.Run("match-negative-re-empty-match", func(t *testing.T) { + var instanceIDs []int + for i := 0; i < instancesCount; i++ { + if i != 0 && i != 1 { + instanceIDs = append(instanceIDs, i) + } + } + var jobIDs []int + for i := 0; i < jobsCount; i++ { + if i > 2 { + jobIDs = append(jobIDs, i) + } + } + var streamIDs []streamID + for _, instanceID := range instanceIDs { + for _, jobID := range jobIDs { + sid, _ := getStreamIDForTags(map[string]string{ + "instance": fmt.Sprintf("instance-%d", instanceID), + "job": fmt.Sprintf("job-%d", jobID), + }) + streamIDs = append(streamIDs, sid) + } + } + f(`{job!~"job-[0-2]",instance!~"instance-(0|1)|"}`, streamIDs) + }) + t.Run("match-negative-job", func(t *testing.T) { + instanceIDs := []int{2} + var jobIDs []int + for i := 0; i < jobsCount; i++ { + if i != 1 { + jobIDs = append(jobIDs, i) + } + } + var streamIDs []streamID + for _, instanceID := range instanceIDs { + for _, jobID := range jobIDs { + sid, _ := getStreamIDForTags(map[string]string{ + "instance": fmt.Sprintf("instance-%d", instanceID), + "job": fmt.Sprintf("job-%d", jobID), + }) + streamIDs = append(streamIDs, sid) + } + } + f(`{instance="instance-2",job!="job-1"}`, streamIDs) + }) + + mustCloseIndexdb(idb) + fs.MustRemoveAll(path) + + closeTestStorage(s) +} diff --git a/lib/logstorage/inmemory_part.go b/lib/logstorage/inmemory_part.go new file mode 100644 index 000000000..2afd970ec --- /dev/null +++ b/lib/logstorage/inmemory_part.go @@ -0,0 +1,155 @@ +package logstorage + +import ( + "path/filepath" + "sort" + "sync" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" +) + +// inmemoryPart is an in-memory part. +type inmemoryPart struct { + // ph contains partHeader information for the given in-memory part. + ph partHeader + + metaindex bytesutil.ByteBuffer + index bytesutil.ByteBuffer + columnsHeader bytesutil.ByteBuffer + timestamps bytesutil.ByteBuffer + fieldValues bytesutil.ByteBuffer + fieldBloomFilter bytesutil.ByteBuffer + messageValues bytesutil.ByteBuffer + messageBloomFilter bytesutil.ByteBuffer +} + +// reset resets mp, so it can be re-used +func (mp *inmemoryPart) reset() { + mp.ph.reset() + + mp.metaindex.Reset() + mp.index.Reset() + mp.columnsHeader.Reset() + mp.timestamps.Reset() + mp.fieldValues.Reset() + mp.fieldBloomFilter.Reset() + mp.messageValues.Reset() + mp.messageBloomFilter.Reset() +} + +// mustInitFromRows initializes mp from lr. 
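+// Note that lr is sorted in place (see sort.Sort below), so the caller must not rely on the original row order afterwards.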
+func (mp *inmemoryPart) mustInitFromRows(lr *LogRows) { + mp.reset() + + if len(lr.timestamps) == 0 { + return + } + + sort.Sort(lr) + + bsw := getBlockStreamWriter() + bsw.MustInitForInmemoryPart(mp) + trs := getTmpRows() + var sidPrev *streamID + uncompressedBlockSizeBytes := uint64(0) + timestamps := lr.timestamps + rows := lr.rows + streamIDs := lr.streamIDs + for i := range timestamps { + streamID := &streamIDs[i] + if sidPrev == nil { + sidPrev = streamID + } + + if uncompressedBlockSizeBytes >= maxUncompressedBlockSize || !streamID.equal(sidPrev) { + bsw.MustWriteRows(sidPrev, trs.timestamps, trs.rows) + trs.reset() + sidPrev = streamID + uncompressedBlockSizeBytes = 0 + } + fields := rows[i] + trs.timestamps = append(trs.timestamps, timestamps[i]) + trs.rows = append(trs.rows, fields) + uncompressedBlockSizeBytes += uncompressedRowSizeBytes(fields) + } + bsw.MustWriteRows(sidPrev, trs.timestamps, trs.rows) + putTmpRows(trs) + bsw.Finalize(&mp.ph) + putBlockStreamWriter(bsw) +} + +// MustStoreToDisk stores mp to disk at the given path. +func (mp *inmemoryPart) MustStoreToDisk(path string) { + fs.MustMkdirFailIfExist(path) + + metaindexPath := filepath.Join(path, metaindexFilename) + indexPath := filepath.Join(path, indexFilename) + columnsHeaderPath := filepath.Join(path, columnsHeaderFilename) + timestampsPath := filepath.Join(path, timestampsFilename) + fieldValuesPath := filepath.Join(path, fieldValuesFilename) + fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename) + messageValuesPath := filepath.Join(path, messageValuesFilename) + messageBloomFilterPath := filepath.Join(path, messageBloomFilename) + + fs.MustWriteSync(metaindexPath, mp.metaindex.B) + fs.MustWriteSync(indexPath, mp.index.B) + fs.MustWriteSync(columnsHeaderPath, mp.columnsHeader.B) + fs.MustWriteSync(timestampsPath, mp.timestamps.B) + fs.MustWriteSync(fieldValuesPath, mp.fieldValues.B) + fs.MustWriteSync(fieldBloomFilterPath, mp.fieldBloomFilter.B) + fs.MustWriteSync(messageValuesPath, mp.messageValues.B) + fs.MustWriteSync(messageBloomFilterPath, mp.messageBloomFilter.B) + + mp.ph.mustWriteMetadata(path) + + fs.MustSyncPath(path) + // Do not sync parent directory - it must be synced by the caller. 
+} + +// tmpRows is used as a helper for inmemoryPart.mustInitFromRows() +type tmpRows struct { + timestamps []int64 + + rows [][]Field +} + +func (trs *tmpRows) reset() { + trs.timestamps = trs.timestamps[:0] + + rows := trs.rows + for i := range rows { + rows[i] = nil + } + trs.rows = rows[:0] +} + +func getTmpRows() *tmpRows { + v := tmpRowsPool.Get() + if v == nil { + return &tmpRows{} + } + return v.(*tmpRows) +} + +func putTmpRows(trs *tmpRows) { + trs.reset() + tmpRowsPool.Put(trs) +} + +var tmpRowsPool sync.Pool + +func getInmemoryPart() *inmemoryPart { + v := inmemoryPartPool.Get() + if v == nil { + return &inmemoryPart{} + } + return v.(*inmemoryPart) +} + +func putInmemoryPart(mp *inmemoryPart) { + mp.reset() + inmemoryPartPool.Put(mp) +} + +var inmemoryPartPool sync.Pool diff --git a/lib/logstorage/inmemory_part_test.go b/lib/logstorage/inmemory_part_test.go new file mode 100644 index 000000000..85b2c9f78 --- /dev/null +++ b/lib/logstorage/inmemory_part_test.go @@ -0,0 +1,343 @@ +package logstorage + +import ( + "fmt" + "math" + "math/rand" + "reflect" + "sort" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +func TestInmemoryPartMustInitFromRows(t *testing.T) { + f := func(lr *LogRows, blocksCountExpected int, compressionRateExpected float64) { + t.Helper() + + uncompressedSizeBytesExpected := uncompressedRowsSizeBytes(lr.rows) + rowsCountExpected := len(lr.timestamps) + minTimestampExpected := int64(math.MaxInt64) + maxTimestampExpected := int64(math.MinInt64) + + // make a copy of lr - it is used for comapring the results later, + // since lr may be modified by inmemoryPart.mustInitFromRows() + lrOrig := GetLogRows(nil, nil) + for i, timestamp := range lr.timestamps { + if timestamp < minTimestampExpected { + minTimestampExpected = timestamp + } + if timestamp > maxTimestampExpected { + maxTimestampExpected = timestamp + } + lrOrig.mustAddInternal(lr.streamIDs[i], timestamp, lr.rows[i], lr.streamTagsCanonicals[i]) + } + + // Create inmemory part from lr + mp := getInmemoryPart() + mp.mustInitFromRows(lr) + + // Check mp.ph + ph := &mp.ph + checkCompressionRate(t, ph, compressionRateExpected) + if ph.UncompressedSizeBytes != uncompressedSizeBytesExpected { + t.Fatalf("unexpected UncompressedSizeBytes in partHeader; got %d; want %d", ph.UncompressedSizeBytes, uncompressedSizeBytesExpected) + } + if ph.RowsCount != uint64(rowsCountExpected) { + t.Fatalf("unexpected rowsCount in partHeader; got %d; want %d", ph.RowsCount, rowsCountExpected) + } + if ph.BlocksCount != uint64(blocksCountExpected) { + t.Fatalf("unexpected blocksCount in partHeader; got %d; want %d", ph.BlocksCount, blocksCountExpected) + } + if ph.RowsCount > 0 { + if ph.MinTimestamp != minTimestampExpected { + t.Fatalf("unexpected minTimestamp in partHeader; got %d; want %d", ph.MinTimestamp, minTimestampExpected) + } + if ph.MaxTimestamp != maxTimestampExpected { + t.Fatalf("unexpected maxTimestamp in partHeader; got %d; want %d", ph.MaxTimestamp, maxTimestampExpected) + } + } + + // Read log entries from mp to rrsResult + sbu := getStringsBlockUnmarshaler() + defer putStringsBlockUnmarshaler(sbu) + vd := getValuesDecoder() + defer putValuesDecoder(vd) + lrResult := mp.readLogRows(sbu, vd) + putInmemoryPart(mp) + + // compare lrOrig to lrResult + if err := checkEqualRows(lrResult, lrOrig); err != nil { + t.Fatalf("unequal log entries: %s", err) + } + } + + f(GetLogRows(nil, nil), 0, 0) + + // Check how inmemoryPart works with a single stream + f(newTestLogRows(1, 1, 0), 1, 0.8) + 
f(newTestLogRows(1, 2, 0), 1, 0.9) + f(newTestLogRows(1, 10, 0), 1, 2.0) + f(newTestLogRows(1, 1000, 0), 1, 7.1) + f(newTestLogRows(1, 20000, 0), 2, 7.2) + + // Check how inmemoryPart works with multiple streams + f(newTestLogRows(2, 1, 0), 2, 0.8) + f(newTestLogRows(10, 1, 0), 10, 0.9) + f(newTestLogRows(100, 1, 0), 100, 1.0) + f(newTestLogRows(10, 5, 0), 10, 1.4) + f(newTestLogRows(10, 1000, 0), 10, 7.2) + f(newTestLogRows(100, 100, 0), 100, 5.0) +} + +func checkCompressionRate(t *testing.T, ph *partHeader, compressionRateExpected float64) { + t.Helper() + compressionRate := float64(ph.UncompressedSizeBytes) / float64(ph.CompressedSizeBytes) + if math.Abs(compressionRate-compressionRateExpected) > 0.1 { + t.Fatalf("unexpected compression rate; got %.1f; want %.1f", compressionRate, compressionRateExpected) + } +} + +func TestInmemoryPartInitFromBlockStreamReaders(t *testing.T) { + f := func(lrs []*LogRows, blocksCountExpected int, compressionRateExpected float64) { + t.Helper() + + uncompressedSizeBytesExpected := uint64(0) + rowsCountExpected := 0 + minTimestampExpected := int64(math.MaxInt64) + maxTimestampExpected := int64(math.MinInt64) + + // make a copy of rrss in order to compare the results after merge. + lrOrig := GetLogRows(nil, nil) + for _, lr := range lrs { + uncompressedSizeBytesExpected += uncompressedRowsSizeBytes(lr.rows) + rowsCountExpected += len(lr.timestamps) + for j, timestamp := range lr.timestamps { + if timestamp < minTimestampExpected { + minTimestampExpected = timestamp + } + if timestamp > maxTimestampExpected { + maxTimestampExpected = timestamp + } + lrOrig.mustAddInternal(lr.streamIDs[j], timestamp, lr.rows[j], lr.streamTagsCanonicals[j]) + } + } + + // Initialize readers from lrs + var mpsSrc []*inmemoryPart + var bsrs []*blockStreamReader + for _, lr := range lrs { + mp := getInmemoryPart() + mp.mustInitFromRows(lr) + mpsSrc = append(mpsSrc, mp) + + bsr := getBlockStreamReader() + bsr.MustInitFromInmemoryPart(mp) + bsrs = append(bsrs, bsr) + } + defer func() { + for _, bsr := range bsrs { + putBlockStreamReader(bsr) + } + for _, mp := range mpsSrc { + putInmemoryPart(mp) + } + }() + + // Merge data from bsrs into mpDst + mpDst := getInmemoryPart() + bsw := getBlockStreamWriter() + bsw.MustInitForInmemoryPart(mpDst) + mustMergeBlockStreams(&mpDst.ph, bsw, bsrs, nil) + putBlockStreamWriter(bsw) + + // Check mpDst.ph stats + ph := &mpDst.ph + checkCompressionRate(t, ph, compressionRateExpected) + if ph.UncompressedSizeBytes != uncompressedSizeBytesExpected { + t.Fatalf("unexpected uncompressedSizeBytes in partHeader; got %d; want %d", ph.UncompressedSizeBytes, uncompressedSizeBytesExpected) + } + if ph.RowsCount != uint64(rowsCountExpected) { + t.Fatalf("unexpected number of entries in partHeader; got %d; want %d", ph.RowsCount, rowsCountExpected) + } + if ph.BlocksCount != uint64(blocksCountExpected) { + t.Fatalf("unexpected blocksCount in partHeader; got %d; want %d", ph.BlocksCount, blocksCountExpected) + } + if ph.RowsCount > 0 { + if ph.MinTimestamp != minTimestampExpected { + t.Fatalf("unexpected minTimestamp in partHeader; got %d; want %d", ph.MinTimestamp, minTimestampExpected) + } + if ph.MaxTimestamp != maxTimestampExpected { + t.Fatalf("unexpected maxTimestamp in partHeader; got %d; want %d", ph.MaxTimestamp, maxTimestampExpected) + } + } + + // Read log entries from mpDst to rrsResult + sbu := getStringsBlockUnmarshaler() + defer putStringsBlockUnmarshaler(sbu) + vd := getValuesDecoder() + defer putValuesDecoder(vd) + lrResult := 
mpDst.readLogRows(sbu, vd) + putInmemoryPart(mpDst) + + // compare rrsOrig to rrsResult + if err := checkEqualRows(lrResult, lrOrig); err != nil { + t.Fatalf("unequal log entries: %s", err) + } + } + + // Check empty readers + f(nil, 0, 0) + f([]*LogRows{GetLogRows(nil, nil)}, 0, 0) + f([]*LogRows{GetLogRows(nil, nil), GetLogRows(nil, nil)}, 0, 0) + + // Check merge with a single reader + f([]*LogRows{newTestLogRows(1, 1, 0)}, 1, 0.8) + f([]*LogRows{newTestLogRows(1, 10, 0)}, 1, 2.0) + f([]*LogRows{newTestLogRows(1, 100, 0)}, 1, 4.9) + f([]*LogRows{newTestLogRows(1, 1000, 0)}, 1, 7.1) + f([]*LogRows{newTestLogRows(1, 10000, 0)}, 1, 7.4) + f([]*LogRows{newTestLogRows(10, 1, 0)}, 10, 0.9) + f([]*LogRows{newTestLogRows(100, 1, 0)}, 100, 1.0) + f([]*LogRows{newTestLogRows(1000, 1, 0)}, 1000, 1.0) + f([]*LogRows{newTestLogRows(10, 10, 0)}, 10, 2.1) + f([]*LogRows{newTestLogRows(10, 100, 0)}, 10, 4.9) + + //Check merge with multiple readers + f([]*LogRows{ + newTestLogRows(1, 1, 0), + newTestLogRows(1, 1, 1), + }, 2, 0.9) + f([]*LogRows{ + newTestLogRows(2, 2, 0), + newTestLogRows(2, 2, 0), + }, 2, 1.8) + f([]*LogRows{ + newTestLogRows(1, 20, 0), + newTestLogRows(1, 10, 1), + newTestLogRows(1, 5, 2), + }, 3, 2.2) + f([]*LogRows{ + newTestLogRows(10, 20, 0), + newTestLogRows(20, 10, 1), + newTestLogRows(30, 5, 2), + }, 60, 2.0) + f([]*LogRows{ + newTestLogRows(10, 20, 0), + newTestLogRows(20, 10, 1), + newTestLogRows(30, 5, 2), + newTestLogRows(20, 7, 3), + newTestLogRows(10, 9, 4), + }, 90, 1.9) +} + +func newTestLogRows(streams, rowsPerStream int, seed int64) *LogRows { + streamTags := []string{ + "some-stream-tag", + } + lr := GetLogRows(streamTags, nil) + rng := rand.New(rand.NewSource(seed)) + var fields []Field + for i := 0; i < streams; i++ { + tenantID := TenantID{ + AccountID: rng.Uint32(), + ProjectID: rng.Uint32(), + } + for j := 0; j < rowsPerStream; j++ { + // Add stream tags + fields = append(fields[:0], Field{ + Name: "some-stream-tag", + Value: fmt.Sprintf("some-stream-value-%d", i), + }) + // Add the remaining tags + for k := 0; k < 5; k++ { + if rng.Float64() < 0.5 { + fields = append(fields, Field{ + Name: fmt.Sprintf("field_%d", k), + Value: fmt.Sprintf("value_%d_%d_%d", i, j, k), + }) + } + } + // add a message field + fields = append(fields, Field{ + Name: "", + Value: fmt.Sprintf("some row number %d at stream %d", j, i), + }) + // add a field with constant value + fields = append(fields, Field{ + Name: "job", + Value: "foobar", + }) + // add a field with uint value + fields = append(fields, Field{ + Name: "response_size_bytes", + Value: fmt.Sprintf("%d", rng.Intn(1234)), + }) + // shuffle fields in order to check de-shuffling algorithm + rng.Shuffle(len(fields), func(i, j int) { + fields[i], fields[j] = fields[j], fields[i] + }) + timestamp := rng.Int63() + lr.MustAdd(tenantID, timestamp, fields) + } + } + return lr +} + +func checkEqualRows(lrResult, lrOrig *LogRows) error { + if len(lrResult.timestamps) != len(lrOrig.timestamps) { + return fmt.Errorf("unexpected length LogRows; got %d; want %d", len(lrResult.timestamps), len(lrOrig.timestamps)) + } + + sort.Sort(lrResult) + sort.Sort(lrOrig) + + sortFieldNames := func(fields []Field) { + sort.Slice(fields, func(i, j int) bool { + return fields[i].Name < fields[j].Name + }) + } + for i := range lrOrig.timestamps { + if !lrOrig.streamIDs[i].equal(&lrResult.streamIDs[i]) { + return fmt.Errorf("unexpected streamID for log entry %d\ngot\n%s\nwant\n%s", i, &lrResult.streamIDs[i], &lrOrig.streamIDs[i]) + } + if 
lrOrig.timestamps[i] != lrResult.timestamps[i] {
+			return fmt.Errorf("unexpected timestamp for log entry %d\ngot\n%d\nwant\n%d", i, lrResult.timestamps[i], lrOrig.timestamps[i])
+		}
+		fieldsOrig := lrOrig.rows[i]
+		fieldsResult := lrResult.rows[i]
+		if len(fieldsOrig) != len(fieldsResult) {
+			return fmt.Errorf("unexpected number of fields at log entry %d\ngot\n%s\nwant\n%s", i, fieldsResult, fieldsOrig)
+		}
+		sortFieldNames(fieldsOrig)
+		sortFieldNames(fieldsResult)
+		if !reflect.DeepEqual(fieldsOrig, fieldsResult) {
+			return fmt.Errorf("unexpected fields for log entry %d\ngot\n%s\nwant\n%s", i, fieldsResult, fieldsOrig)
+		}
+	}
+	return nil
+}
+
+// readLogRows reads log entries from mp.
+//
+// This function is for testing and debugging purposes only.
+func (mp *inmemoryPart) readLogRows(sbu *stringsBlockUnmarshaler, vd *valuesDecoder) *LogRows {
+	lr := GetLogRows(nil, nil)
+	bsr := getBlockStreamReader()
+	defer putBlockStreamReader(bsr)
+	bsr.MustInitFromInmemoryPart(mp)
+	var tmp rows
+	for bsr.NextBlock() {
+		bd := &bsr.blockData
+		streamID := bd.streamID
+		if err := bd.unmarshalRows(&tmp, sbu, vd); err != nil {
+			logger.Panicf("BUG: cannot unmarshal log entries from inmemoryPart: %s", err)
+		}
+		for i, timestamp := range tmp.timestamps {
+			lr.MustAdd(streamID.tenantID, timestamp, tmp.rows[i])
+			lr.streamIDs[len(lr.streamIDs)-1] = streamID
+		}
+		tmp.reset()
+	}
+	return lr
+}
diff --git a/lib/logstorage/inmemory_part_timing_test.go b/lib/logstorage/inmemory_part_timing_test.go
new file mode 100644
index 000000000..ccebe4f14
--- /dev/null
+++ b/lib/logstorage/inmemory_part_timing_test.go
@@ -0,0 +1,34 @@
+package logstorage
+
+import (
+	"fmt"
+	"testing"
+)
+
+func BenchmarkInmemoryPart_MustInitFromRows(b *testing.B) {
+	for _, streams := range []int{1, 10, 100} {
+		b.Run(fmt.Sprintf("streams_%d", streams), func(b *testing.B) {
+			for _, rowsPerStream := range []int{1, 10, 100, 1000} {
+				b.Run(fmt.Sprintf("rowsPerStream_%d", rowsPerStream), func(b *testing.B) {
+					benchmarkInmemoryPartMustInitFromRows(b, streams, rowsPerStream)
+				})
+			}
+		})
+	}
+}
+
+func benchmarkInmemoryPartMustInitFromRows(b *testing.B, streams, rowsPerStream int) {
+	b.ReportAllocs()
+	b.SetBytes(int64(streams * rowsPerStream))
+	b.RunParallel(func(pb *testing.PB) {
+		lr := newTestLogRows(streams, rowsPerStream, 0)
+		mp := getInmemoryPart()
+		for pb.Next() {
+			mp.mustInitFromRows(lr)
+			if mp.ph.RowsCount != uint64(len(lr.timestamps)) {
+				panic(fmt.Errorf("unexpected number of entries in the output stream; got %d; want %d", mp.ph.RowsCount, len(lr.timestamps)))
+			}
+		}
+		putInmemoryPart(mp)
+	})
+}
diff --git a/lib/logstorage/log_rows.go b/lib/logstorage/log_rows.go
new file mode 100644
index 000000000..2192fa0d2
--- /dev/null
+++ b/lib/logstorage/log_rows.go
@@ -0,0 +1,277 @@
+package logstorage
+
+import (
+	"sort"
+	"sync"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+)
+
+// LogRows holds a set of rows needed for Storage.MustAddRows
+//
+// LogRows must be obtained via GetLogRows()
+type LogRows struct {
+	// buf holds all the bytes referred by items in LogRows
+	buf []byte
+
+	// fieldsBuf holds all the fields referred by items in LogRows
+	fieldsBuf []Field
+
+	// streamIDs holds streamIDs for rows added to LogRows
+	streamIDs []streamID
+
+	// streamTagsCanonicals holds streamTagsCanonical entries for rows added to LogRows
+	streamTagsCanonicals [][]byte
+
+	// timestamps holds timestamps for rows added to LogRows
+	timestamps []int64
+
+	// rows holds fields for rows added to LogRows.
+ rows [][]Field + + // sf is a helper for sorting fields in every added row + sf sortedFields + + // streamFields contains names for stream fields + streamFields map[string]struct{} + + // ignoreFields contains names for log fields, which must be skipped during data ingestion + ignoreFields map[string]struct{} +} + +type sortedFields []Field + +func (sf *sortedFields) Len() int { + return len(*sf) +} + +func (sf *sortedFields) Less(i, j int) bool { + a := *sf + return a[i].Name < a[j].Name +} + +func (sf *sortedFields) Swap(i, j int) { + a := *sf + a[i], a[j] = a[j], a[i] +} + +// RowFormatter implementes fmt.Stringer for []Field aka a single log row +type RowFormatter []Field + +// String returns user-readable representation for rf +func (rf *RowFormatter) String() string { + b := append([]byte{}, '{') + + fields := *rf + if len(fields) > 0 { + b = append(b, fields[0].String()...) + fields = fields[1:] + for _, field := range fields { + b = append(b, ',') + b = append(b, field.String()...) + } + } + + b = append(b, '}') + return string(b) +} + +// Reset resets lr +func (lr *LogRows) Reset() { + lr.buf = lr.buf[:0] + + fb := lr.fieldsBuf + for i := range fb { + fb[i].Reset() + } + lr.fieldsBuf = fb[:0] + + sids := lr.streamIDs + for i := range sids { + sids[i].reset() + } + lr.streamIDs = sids[:0] + + sns := lr.streamTagsCanonicals + for i := range sns { + sns[i] = nil + } + lr.streamTagsCanonicals = sns[:0] + + lr.timestamps = lr.timestamps[:0] + + rows := lr.rows + for i := range rows { + rows[i] = nil + } + lr.rows = rows[:0] + + lr.sf = nil + + sfs := lr.streamFields + for k := range sfs { + delete(sfs, k) + } + + ifs := lr.ignoreFields + for k := range ifs { + delete(ifs, k) + } +} + +// NeedFlush returns true if lr contains too much data, so it must be flushed to the storage. +func (lr *LogRows) NeedFlush() bool { + return len(lr.buf) > (maxUncompressedBlockSize/8)*7 +} + +// MustAdd adds a log entry with the given args to lr. +// +// It is OK to modify the args after returning from the function, +// since lr copies all the args to internal data. +func (lr *LogRows) MustAdd(tenantID TenantID, timestamp int64, fields []Field) { + // Compose StreamTags from fields according to lr.streamFields + sfs := lr.streamFields + st := GetStreamTags() + for i := range fields { + f := &fields[i] + if _, ok := sfs[f.Name]; ok { + st.Add(f.Name, f.Value) + } + } + + // Marshal StreamTags + bb := bbPool.Get() + bb.B = st.MarshalCanonical(bb.B) + PutStreamTags(st) + + // Calculate the id for the StreamTags + var sid streamID + sid.tenantID = tenantID + sid.id = hash128(bb.B) + + // Store the row + lr.mustAddInternal(sid, timestamp, fields, bb.B) + bbPool.Put(bb) +} + +func (lr *LogRows) mustAddInternal(sid streamID, timestamp int64, fields []Field, streamTagsCanonical []byte) { + buf := lr.buf + bufLen := len(buf) + buf = append(buf, streamTagsCanonical...) + + lr.streamTagsCanonicals = append(lr.streamTagsCanonicals, buf[bufLen:]) + lr.streamIDs = append(lr.streamIDs, sid) + lr.timestamps = append(lr.timestamps, timestamp) + + // Store all the fields + ifs := lr.ignoreFields + fb := lr.fieldsBuf + fieldsLen := len(fb) + for i := range fields { + f := &fields[i] + + if _, ok := ifs[f.Name]; ok { + // Skip fields from the ifs map + continue + } + if f.Value == "" { + // Skip fields without values + continue + } + + fb = append(fb, Field{}) + dstField := &fb[len(fb)-1] + + bufLen = len(buf) + if f.Name != "_msg" { + buf = append(buf, f.Name...) 
+ } + dstField.Name = bytesutil.ToUnsafeString(buf[bufLen:]) + + bufLen = len(buf) + buf = append(buf, f.Value...) + dstField.Value = bytesutil.ToUnsafeString(buf[bufLen:]) + } + lr.sf = fb[fieldsLen:] + sort.Sort(&lr.sf) + lr.rows = append(lr.rows, lr.sf) + + lr.fieldsBuf = fb + lr.buf = buf +} + +// GetLogRows returns LogRows from the pool for the given streamFields. +// +// streamFields is a set of field names, which must be associated with the stream. +// +// Return back it to the pool with PutLogRows() when it is no longer needed. +func GetLogRows(streamFields, ignoreFields []string) *LogRows { + v := logRowsPool.Get() + if v == nil { + v = &LogRows{} + } + lr := v.(*LogRows) + + // Initialize streamFields + sfs := lr.streamFields + if sfs == nil { + sfs = make(map[string]struct{}, len(streamFields)) + lr.streamFields = sfs + } + for _, f := range streamFields { + sfs[f] = struct{}{} + } + + // Initialize ignoreFields + ifs := lr.ignoreFields + if ifs == nil { + ifs = make(map[string]struct{}, len(ignoreFields)) + lr.ignoreFields = ifs + } + for _, f := range ignoreFields { + if f != "" { + ifs[f] = struct{}{} + } + } + + return lr +} + +// PutLogRows returns lr to the pool. +func PutLogRows(lr *LogRows) { + lr.Reset() + logRowsPool.Put(lr) +} + +var logRowsPool sync.Pool + +// Len returns the number of items in lr. +func (lr *LogRows) Len() int { + return len(lr.streamIDs) +} + +// Less returns true if (streamID, timestamp) for row i is smaller than the (streamID, timestamp) for row j +func (lr *LogRows) Less(i, j int) bool { + a := &lr.streamIDs[i] + b := &lr.streamIDs[j] + if !a.equal(b) { + return a.less(b) + } + return lr.timestamps[i] < lr.timestamps[j] +} + +// Swap swaps rows i and j in lr. +func (lr *LogRows) Swap(i, j int) { + a := &lr.streamIDs[i] + b := &lr.streamIDs[j] + *a, *b = *b, *a + + tsA, tsB := &lr.timestamps[i], &lr.timestamps[j] + *tsA, *tsB = *tsB, *tsA + + snA, snB := &lr.streamTagsCanonicals[i], &lr.streamTagsCanonicals[j] + *snA, *snB = *snB, *snA + + fieldsA, fieldsB := &lr.rows[i], &lr.rows[j] + *fieldsA, *fieldsB = *fieldsB, *fieldsA +} diff --git a/lib/logstorage/log_rows_timing_test.go b/lib/logstorage/log_rows_timing_test.go new file mode 100644 index 000000000..55a726e1b --- /dev/null +++ b/lib/logstorage/log_rows_timing_test.go @@ -0,0 +1,83 @@ +package logstorage + +import ( + "testing" +) + +func BenchmarkLogRowsMustAdd(b *testing.B) { + rows := newBenchRows(map[string]string{ + "input.type": "filestream", + "ecs.version": "8.0.0", + "host.hostname": "foobar-baz-abc", + "host.architecture": "x86_64", + "host.name": "foobar-baz-abc", + "host.os.codename": "bionic", + "host.os.type": "linux", + "host.os.platform": "ubuntu", + "host.os.version": "18.04.6 LTS (Bionic Beaver)", + "host.os.family": "debian", + "host.os.name": "Ubuntu", + "host.os.kernel": "4.15.0-211-generic", + "host.id": "a634d50249af449dbcb3ce724822568a", + "host.containerized": "false", + "host.ip": `["10.0.0.42","10.224.112.1","172.20.0.1","172.18.0.1","172.19.0.1","fc00:f853:ccd:e793::1","fe80::1","172.21.0.1","172.17.0.1"]`, + "host.mac": `["02-42-42-90-52-D9","02-42-C6-48-A6-84","02-42-FD-91-7E-17","52-54-00-F5-13-E7","54-E1-AD-89-1A-4C","F8-34-41-3C-C0-85"]`, + "agent.ephemeral_id": "6c251f67-7210-4cef-8f72-a9546cbb48cc", + "agent.id": "e97243c5-5ef3-4dc1-8828-504f68731e87", + "agent.name": "foobar-baz-abc", + "agent.type": "filebeat", + "agent.version": "8.8.0", + "log.file.path": "/var/log/auth.log", + "log.offset": "37908", + }, []string{ + "Jun 4 20:34:07 foobar-baz-abc 
sudo: pam_unix(sudo:session): session opened for user root by (uid=0)", + "Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=1)", + "Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=2)", + "Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=3)", + "Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=4)", + }) + streamFields := []string{ + "host.hostname", + "agent.name", + "log.file.path", + } + + b.ReportAllocs() + b.SetBytes(int64(len(rows))) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + benchmarkLogRowsMustAdd(rows, streamFields) + } + }) +} + +func benchmarkLogRowsMustAdd(rows [][]Field, streamFields []string) { + lr := GetLogRows(streamFields, nil) + var tid TenantID + for i, fields := range rows { + tid.AccountID = uint32(i) + tid.ProjectID = uint32(2 * i) + timestamp := int64(i) * 1000 + lr.MustAdd(tid, timestamp, fields) + } + PutLogRows(lr) +} + +func newBenchRows(constFields map[string]string, messages []string) [][]Field { + rows := make([][]Field, 0, len(messages)) + for _, msg := range messages { + row := make([]Field, 0, len(constFields)+1) + for k, v := range constFields { + row = append(row, Field{ + Name: k, + Value: v, + }) + } + row = append(row, Field{ + Name: "_msg", + Value: msg, + }) + rows = append(rows, row) + } + return rows +} diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go new file mode 100644 index 000000000..9768939d2 --- /dev/null +++ b/lib/logstorage/parser.go @@ -0,0 +1,1100 @@ +package logstorage + +import ( + "fmt" + "math" + "regexp" + "sort" + "strconv" + "strings" + "time" + "unicode" + "unicode/utf8" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils" +) + +type lexer struct { + // s contains unparsed tail of sOrig + s string + + // sOrig contains the original string + sOrig string + + // token contains the current token + // + // an empty token means the end of s + token string + + // rawToken contains raw token before unquoting + rawToken string + + // prevToken contains the previously parsed token + prevToken string + + // isSkippedSpace is set to true if there was a whitespace before the token in s + isSkippedSpace bool + + // currentTimestamp is the current timestamp in nanoseconds + currentTimestamp int64 +} + +func newLexer(s string) *lexer { + return &lexer{ + s: s, + sOrig: s, + currentTimestamp: time.Now().UnixNano(), + } +} + +func (lex *lexer) isEnd() bool { + return len(lex.s) == 0 && len(lex.token) == 0 && len(lex.rawToken) == 0 +} + +func (lex *lexer) isQuotedToken() bool { + return lex.token != lex.rawToken +} + +func (lex *lexer) isPrevToken(tokens ...string) bool { + for _, token := range tokens { + if token == lex.prevToken { + return true + } + } + return false +} + +func (lex *lexer) isKeyword(keywords ...string) bool { + if lex.isQuotedToken() { + return false + } + tokenLower := strings.ToLower(lex.token) + for _, kw := range keywords { + if kw == tokenLower { + return true + } + } + return false +} + +func (lex *lexer) context() string { + tail := lex.sOrig + tail = tail[:len(tail)-len(lex.s)] + if len(tail) > 50 { + tail = tail[len(tail)-50:] + } + return tail +} + +func (lex *lexer) mustNextToken() bool { + lex.nextToken() + return !lex.isEnd() +} + +func (lex *lexer) nextCharToken(s string, size int) { + lex.token = s[:size] + lex.rawToken = lex.token + lex.s = s[size:] +} + +// nextToken updates 
lex.token to the next token. +func (lex *lexer) nextToken() { + s := lex.s + lex.prevToken = lex.token + lex.token = "" + lex.rawToken = "" + lex.isSkippedSpace = false + if len(s) == 0 { + return + } + r, size := utf8.DecodeRuneInString(s) + if r == utf8.RuneError { + lex.nextCharToken(s, size) + return + } + + // Skip whitespace + for unicode.IsSpace(r) { + lex.isSkippedSpace = true + s = s[size:] + r, size = utf8.DecodeRuneInString(s) + } + + // Try decoding simple token + tokenLen := 0 + for isTokenRune(r) || r == '.' { + tokenLen += size + r, size = utf8.DecodeRuneInString(s[tokenLen:]) + } + if tokenLen > 0 { + lex.nextCharToken(s, tokenLen) + return + } + + switch r { + case '"', '`': + prefix, err := strconv.QuotedPrefix(s) + if err != nil { + lex.nextCharToken(s, 1) + return + } + token, err := strconv.Unquote(prefix) + if err != nil { + lex.nextCharToken(s, 1) + return + } + lex.token = token + lex.rawToken = prefix + lex.s = s[len(prefix):] + return + case '\'': + var b []byte + for !strings.HasPrefix(s[size:], "'") { + ch, _, newTail, err := strconv.UnquoteChar(s[size:], '\'') + if err != nil { + lex.nextCharToken(s, 1) + return + } + b = utf8.AppendRune(b, ch) + size += len(s[size:]) - len(newTail) + } + size++ + lex.token = string(b) + lex.rawToken = string(s[:size]) + lex.s = s[size:] + return + case '=': + if strings.HasPrefix(s[size:], "~") { + lex.nextCharToken(s, 2) + return + } + lex.nextCharToken(s, 1) + return + case '!': + if strings.HasPrefix(s[size:], "~") || strings.HasPrefix(s[size:], "=") { + lex.nextCharToken(s, 2) + return + } + lex.nextCharToken(s, 1) + return + default: + lex.nextCharToken(s, size) + return + } +} + +// Query represents LogsQL query. +type Query struct { + f filter +} + +// String returns string representation for q. +func (q *Query) String() string { + return q.f.String() +} + +func (q *Query) getResultColumnNames() []string { + m := make(map[string]struct{}) + q.f.updateReferencedColumnNames(m) + + // Substitute an empty column name with _msg column + if _, ok := m[""]; ok { + delete(m, "") + m["_msg"] = struct{}{} + } + + // unconditionally select _time, _stream and _msg columns + // TODO: add the ability to filter out these columns + m["_time"] = struct{}{} + m["_stream"] = struct{}{} + m["_msg"] = struct{}{} + + columnNames := make([]string, 0, len(m)) + for k := range m { + columnNames = append(columnNames, k) + } + sort.Strings(columnNames) + return columnNames +} + +// ParseQuery parses s. 
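+//
+// Example usage (the LogsQL query string below is illustrative only):
+//
+//	q, err := ParseQuery(`_time:[2023-01-01,2023-01-02) error`)
+//	if err != nil {
+//		// handle the malformed query
+//	}
+//	_ = q.String()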
+func ParseQuery(s string) (*Query, error) { + lex := newLexer(s) + + f, err := parseFilter(lex) + if err != nil { + return nil, fmt.Errorf("cannot parse filter expression: %w; context: %s", err, lex.context()) + } + if !lex.isEnd() { + return nil, fmt.Errorf("unexpected tail: %q", lex.s) + } + + q := &Query{ + f: f, + } + return q, nil +} + +func parseFilter(lex *lexer) (filter, error) { + if !lex.mustNextToken() || lex.isKeyword("|") { + return nil, fmt.Errorf("missing query") + } + af, err := parseOrFilter(lex, "") + if err != nil { + return nil, err + } + return af, nil +} + +func parseOrFilter(lex *lexer, fieldName string) (filter, error) { + var filters []filter + for { + f, err := parseAndFilter(lex, fieldName) + if err != nil { + return nil, err + } + filters = append(filters, f) + switch { + case lex.isKeyword("|", ")", ""): + if len(filters) == 1 { + return filters[0], nil + } + of := &orFilter{ + filters: filters, + } + return of, nil + case lex.isKeyword("or"): + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing filter after 'or'") + } + } + } +} + +func parseAndFilter(lex *lexer, fieldName string) (filter, error) { + var filters []filter + for { + f, err := parseGenericFilter(lex, fieldName) + if err != nil { + return nil, err + } + filters = append(filters, f) + switch { + case lex.isKeyword("or", "|", ")", ""): + if len(filters) == 1 { + return filters[0], nil + } + af := &andFilter{ + filters: filters, + } + return af, nil + case lex.isKeyword("and"): + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing filter after 'and'") + } + } + } +} + +func parseGenericFilter(lex *lexer, fieldName string) (filter, error) { + // Check for special keywords + switch { + case lex.isKeyword(":"): + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing filter after ':'") + } + return parseGenericFilter(lex, fieldName) + case lex.isKeyword("*"): + lex.nextToken() + f := &prefixFilter{ + fieldName: fieldName, + prefix: "", + } + return f, nil + case lex.isKeyword("("): + if !lex.isSkippedSpace && !lex.isPrevToken("", ":", "(", "!", "not") { + return nil, fmt.Errorf("missing whitespace before the search word %q", lex.prevToken) + } + return parseParensFilter(lex, fieldName) + case lex.isKeyword("not", "!"): + return parseNotFilter(lex, fieldName) + case lex.isKeyword("exact"): + return parseExactFilter(lex, fieldName) + case lex.isKeyword("exact_prefix"): + return parseExactPrefixFilter(lex, fieldName) + case lex.isKeyword("i"): + return parseAnyCaseFilter(lex, fieldName) + case lex.isKeyword("in"): + return parseInFilter(lex, fieldName) + case lex.isKeyword("ipv4_range"): + return parseIPv4RangeFilter(lex, fieldName) + case lex.isKeyword("len_range"): + return parseLenRangeFilter(lex, fieldName) + case lex.isKeyword("range"): + return parseRangeFilter(lex, fieldName) + case lex.isKeyword("re"): + return parseRegexpFilter(lex, fieldName) + case lex.isKeyword("seq"): + return parseSequenceFilter(lex, fieldName) + case lex.isKeyword("string_range"): + return parseStringRangeFilter(lex, fieldName) + case lex.isKeyword(`"`, "'", "`"): + return nil, fmt.Errorf("improperly quoted string") + case lex.isKeyword(",", ")", "[", "]"): + return nil, fmt.Errorf("unexpected token %q", lex.token) + } + phrase := getCompoundPhrase(lex, fieldName) + return parseFilterForPhrase(lex, phrase, fieldName) +} + +func getCompoundPhrase(lex *lexer, fieldName string) string { + phrase := lex.token + rawPhrase := lex.rawToken + lex.nextToken() + suffix := getCompoundSuffix(lex, fieldName) + if 
suffix == "" { + return phrase + } + return rawPhrase + suffix +} + +func getCompoundSuffix(lex *lexer, fieldName string) string { + s := "" + stopTokens := []string{"*", ",", "(", ")", "[", "]", "|", ""} + if fieldName == "" { + stopTokens = append(stopTokens, ":") + } + for !lex.isSkippedSpace && !lex.isKeyword(stopTokens...) { + s += lex.rawToken + lex.nextToken() + } + return s +} + +func getCompoundToken(lex *lexer) string { + s := lex.token + rawS := lex.rawToken + lex.nextToken() + suffix := "" + for !lex.isSkippedSpace && !lex.isKeyword(",", "(", ")", "[", "]", "|", "") { + s += lex.token + lex.nextToken() + } + if suffix == "" { + return s + } + return rawS + suffix +} + +func getCompoundFuncArg(lex *lexer) string { + if lex.isKeyword("*") { + return "" + } + arg := lex.token + rawArg := lex.rawToken + lex.nextToken() + suffix := "" + for !lex.isSkippedSpace && !lex.isKeyword("*", ",", ")", "") { + suffix += lex.rawToken + lex.nextToken() + } + if suffix == "" { + return arg + } + return rawArg + suffix +} + +func parseFilterForPhrase(lex *lexer, phrase, fieldName string) (filter, error) { + if fieldName != "" || !lex.isKeyword(":") { + // The phrase is either a search phrase or a search prefix. + if lex.isKeyword("*") && !lex.isSkippedSpace { + // The phrase is a search prefix in the form `foo*`. + lex.nextToken() + f := &prefixFilter{ + fieldName: fieldName, + prefix: phrase, + } + return f, nil + } + // The phrase is a search phrase. + f := &phraseFilter{ + fieldName: fieldName, + phrase: phrase, + } + return f, nil + } + + // The phrase contains the field name. + fieldName = phrase + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing filter after field name %s", quoteTokenIfNeeded(fieldName)) + } + switch fieldName { + case "_time": + return parseTimeFilter(lex) + case "_stream": + return parseStreamFilter(lex) + default: + return parseGenericFilter(lex, fieldName) + } +} + +func parseParensFilter(lex *lexer, fieldName string) (filter, error) { + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing filter after '('") + } + f, err := parseOrFilter(lex, fieldName) + if err != nil { + return nil, err + } + if !lex.isKeyword(")") { + return nil, fmt.Errorf("unexpected token %q instead of ')'", lex.token) + } + lex.nextToken() + return f, nil +} + +func parseNotFilter(lex *lexer, fieldName string) (filter, error) { + notKeyword := lex.token + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing filters after '%s'", notKeyword) + } + f, err := parseGenericFilter(lex, fieldName) + if err != nil { + return nil, err + } + nf, ok := f.(*notFilter) + if ok { + return nf.f, nil + } + nf = ¬Filter{ + f: f, + } + return nf, nil +} + +func parseAnyCaseFilter(lex *lexer, fieldName string) (filter, error) { + phrase := lex.token + lex.nextToken() + if !lex.isKeyword("(") { + phrase += getCompoundSuffix(lex, fieldName) + return parseFilterForPhrase(lex, phrase, fieldName) + } + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing arg for i()") + } + phrase = getCompoundFuncArg(lex) + isPrefixFilter := false + if lex.isKeyword("*") && !lex.isSkippedSpace { + isPrefixFilter = true + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing ')' after i()") + } + } + if !lex.isKeyword(")") { + return nil, fmt.Errorf("unexpected token %q instead of ')' in i()", lex.token) + } + lex.nextToken() + + if isPrefixFilter { + f := &anyCasePrefixFilter{ + fieldName: fieldName, + prefix: phrase, + } + return f, nil + } + f := &anyCasePhraseFilter{ + fieldName: fieldName, + 
phrase:    phrase,
+	}
+	return f, nil
+}
+
+func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) {
+	funcName := lex.token
+	return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+		if len(args) != 2 {
+			return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args))
+		}
+		minLen, err := strconv.ParseUint(args[0], 10, 64)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse minLen at %s(): %w", funcName, err)
+		}
+		maxLen, err := strconv.ParseUint(args[1], 10, 64)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse maxLen at %s(): %w", funcName, err)
+		}
+		rf := &lenRangeFilter{
+			fieldName: fieldName,
+			minLen:    minLen,
+			maxLen:    maxLen,
+		}
+		return rf, nil
+	})
+}
+
+func parseStringRangeFilter(lex *lexer, fieldName string) (filter, error) {
+	funcName := lex.token
+	return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+		if len(args) != 2 {
+			return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args))
+		}
+		rf := &stringRangeFilter{
+			fieldName: fieldName,
+			minValue:  args[0],
+			maxValue:  args[1],
+		}
+		return rf, nil
+	})
+}
+
+func parseIPv4RangeFilter(lex *lexer, fieldName string) (filter, error) {
+	funcName := lex.token
+	return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+		if len(args) == 1 {
+			minValue, maxValue, ok := tryParseIPv4CIDR(args[0])
+			if !ok {
+				return nil, fmt.Errorf("cannot parse IPv4 address or IPv4 CIDR %q at %s()", args[0], funcName)
+			}
+			rf := &ipv4RangeFilter{
+				fieldName: fieldName,
+				minValue:  minValue,
+				maxValue:  maxValue,
+			}
+			return rf, nil
+		}
+		if len(args) != 2 {
+			return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args))
+		}
+		minValue, ok := tryParseIPv4(args[0])
+		if !ok {
+			return nil, fmt.Errorf("cannot parse lower bound ip %q in %s()", args[0], funcName)
+		}
+		maxValue, ok := tryParseIPv4(args[1])
+		if !ok {
+			return nil, fmt.Errorf("cannot parse upper bound ip %q in %s()", args[1], funcName)
+		}
+		rf := &ipv4RangeFilter{
+			fieldName: fieldName,
+			minValue:  minValue,
+			maxValue:  maxValue,
+		}
+		return rf, nil
+	})
+}
+
+func tryParseIPv4CIDR(s string) (uint32, uint32, bool) {
+	n := strings.IndexByte(s, '/')
+	if n < 0 {
+		n, ok := tryParseIPv4(s)
+		return n, n, ok
+	}
+	ip, ok := tryParseIPv4(s[:n])
+	if !ok {
+		return 0, 0, false
+	}
+	maskBits, ok := tryParseUint64(s[n+1:])
+	if !ok || maskBits > 32 {
+		return 0, 0, false
+	}
+	mask := uint32((1 << (32 - maskBits)) - 1)
+	minValue := ip &^ mask
+	maxValue := ip | mask
+	return minValue, maxValue, true
+}
+
+func parseInFilter(lex *lexer, fieldName string) (filter, error) {
+	return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+		f := &inFilter{
+			fieldName: fieldName,
+			values:    args,
+		}
+		return f, nil
+	})
+}
+
+func parseSequenceFilter(lex *lexer, fieldName string) (filter, error) {
+	return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+		sf := &sequenceFilter{
+			fieldName: fieldName,
+			phrases:   args,
+		}
+		return sf, nil
+	})
+}
+
+func parseExactFilter(lex *lexer, fieldName string) (filter, error) {
+	return parseFuncArg(lex, fieldName, func(arg string) (filter, error) {
+		ef := &exactFilter{
+			fieldName: fieldName,
+			value:     arg,
+		}
+		return ef, nil
+	})
+}
+
+func parseExactPrefixFilter(lex *lexer, fieldName string) (filter, error) {
+	return parseFuncArg(lex, fieldName, func(arg string) (filter, error) {
+		ef := &exactPrefixFilter{
+
fieldName: fieldName,
+			prefix:    arg,
+		}
+		return ef, nil
+	})
+}
+
+func parseRegexpFilter(lex *lexer, fieldName string) (filter, error) {
+	funcName := lex.token
+	return parseFuncArg(lex, fieldName, func(arg string) (filter, error) {
+		re, err := regexp.Compile(arg)
+		if err != nil {
+			return nil, fmt.Errorf("invalid regexp %q for %s(): %w", arg, funcName, err)
+		}
+		rf := &regexpFilter{
+			fieldName: fieldName,
+			re:        re,
+		}
+		return rf, nil
+	})
+}
+
+func parseRangeFilter(lex *lexer, fieldName string) (filter, error) {
+	funcName := lex.token
+	lex.nextToken()
+
+	// Parse minValue
+	includeMinValue := false
+	switch {
+	case lex.isKeyword("("):
+		includeMinValue = false
+	case lex.isKeyword("["):
+		includeMinValue = true
+	default:
+		phrase := funcName + getCompoundSuffix(lex, fieldName)
+		return parseFilterForPhrase(lex, phrase, fieldName)
+	}
+	if !lex.mustNextToken() {
+		return nil, fmt.Errorf("missing args for %s()", funcName)
+	}
+	minValue, minValueStr, err := parseFloat64(lex)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse minValue in %s(): %w", funcName, err)
+	}
+
+	// Parse comma
+	if !lex.isKeyword(",") {
+		return nil, fmt.Errorf("unexpected token %q after %q in %s(); want ','", lex.token, minValueStr, funcName)
+	}
+	if !lex.mustNextToken() {
+		return nil, fmt.Errorf("missing maxValue in %s()", funcName)
+	}
+
+	// Parse maxValue
+	maxValue, maxValueStr, err := parseFloat64(lex)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse maxValue in %s(): %w", funcName, err)
+	}
+	includeMaxValue := false
+	switch {
+	case lex.isKeyword(")"):
+		includeMaxValue = false
+	case lex.isKeyword("]"):
+		includeMaxValue = true
+	default:
+		return nil, fmt.Errorf("unexpected closing token %q in %s(); want ')' or ']'", lex.token, funcName)
+	}
+	lex.nextToken()
+
+	stringRepr := ""
+	if includeMinValue {
+		stringRepr += "["
+	} else {
+		stringRepr += "("
+		minValue = math.Nextafter(minValue, math.Inf(1))
+	}
+	stringRepr += minValueStr + "," + maxValueStr
+	if includeMaxValue {
+		stringRepr += "]"
+	} else {
+		stringRepr += ")"
+		maxValue = math.Nextafter(maxValue, math.Inf(-1))
+	}
+
+	rf := &rangeFilter{
+		fieldName: fieldName,
+		minValue:  minValue,
+		maxValue:  maxValue,
+
+		stringRepr: stringRepr,
+	}
+	return rf, nil
+}
+
+func parseFloat64(lex *lexer) (float64, string, error) {
+	s := getCompoundToken(lex)
+	f, err := strconv.ParseFloat(s, 64)
+	if err != nil {
+		return 0, "", fmt.Errorf("cannot parse %q as float64: %w", lex.token, err)
+	}
+	return f, s, nil
+}
+
+func parseFuncArg(lex *lexer, fieldName string, callback func(args string) (filter, error)) (filter, error) {
+	funcName := lex.token
+	return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
+		if len(args) != 1 {
+			return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 1", funcName, len(args))
+		}
+		return callback(args[0])
+	})
+}
+
+func parseFuncArgs(lex *lexer, fieldName string, callback func(args []string) (filter, error)) (filter, error) {
+	funcName := lex.token
+	lex.nextToken()
+	if !lex.isKeyword("(") {
+		phrase := funcName + getCompoundSuffix(lex, fieldName)
+		return parseFilterForPhrase(lex, phrase, fieldName)
+	}
+	if !lex.mustNextToken() {
+		return nil, fmt.Errorf("missing args for %s()", funcName)
+	}
+	var args []string
+	for !lex.isKeyword(")") {
+		if lex.isKeyword(",") {
+			return nil, fmt.Errorf("unexpected ',' - missing arg in %s()", funcName)
+		}
+		arg := getCompoundFuncArg(lex)
+		args = append(args, arg)
+		if lex.isKeyword(")") {
+			break
+		}
+		if
!lex.isKeyword(",") { + return nil, fmt.Errorf("missing ',' after %q in %s()", arg, funcName) + } + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing the next arg after %q in %s()", arg, funcName) + } + } + lex.nextToken() + + return callback(args) +} + +func parseTimeFilter(lex *lexer) (*timeFilter, error) { + startTimeInclude := false + switch { + case lex.isKeyword("["): + startTimeInclude = true + case lex.isKeyword("("): + startTimeInclude = false + default: + // Try parsing '_time:YYYY-MM-DD', which transforms to '_time:[YYYY-MM-DD, YYYY-MM-DD+1)' + startTime, stringRepr, err := parseTime(lex) + if err != nil { + return nil, fmt.Errorf("cannot parse _time filter: %w", err) + } + endTime := getMatchingEndTime(startTime, stringRepr) + tf := &timeFilter{ + minTimestamp: startTime, + maxTimestamp: endTime, + + stringRepr: stringRepr, + } + return tf, nil + } + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing start time in _time filter") + } + + // Parse start time + startTime, startTimeString, err := parseTime(lex) + if err != nil { + return nil, fmt.Errorf("cannot parse start time in _time filter: %w", err) + } + if !lex.isKeyword(",") { + return nil, fmt.Errorf("unexpected token after start time in _time filter: %q; want ','", lex.token) + } + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing end time in _time filter") + } + + // Parse end time + endTime, endTimeString, err := parseTime(lex) + if err != nil { + return nil, fmt.Errorf("cannot parse end time in _time filter: %w", err) + } + + endTimeInclude := false + switch { + case lex.isKeyword("]"): + endTimeInclude = true + case lex.isKeyword(")"): + endTimeInclude = false + default: + return nil, fmt.Errorf("_time filter ends with unexpected token %q; it must end with ']' or ')'", lex.token) + } + lex.nextToken() + + stringRepr := "" + if startTimeInclude { + stringRepr += "[" + } else { + stringRepr += "(" + startTime++ + } + stringRepr += startTimeString + "," + endTimeString + if endTimeInclude { + stringRepr += "]" + endTime = getMatchingEndTime(endTime, endTimeString) + } else { + stringRepr += ")" + endTime-- + } + + tf := &timeFilter{ + minTimestamp: startTime, + maxTimestamp: endTime, + + stringRepr: stringRepr, + } + return tf, nil +} + +func getMatchingEndTime(startTime int64, stringRepr string) int64 { + tStart := time.Unix(0, startTime).UTC() + tEnd := tStart + timeStr := stripTimezoneSuffix(stringRepr) + switch { + case len(timeStr) == len("YYYY"): + y, m, d := tStart.Date() + nsec := startTime % (24 * 3600 * 1e9) + tEnd = time.Date(y+1, m, d, 0, 0, int(nsec/1e9), int(nsec%1e9), time.UTC) + case len(timeStr) == len("YYYY-MM") && timeStr[len("YYYY")] == '-': + y, m, d := tStart.Date() + nsec := startTime % (24 * 3600 * 1e9) + if d != 1 { + d = 0 + m++ + } + tEnd = time.Date(y, m+1, d, 0, 0, int(nsec/1e9), int(nsec%1e9), time.UTC) + case len(timeStr) == len("YYYY-MM-DD") && timeStr[len("YYYY")] == '-': + tEnd = tStart.Add(24 * time.Hour) + case len(timeStr) == len("YYYY-MM-DDThh") && timeStr[len("YYYY")] == '-': + tEnd = tStart.Add(time.Hour) + case len(timeStr) == len("YYYY-MM-DDThh:mm") && timeStr[len("YYYY")] == '-': + tEnd = tStart.Add(time.Minute) + case len(timeStr) == len("YYYY-MM-DDThh:mm:ss") && timeStr[len("YYYY")] == '-': + tEnd = tStart.Add(time.Second) + default: + tEnd = tStart.Add(time.Nanosecond) + } + return tEnd.UnixNano() - 1 +} + +func stripTimezoneSuffix(s string) string { + if strings.HasSuffix(s, "Z") { + return s[:len(s)-1] + } + if len(s) < 6 { + return s + } + tz 
:= s[len(s)-6:]
+	if tz[0] != '-' && tz[0] != '+' {
+		return s
+	}
+	if tz[3] != ':' {
+		return s
+	}
+	return s[:len(s)-len(tz)]
+}
+
+func parseStreamFilter(lex *lexer) (*streamFilter, error) {
+	if !lex.isKeyword("{") {
+		return nil, fmt.Errorf("unexpected token %q instead of '{' in _stream filter", lex.token)
+	}
+	if !lex.mustNextToken() {
+		return nil, fmt.Errorf("incomplete _stream filter after '{'")
+	}
+	var filters []*andStreamFilter
+	for {
+		f, err := parseAndStreamFilter(lex)
+		if err != nil {
+			return nil, err
+		}
+		filters = append(filters, f)
+		switch {
+		case lex.isKeyword("}"):
+			lex.nextToken()
+			sf := &streamFilter{
+				f: &StreamFilter{
+					orFilters: filters,
+				},
+			}
+			return sf, nil
+		case lex.isKeyword("or"):
+			if !lex.mustNextToken() {
+				return nil, fmt.Errorf("incomplete _stream filter after 'or'")
+			}
+			if lex.isKeyword("}") {
+				return nil, fmt.Errorf("unexpected '}' after 'or' in _stream filter")
+			}
+		default:
+			return nil, fmt.Errorf("unexpected token in _stream filter: %q; want '}' or 'or'", lex.token)
+		}
+	}
+}
+
+func newStreamFilter(s string) (*StreamFilter, error) {
+	lex := newLexer(s)
+	if !lex.mustNextToken() {
+		return nil, fmt.Errorf("missing '{' in _stream filter")
+	}
+	sf, err := parseStreamFilter(lex)
+	if err != nil {
+		return nil, err
+	}
+	return sf.f, nil
+}
+
+func parseAndStreamFilter(lex *lexer) (*andStreamFilter, error) {
+	var filters []*streamTagFilter
+	for {
+		if lex.isKeyword("}") {
+			asf := &andStreamFilter{
+				tagFilters: filters,
+			}
+			return asf, nil
+		}
+		f, err := parseStreamTagFilter(lex)
+		if err != nil {
+			return nil, err
+		}
+		filters = append(filters, f)
+		switch {
+		case lex.isKeyword("or", "}"):
+			asf := &andStreamFilter{
+				tagFilters: filters,
+			}
+			return asf, nil
+		case lex.isKeyword(","):
+			if !lex.mustNextToken() {
+				return nil, fmt.Errorf("missing stream filter after ','")
+			}
+		default:
+			return nil, fmt.Errorf("unexpected token %q in _stream filter; want 'or', 'and', '}' or ','", lex.token)
+		}
+	}
+}
+
+func parseStreamTagFilter(lex *lexer) (*streamTagFilter, error) {
+	tagName := lex.token
+	if !lex.mustNextToken() {
+		return nil, fmt.Errorf("missing operation in _stream filter for %q field", tagName)
+	}
+	if !lex.isKeyword("=", "!=", "=~", "!~") {
+		return nil, fmt.Errorf("unsupported operation %q in _stream filter for %q field; supported operations: =, !=, =~, !~", lex.token, tagName)
+	}
+	op := lex.token
+	if !lex.mustNextToken() {
+		return nil, fmt.Errorf("missing _stream filter value for %q field", tagName)
+	}
+	value := lex.token
+	if !lex.mustNextToken() {
+		return nil, fmt.Errorf("missing token after %q%s%q filter", tagName, op, value)
+	}
+	stf := &streamTagFilter{
+		tagName: tagName,
+		op:      op,
+		value:   value,
+	}
+	return stf, nil
+}
+
+func parseTime(lex *lexer) (int64, string, error) {
+	s := getCompoundToken(lex)
+	t, err := promutils.ParseTimeAt(s, float64(lex.currentTimestamp)/1e9)
+	if err != nil {
+		return 0, "", err
+	}
+	return int64(t * 1e9), s, nil
+}
+
+func quoteTokenIfNeeded(s string) string {
+	if !needQuoteToken(s) {
+		return s
+	}
+	return strconv.Quote(s)
+}
+
+func needQuoteToken(s string) bool {
+	sLower := strings.ToLower(s)
+	if _, ok := reservedKeywords[sLower]; ok {
+		return true
+	}
+	for _, r := range s {
+		if !isTokenRune(r) && r != '.'
&& r != '-' { + return true + } + } + return false +} + +var reservedKeywords = func() map[string]struct{} { + kws := []string{ + // An empty keyword means end of parsed string + "", + + // boolean operator tokens for 'foo and bar or baz not xxx' + "and", + "or", + "not", + "!", // synonym for "not" + + // parens for '(foo or bar) and baz' + "(", + ")", + + // stream filter tokens for '_stream:{foo=~"bar", baz="a"}' + "{", + "}", + "=", + "!=", + "=~", + "!~", + ",", + + // delimiter between query parts: + // 'foo and bar | extract "<*> foo