mirror of https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00

app/victoria-logs: initial code release

parent aeac39cfd1
commit 87b66db47d
82 changed files with 31486 additions and 1 deletion
Makefile (62 lines changed)

@@ -21,6 +21,7 @@ include package/release/Makefile

all: \
    victoria-metrics-prod \
    victoria-logs-prod \
    vmagent-prod \
    vmalert-prod \
    vmauth-prod \

@@ -33,6 +34,7 @@ clean:

publish: docker-scan \
    publish-victoria-metrics \
    publish-victoria-logs \
    publish-vmagent \
    publish-vmalert \
    publish-vmauth \

@@ -42,6 +44,7 @@ publish: docker-scan \

package: \
    package-victoria-metrics \
    package-victoria-logs \
    package-vmagent \
    package-vmalert \
    package-vmauth \

@@ -178,6 +181,7 @@ publish-release:

release: \
    release-victoria-metrics \
    release-victoria-logs \
    release-vmutils

release-victoria-metrics: \

@@ -191,7 +195,6 @@ release-victoria-metrics: \

    release-victoria-metrics-openbsd-amd64 \
    release-victoria-metrics-windows-amd64

# adds i386 arch
release-victoria-metrics-linux-386:
    GOOS=linux GOARCH=386 $(MAKE) release-victoria-metrics-goos-goarch

@@ -238,6 +241,63 @@ release-victoria-metrics-windows-goarch: victoria-metrics-windows-$(GOARCH)-prod

    cd bin && rm -rf \
        victoria-metrics-windows-$(GOARCH)-prod.exe

release-victoria-logs: \
    release-victoria-logs-linux-386 \
    release-victoria-logs-linux-amd64 \
    release-victoria-logs-linux-arm \
    release-victoria-logs-linux-arm64 \
    release-victoria-logs-darwin-amd64 \
    release-victoria-logs-darwin-arm64 \
    release-victoria-logs-freebsd-amd64 \
    release-victoria-logs-openbsd-amd64 \
    release-victoria-logs-windows-amd64

release-victoria-logs-linux-386:
    GOOS=linux GOARCH=386 $(MAKE) release-victoria-logs-goos-goarch

release-victoria-logs-linux-amd64:
    GOOS=linux GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch

release-victoria-logs-linux-arm:
    GOOS=linux GOARCH=arm $(MAKE) release-victoria-logs-goos-goarch

release-victoria-logs-linux-arm64:
    GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-logs-goos-goarch

release-victoria-logs-darwin-amd64:
    GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch

release-victoria-logs-darwin-arm64:
    GOOS=darwin GOARCH=arm64 $(MAKE) release-victoria-logs-goos-goarch

release-victoria-logs-freebsd-amd64:
    GOOS=freebsd GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch

release-victoria-logs-openbsd-amd64:
    GOOS=openbsd GOARCH=amd64 $(MAKE) release-victoria-logs-goos-goarch

release-victoria-logs-windows-amd64:
    GOARCH=amd64 $(MAKE) release-victoria-logs-windows-goarch

release-victoria-logs-goos-goarch: victoria-logs-$(GOOS)-$(GOARCH)-prod
    cd bin && \
        tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf victoria-logs-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
            victoria-logs-$(GOOS)-$(GOARCH)-prod \
        && sha256sum victoria-logs-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
            victoria-logs-$(GOOS)-$(GOARCH)-prod \
            | sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > victoria-logs-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
    cd bin && rm -rf victoria-logs-$(GOOS)-$(GOARCH)-prod

release-victoria-logs-windows-goarch: victoria-logs-windows-$(GOARCH)-prod
    cd bin && \
        zip victoria-logs-windows-$(GOARCH)-$(PKG_TAG).zip \
            victoria-logs-windows-$(GOARCH)-prod.exe \
        && sha256sum victoria-logs-windows-$(GOARCH)-$(PKG_TAG).zip \
            victoria-logs-windows-$(GOARCH)-prod.exe \
            > victoria-logs-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
    cd bin && rm -rf \
        victoria-logs-windows-$(GOARCH)-prod.exe

release-vmutils: \
    release-vmutils-linux-386 \
    release-vmutils-linux-amd64 \
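As a usage sketch (not part of the diff): the new release targets are invoked from the repository root like the existing victoria-metrics ones; `PKG_TAG` is assumed to be supplied by the shared build tooling, which is outside this diff.

```bash
# Build release archives for a single platform, or for all platforms listed above.
make release-victoria-logs-linux-amd64
make release-victoria-logs

# The tar/sha256sum rules above produce artifacts under bin/, e.g.
#   victoria-logs-linux-amd64-<PKG_TAG>.tar.gz
#   victoria-logs-linux-amd64-<PKG_TAG>_checksums.txt
ls bin/
```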
app/victoria-logs/Makefile (new file, 103 lines)

@@ -0,0 +1,103 @@

# All these commands must run from repository root.

victoria-logs:
    APP_NAME=victoria-logs $(MAKE) app-local

victoria-logs-race:
    APP_NAME=victoria-logs RACE=-race $(MAKE) app-local

victoria-logs-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker

victoria-logs-pure-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-pure

victoria-logs-linux-amd64-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-amd64

victoria-logs-linux-arm-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-arm

victoria-logs-linux-arm64-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-arm64

victoria-logs-linux-ppc64le-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-ppc64le

victoria-logs-linux-386-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-linux-386

victoria-logs-darwin-amd64-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-darwin-amd64

victoria-logs-darwin-arm64-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-darwin-arm64

victoria-logs-freebsd-amd64-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-freebsd-amd64

victoria-logs-openbsd-amd64-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-openbsd-amd64

victoria-logs-windows-amd64-prod:
    APP_NAME=victoria-logs $(MAKE) app-via-docker-windows-amd64

package-victoria-logs:
    APP_NAME=victoria-logs $(MAKE) package-via-docker

package-victoria-logs-pure:
    APP_NAME=victoria-logs $(MAKE) package-via-docker-pure

package-victoria-logs-amd64:
    APP_NAME=victoria-logs $(MAKE) package-via-docker-amd64

package-victoria-logs-arm:
    APP_NAME=victoria-logs $(MAKE) package-via-docker-arm

package-victoria-logs-arm64:
    APP_NAME=victoria-logs $(MAKE) package-via-docker-arm64

package-victoria-logs-ppc64le:
    APP_NAME=victoria-logs $(MAKE) package-via-docker-ppc64le

package-victoria-logs-386:
    APP_NAME=victoria-logs $(MAKE) package-via-docker-386

publish-victoria-logs:
    APP_NAME=victoria-logs $(MAKE) publish-via-docker

victoria-logs-linux-amd64:
    APP_NAME=victoria-logs CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch

victoria-logs-linux-arm:
    APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch

victoria-logs-linux-arm64:
    APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch

victoria-logs-linux-ppc64le:
    APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch

victoria-logs-linux-s390x:
    APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch

victoria-logs-linux-386:
    APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch

victoria-logs-darwin-amd64:
    APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch

victoria-logs-darwin-arm64:
    APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch

victoria-logs-freebsd-amd64:
    APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch

victoria-logs-openbsd-amd64:
    APP_NAME=victoria-logs CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch

victoria-logs-windows-amd64:
    GOARCH=amd64 APP_NAME=victoria-logs $(MAKE) app-local-windows-goarch

victoria-logs-pure:
    APP_NAME=victoria-logs $(MAKE) app-local-pure
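A hedged sketch of how these per-app targets are typically used from the repository root; the app-local/app-via-docker helpers they delegate to live in the shared Makefiles, not in this diff.

```bash
# Local build of the victoria-logs binary (CGO settings per target above).
make victoria-logs

# Local build with the Go race detector enabled.
make victoria-logs-race

# Reproducible production build inside Docker, then Docker image packaging/publishing.
make victoria-logs-prod
make package-victoria-logs
make publish-victoria-logs
```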
app/victoria-logs/deployment/Dockerfile (new file, 8 lines)

@@ -0,0 +1,8 @@

ARG base_image
FROM $base_image

EXPOSE 8428

ENTRYPOINT ["/victoria-logs-prod"]
ARG src_binary
COPY $src_binary ./victoria-logs-prod
app/victoria-logs/main.go (new file, 102 lines)

@@ -0,0 +1,102 @@

package main

import (
    "flag"
    "fmt"
    "net/http"
    "os"
    "time"

    "github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert"
    "github.com/VictoriaMetrics/VictoriaMetrics/app/vlselect"
    "github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
)

var (
    httpListenAddr   = flag.String("httpListenAddr", ":9428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
    useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
        "See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
        "With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
    gogc = flag.Int("gogc", 100, "GOGC to use. See https://tip.golang.org/doc/gc-guide")
)

func main() {
    // Write flags and help message to stdout, since it is easier to grep or pipe.
    flag.CommandLine.SetOutput(os.Stdout)
    flag.Usage = usage
    envflag.Parse()
    cgroup.SetGOGC(*gogc)
    buildinfo.Init()
    logger.Init()
    pushmetrics.Init()

    logger.Infof("starting VictoriaLogs at %q...", *httpListenAddr)
    startTime := time.Now()

    vlstorage.Init()
    vlselect.Init()
    vlinsert.Init()

    go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
    logger.Infof("started VictoriaLogs in %.3f seconds; see https://docs.victoriametrics.com/VictoriaLogs/", time.Since(startTime).Seconds())

    sig := procutil.WaitForSigterm()
    logger.Infof("received signal %s", sig)

    logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
    startTime = time.Now()
    if err := httpserver.Stop(*httpListenAddr); err != nil {
        logger.Fatalf("cannot stop the webservice: %s", err)
    }
    logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())

    vlinsert.Stop()
    vlselect.Stop()
    vlstorage.Stop()

    fs.MustStopDirRemover()

    logger.Infof("the VictoriaLogs has been stopped in %.3f seconds", time.Since(startTime).Seconds())
}

func requestHandler(w http.ResponseWriter, r *http.Request) bool {
    if r.URL.Path == "/" {
        if r.Method != http.MethodGet {
            return false
        }
        w.Header().Add("Content-Type", "text/html; charset=utf-8")
        fmt.Fprintf(w, "<h2>Single-node VictoriaLogs</h2></br>")
        fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/VictoriaLogs/'>https://docs.victoriametrics.com/VictoriaLogs/</a></br>")
        fmt.Fprintf(w, "Useful endpoints:</br>")
        httpserver.WriteAPIHelp(w, [][2]string{
            {"metrics", "available service metrics"},
            {"flags", "command-line flags"},
        })
        return true
    }
    if vlinsert.RequestHandler(w, r) {
        return true
    }
    if vlselect.RequestHandler(w, r) {
        return true
    }
    return false
}

func usage() {
    const s = `
victoria-logs is a log management and analytics service.

See the docs at https://docs.victoriametrics.com/VictoriaLogs/
`
    flagutil.Usage(s)
}
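A quick smoke test, assuming the binary was built with `make victoria-logs`; the flag names and the `/metrics` and `/flags` endpoints come straight from the code above, while the concrete values are illustrative.

```bash
# Start VictoriaLogs on the default port and with the default GOGC value.
bin/victoria-logs -httpListenAddr=:9428 -gogc=100

# The root page lists useful endpoints; metrics and flags are exposed via httpserver.
curl http://localhost:9428/
curl http://localhost:9428/metrics
curl http://localhost:9428/flags
```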
app/victoria-logs/multiarch/Dockerfile (new file, 12 lines)

@@ -0,0 +1,12 @@

# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
ARG certs_image
ARG root_image
FROM $certs_image as certs
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates

FROM $root_image
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
EXPOSE 8428
ENTRYPOINT ["/victoria-logs-prod"]
ARG TARGETARCH
COPY victoria-logs-linux-${TARGETARCH}-prod ./victoria-logs-prod
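An illustrative build of this multi-arch image; the concrete `certs_image` and `root_image` values and the `bin/` build context are assumptions, since the real values are supplied by the shared packaging targets (BuildKit/buildx sets `TARGETARCH`).

```bash
# Assumes bin/ already contains victoria-logs-linux-<arch>-prod binaries built beforehand.
docker buildx build \
  --platform linux/amd64 \
  --build-arg certs_image=alpine:3.18 \
  --build-arg root_image=scratch \
  -f app/victoria-logs/multiarch/Dockerfile \
  -t victoria-logs:local \
  bin/
```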
app/vlinsert/elasticsearch/bulk_response.qtpl (new file, 20 lines)

@@ -0,0 +1,20 @@

{% stripspace %}

{% func BulkResponse(n int, tookMs int64) %}
{
    "took":{%dl tookMs %},
    "errors":false,
    "items":[
        {% for i := 0; i < n; i++ %}
            {
                "create":{
                    "status":201
                }
            }
            {% if i+1 < n %},{% endif %}
        {% endfor %}
    ]
}
{% endfunc %}

{% endstripspace %}
app/vlinsert/elasticsearch/bulk_response.qtpl.go (new file, 69 lines)

@@ -0,0 +1,69 @@

// Code generated by qtc from "bulk_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.

//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
package elasticsearch

//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
import (
    qtio422016 "io"

    qt422016 "github.com/valyala/quicktemplate"
)

//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
var (
    _ = qtio422016.Copy
    _ = qt422016.AcquireByteBuffer
)

//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
func StreamBulkResponse(qw422016 *qt422016.Writer, n int, tookMs int64) {
//line app/vlinsert/elasticsearch/bulk_response.qtpl:3
    qw422016.N().S(`{"took":`)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:5
    qw422016.N().DL(tookMs)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:5
    qw422016.N().S(`,"errors":false,"items":[`)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:8
    for i := 0; i < n; i++ {
//line app/vlinsert/elasticsearch/bulk_response.qtpl:8
        qw422016.N().S(`{"create":{"status":201}}`)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:14
        if i+1 < n {
//line app/vlinsert/elasticsearch/bulk_response.qtpl:14
            qw422016.N().S(`,`)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:14
        }
//line app/vlinsert/elasticsearch/bulk_response.qtpl:15
    }
//line app/vlinsert/elasticsearch/bulk_response.qtpl:15
    qw422016.N().S(`]}`)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
}

//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
func WriteBulkResponse(qq422016 qtio422016.Writer, n int, tookMs int64) {
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
    qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
    StreamBulkResponse(qw422016, n, tookMs)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
    qt422016.ReleaseWriter(qw422016)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
}

//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
func BulkResponse(n int, tookMs int64) string {
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
    qb422016 := qt422016.AcquireByteBuffer()
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
    WriteBulkResponse(qb422016, n, tookMs)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
    qs422016 := string(qb422016.B)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
    qt422016.ReleaseByteBuffer(qb422016)
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
    return qs422016
//line app/vlinsert/elasticsearch/bulk_response.qtpl:18
}
app/vlinsert/elasticsearch/elasticsearch.go (new file, 410 lines)

@@ -0,0 +1,410 @@

package elasticsearch

import (
    "bufio"
    "errors"
    "fmt"
    "io"
    "math"
    "net/http"
    "strconv"
    "strings"
    "sync"
    "time"

    "github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
    "github.com/VictoriaMetrics/metrics"
    "github.com/valyala/fastjson"
)

var (
    maxLineSizeBytes = flagutil.NewBytes("insert.maxLineSizeBytes", 256*1024, "The maximum size of a single line, which can be read by /insert/* handlers")
)

// RequestHandler processes ElasticSearch insert requests
func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
    w.Header().Add("Content-Type", "application/json")
    // This header is needed for Logstash
    w.Header().Set("X-Elastic-Product", "Elasticsearch")

    if strings.HasPrefix(path, "/_ilm/policy") {
        // Return fake response for ElasticSearch ilm request.
        fmt.Fprintf(w, `{}`)
        return true
    }
    if strings.HasPrefix(path, "/_index_template") {
        // Return fake response for ElasticSearch index template request.
        fmt.Fprintf(w, `{}`)
        return true
    }
    if strings.HasPrefix(path, "/_ingest") {
        // Return fake response for ElasticSearch ingest pipeline request.
        // See: https://www.elastic.co/guide/en/elasticsearch/reference/8.8/put-pipeline-api.html
        fmt.Fprintf(w, `{}`)
        return true
    }
    if strings.HasPrefix(path, "/_nodes") {
        // Return fake response for ElasticSearch nodes discovery request.
        // See: https://www.elastic.co/guide/en/elasticsearch/reference/8.8/cluster.html
        fmt.Fprintf(w, `{}`)
        return true
    }
    switch path {
    case "/":
        switch r.Method {
        case http.MethodGet:
            // Return fake response for ElasticSearch ping request.
            // See the latest available version for ElasticSearch at https://github.com/elastic/elasticsearch/releases
            fmt.Fprintf(w, `{
            "version": {
                "number": "8.8.0"
            }
        }`)
        case http.MethodHead:
            // Return empty response for Logstash ping request.
        }

        return true
    case "/_license":
        // Return fake response for ElasticSearch license request.
        fmt.Fprintf(w, `{
            "license": {
                "uid": "cbff45e7-c553-41f7-ae4f-9205eabd80xx",
                "type": "oss",
                "status": "active",
                "expiry_date_in_millis" : 4000000000000
            }
        }`)
        return true
    case "/_bulk":
        startTime := time.Now()
        bulkRequestsTotal.Inc()

        // Extract tenantID
        tenantID, err := logstorage.GetTenantIDFromRequest(r)
        if err != nil {
            httpserver.Errorf(w, r, "%s", err)
            return true
        }

        // Extract time field name from _time_field query arg
        var timeField = "_time"
        if tf := r.FormValue("_time_field"); tf != "" {
            timeField = tf
        }

        // Extract message field name from _msg_field query arg
        var msgField = ""
        if msgf := r.FormValue("_msg_field"); msgf != "" {
            msgField = msgf
        }

        // Extract stream field names from _stream_fields query arg
        var streamFields []string
        if sfs := r.FormValue("_stream_fields"); sfs != "" {
            streamFields = strings.Split(sfs, ",")
        }

        // Extract field names, which must be ignored
        var ignoreFields []string
        if ifs := r.FormValue("ignore_fields"); ifs != "" {
            ignoreFields = strings.Split(ifs, ",")
        }

        lr := logstorage.GetLogRows(streamFields, ignoreFields)
        processLogMessage := func(timestamp int64, fields []logstorage.Field) {
            lr.MustAdd(tenantID, timestamp, fields)
            if lr.NeedFlush() {
                vlstorage.MustAddRows(lr)
                lr.Reset()
            }
        }

        isGzip := r.Header.Get("Content-Encoding") == "gzip"
        n, err := readBulkRequest(r.Body, isGzip, timeField, msgField, processLogMessage)
        if err != nil {
            logger.Warnf("cannot decode log message #%d in /_bulk request: %s", n, err)
            return true
        }
        vlstorage.MustAddRows(lr)
        logstorage.PutLogRows(lr)

        tookMs := time.Since(startTime).Milliseconds()
        bw := bufferedwriter.Get(w)
        defer bufferedwriter.Put(bw)
        WriteBulkResponse(bw, n, tookMs)
        _ = bw.Flush()
        return true
    default:
        return false
    }
}

var bulkRequestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/elasticsearch/_bulk"}`)

func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string,
    processLogMessage func(timestamp int64, fields []logstorage.Field),
) (int, error) {
    // See https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html

    if isGzip {
        zr, err := common.GetGzipReader(r)
        if err != nil {
            return 0, fmt.Errorf("cannot read gzipped _bulk request: %w", err)
        }
        defer common.PutGzipReader(zr)
        r = zr
    }

    wcr := writeconcurrencylimiter.GetReader(r)
    defer writeconcurrencylimiter.PutReader(wcr)

    lb := lineBufferPool.Get()
    defer lineBufferPool.Put(lb)

    lb.B = bytesutil.ResizeNoCopyNoOverallocate(lb.B, maxLineSizeBytes.IntN())
    sc := bufio.NewScanner(wcr)
    sc.Buffer(lb.B, len(lb.B))

    n := 0
    nCheckpoint := 0
    for {
        ok, err := readBulkLine(sc, timeField, msgField, processLogMessage)
        wcr.DecConcurrency()
        if err != nil || !ok {
            rowsIngestedTotal.Add(n - nCheckpoint)
            return n, err
        }
        n++
        if batchSize := n - nCheckpoint; n >= 1000 {
            rowsIngestedTotal.Add(batchSize)
            nCheckpoint = n
        }
    }
}

var lineBufferPool bytesutil.ByteBufferPool

var rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="elasticsearch_bulk"}`)

func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
    processLogMessage func(timestamp int64, fields []logstorage.Field),
) (bool, error) {
    // Decode command, must be "create" or "index"
    if !sc.Scan() {
        if err := sc.Err(); err != nil {
            if errors.Is(err, bufio.ErrTooLong) {
                return false, fmt.Errorf(`cannot read "create" or "index" command, since its size exceeds -insert.maxLineSizeBytes=%d`, maxLineSizeBytes.IntN())
            }
            return false, err
        }
        return false, nil
    }
    line := sc.Bytes()
    p := parserPool.Get()
    v, err := p.ParseBytes(line)
    if err != nil {
        return false, fmt.Errorf(`cannot parse "create" or "index" command: %w`, err)
    }
    if v.GetObject("create") == nil && v.GetObject("index") == nil {
        return false, fmt.Errorf(`unexpected command %q; expected "create" or "index"`, v)
    }
    parserPool.Put(p)

    // Decode log message
    if !sc.Scan() {
        if err := sc.Err(); err != nil {
            if errors.Is(err, bufio.ErrTooLong) {
                return false, fmt.Errorf("cannot read log message, since its size exceeds -insert.maxLineSizeBytes=%d", maxLineSizeBytes.IntN())
            }
            return false, err
        }
        return false, fmt.Errorf(`missing log message after the "create" or "index" command`)
    }
    line = sc.Bytes()
    pctx := getParserCtx()
    if err := pctx.parseLogMessage(line); err != nil {
        invalidJSONLineLogger.Warnf("cannot parse json-encoded log entry: %s", err)
        return true, nil
    }

    timestamp, err := extractTimestampFromFields(timeField, pctx.fields)
    if err != nil {
        invalidTimestampLogger.Warnf("skipping the log entry because cannot parse timestamp: %s", err)
        return true, nil
    }
    updateMessageFieldName(msgField, pctx.fields)
    processLogMessage(timestamp, pctx.fields)
    putParserCtx(pctx)
    return true, nil
}

var parserPool fastjson.ParserPool

var (
    invalidTimestampLogger = logger.WithThrottler("invalidTimestampLogger", 5*time.Second)
    invalidJSONLineLogger  = logger.WithThrottler("invalidJSONLineLogger", 5*time.Second)
)

func extractTimestampFromFields(timeField string, fields []logstorage.Field) (int64, error) {
    for i := range fields {
        f := &fields[i]
        if f.Name != timeField {
            continue
        }
        timestamp, err := parseElasticsearchTimestamp(f.Value)
        if err != nil {
            return 0, err
        }
        f.Value = ""
        return timestamp, nil
    }
    return time.Now().UnixNano(), nil
}

func updateMessageFieldName(msgField string, fields []logstorage.Field) {
    if msgField == "" {
        return
    }
    for i := range fields {
        f := &fields[i]
        if f.Name == msgField {
            f.Name = "_msg"
            return
        }
    }
}

type parserCtx struct {
    p         fastjson.Parser
    buf       []byte
    prefixBuf []byte
    fields    []logstorage.Field
}

func (pctx *parserCtx) reset() {
    pctx.buf = pctx.buf[:0]
    pctx.prefixBuf = pctx.prefixBuf[:0]

    fields := pctx.fields
    for i := range fields {
        lf := &fields[i]
        lf.Name = ""
        lf.Value = ""
    }
    pctx.fields = fields[:0]
}

func getParserCtx() *parserCtx {
    v := parserCtxPool.Get()
    if v == nil {
        return &parserCtx{}
    }
    return v.(*parserCtx)
}

func putParserCtx(pctx *parserCtx) {
    pctx.reset()
    parserCtxPool.Put(pctx)
}

var parserCtxPool sync.Pool

func (pctx *parserCtx) parseLogMessage(msg []byte) error {
    s := bytesutil.ToUnsafeString(msg)
    v, err := pctx.p.Parse(s)
    if err != nil {
        return fmt.Errorf("cannot parse json: %w", err)
    }
    if t := v.Type(); t != fastjson.TypeObject {
        return fmt.Errorf("expecting json dictionary; got %s", t)
    }
    pctx.reset()
    pctx.fields, pctx.buf, pctx.prefixBuf = appendLogFields(pctx.fields, pctx.buf, pctx.prefixBuf, v)
    return nil
}

func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]logstorage.Field, []byte, []byte) {
    o := v.GetObject()
    o.Visit(func(k []byte, v *fastjson.Value) {
        t := v.Type()
        switch t {
        case fastjson.TypeNull:
            // Skip nulls
        case fastjson.TypeObject:
            // Flatten nested JSON objects.
            // For example, {"foo":{"bar":"baz"}} is converted to {"foo.bar":"baz"}
            prefixLen := len(prefixBuf)
            prefixBuf = append(prefixBuf, k...)
            prefixBuf = append(prefixBuf, '.')
            dst, dstBuf, prefixBuf = appendLogFields(dst, dstBuf, prefixBuf, v)
            prefixBuf = prefixBuf[:prefixLen]
        case fastjson.TypeArray, fastjson.TypeNumber, fastjson.TypeTrue, fastjson.TypeFalse:
            // Convert JSON arrays, numbers, true and false values to their string representation
            dstBufLen := len(dstBuf)
            dstBuf = v.MarshalTo(dstBuf)
            value := dstBuf[dstBufLen:]
            dst, dstBuf = appendLogField(dst, dstBuf, prefixBuf, k, value)
        case fastjson.TypeString:
            // Decode JSON strings
            dstBufLen := len(dstBuf)
            dstBuf = append(dstBuf, v.GetStringBytes()...)
            value := dstBuf[dstBufLen:]
            dst, dstBuf = appendLogField(dst, dstBuf, prefixBuf, k, value)
        default:
            logger.Panicf("BUG: unexpected JSON type: %s", t)
        }
    })
    return dst, dstBuf, prefixBuf
}

func appendLogField(dst []logstorage.Field, dstBuf, prefixBuf, k, value []byte) ([]logstorage.Field, []byte) {
    dstBufLen := len(dstBuf)
    dstBuf = append(dstBuf, prefixBuf...)
    dstBuf = append(dstBuf, k...)
    name := dstBuf[dstBufLen:]

    dst = append(dst, logstorage.Field{
        Name:  bytesutil.ToUnsafeString(name),
        Value: bytesutil.ToUnsafeString(value),
    })
    return dst, dstBuf
}

func parseElasticsearchTimestamp(s string) (int64, error) {
    if len(s) < len("YYYY-MM-DD") || s[len("YYYY")] != '-' {
        // Try parsing timestamp in milliseconds
        n, err := strconv.ParseInt(s, 10, 64)
        if err != nil {
            return 0, fmt.Errorf("cannot parse timestamp in milliseconds from %q: %w", s, err)
        }
        if n > int64(math.MaxInt64)/1e6 {
            return 0, fmt.Errorf("too big timestamp in milliseconds: %d; mustn't exceed %d", n, int64(math.MaxInt64)/1e6)
        }
        if n < int64(math.MinInt64)/1e6 {
            return 0, fmt.Errorf("too small timestamp in milliseconds: %d; must be bigger than %d", n, int64(math.MinInt64)/1e6)
        }
        n *= 1e6
        return n, nil
    }
    if len(s) == len("YYYY-MM-DD") {
        t, err := time.Parse("2006-01-02", s)
        if err != nil {
            return 0, fmt.Errorf("cannot parse date %q: %w", s, err)
        }
        return t.UnixNano(), nil
    }
    t, err := time.Parse(time.RFC3339, s)
    if err != nil {
        return 0, fmt.Errorf("cannot parse timestamp %q: %w", s, err)
    }
    return t.UnixNano(), nil
}
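A hedged ingestion example against the `/_bulk` handler above (reachable as `/insert/elasticsearch/_bulk`, see `app/vlinsert/main.go` below). The `_time_field`, `_msg_field` and `_stream_fields` query args are read by the handler; the sample log fields and host/port are illustrative.

```bash
curl -X POST 'http://localhost:9428/insert/elasticsearch/_bulk?_time_field=@timestamp&_msg_field=message&_stream_fields=host,app' \
  --data-binary $'{"create":{}}\n{"@timestamp":"2023-06-06T04:48:11.735Z","host":"web-1","app":"nginx","message":"GET /index.html 200"}\n{"create":{}}\n{"@timestamp":"2023-06-06T04:48:12.735Z","host":"web-1","app":"nginx","message":"GET /missing 404"}\n'
# The handler replies with the BulkResponse template, e.g. {"took":1,"errors":false,"items":[...]}.
```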
app/vlinsert/elasticsearch/elasticsearch_test.go (new file, 97 lines)

@@ -0,0 +1,97 @@

package elasticsearch

import (
    "bytes"
    "compress/gzip"
    "fmt"
    "reflect"
    "strings"
    "testing"

    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

func TestReadBulkRequest(t *testing.T) {
    f := func(data, timeField, msgField string, rowsExpected int, timestampsExpected []int64, resultExpected string) {
        t.Helper()

        var timestamps []int64
        var result string
        processLogMessage := func(timestamp int64, fields []logstorage.Field) {
            timestamps = append(timestamps, timestamp)

            a := make([]string, len(fields))
            for i, f := range fields {
                a[i] = fmt.Sprintf("%q:%q", f.Name, f.Value)
            }
            s := "{" + strings.Join(a, ",") + "}\n"
            result += s
        }

        // Read the request without compression
        r := bytes.NewBufferString(data)
        rows, err := readBulkRequest(r, false, timeField, msgField, processLogMessage)
        if err != nil {
            t.Fatalf("unexpected error: %s", err)
        }
        if rows != rowsExpected {
            t.Fatalf("unexpected rows read; got %d; want %d", rows, rowsExpected)
        }

        if !reflect.DeepEqual(timestamps, timestampsExpected) {
            t.Fatalf("unexpected timestamps;\ngot\n%d\nwant\n%d", timestamps, timestampsExpected)
        }
        if result != resultExpected {
            t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected)
        }

        // Read the request with compression
        timestamps = nil
        result = ""
        compressedData := compressData(data)
        r = bytes.NewBufferString(compressedData)
        rows, err = readBulkRequest(r, true, timeField, msgField, processLogMessage)
        if err != nil {
            t.Fatalf("unexpected error: %s", err)
        }
        if rows != rowsExpected {
            t.Fatalf("unexpected rows read; got %d; want %d", rows, rowsExpected)
        }

        if !reflect.DeepEqual(timestamps, timestampsExpected) {
            t.Fatalf("unexpected timestamps;\ngot\n%d\nwant\n%d", timestamps, timestampsExpected)
        }
        if result != resultExpected {
            t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected)
        }
    }

    data := `{"create":{"_index":"filebeat-8.8.0"}}
{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"}
{"create":{"_index":"filebeat-8.8.0"}}
{"@timestamp":"2023-06-06T04:48:12.735Z","message":"baz"}
{"create":{"_index":"filebeat-8.8.0"}}
{"message":"xyz","@timestamp":"2023-06-06T04:48:13.735Z","x":"y"}
`
    timeField := "@timestamp"
    msgField := "message"
    rowsExpected := 3
    timestampsExpected := []int64{1686026891735000000, 1686026892735000000, 1686026893735000000}
    resultExpected := `{"@timestamp":"","log.offset":"71770","log.file.path":"/var/log/auth.log","_msg":"foobar"}
{"@timestamp":"","_msg":"baz"}
{"_msg":"xyz","@timestamp":"","x":"y"}
`
    f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected)
}

func compressData(s string) string {
    var bb bytes.Buffer
    zw := gzip.NewWriter(&bb)
    if _, err := zw.Write([]byte(s)); err != nil {
        panic(fmt.Errorf("unexpected error when compressing data: %s", err))
    }
    if err := zw.Close(); err != nil {
        panic(fmt.Errorf("unexpected error when closing gzip writer: %s", err))
    }
    return bb.String()
}
app/vlinsert/elasticsearch/elasticsearch_timing_test.go (new file, 50 lines)

@@ -0,0 +1,50 @@

package elasticsearch

import (
    "bytes"
    "fmt"
    "testing"

    "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

func BenchmarkReadBulkRequest(b *testing.B) {
    b.Run("gzip:off", func(b *testing.B) {
        benchmarkReadBulkRequest(b, false)
    })
    b.Run("gzip:on", func(b *testing.B) {
        benchmarkReadBulkRequest(b, true)
    })
}

func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
    data := `{"create":{"_index":"filebeat-8.8.0"}}
{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"}
{"create":{"_index":"filebeat-8.8.0"}}
{"@timestamp":"2023-06-06T04:48:12.735Z","message":"baz"}
{"create":{"_index":"filebeat-8.8.0"}}
{"message":"xyz","@timestamp":"2023-06-06T04:48:13.735Z","x":"y"}
`
    if isGzip {
        data = compressData(data)
    }
    dataBytes := bytesutil.ToUnsafeBytes(data)

    timeField := "@timestamp"
    msgField := "message"
    processLogMessage := func(timestmap int64, fields []logstorage.Field) {}

    b.ReportAllocs()
    b.SetBytes(int64(len(data)))
    b.RunParallel(func(pb *testing.PB) {
        r := &bytes.Reader{}
        for pb.Next() {
            r.Reset(dataBytes)
            _, err := readBulkRequest(r, isGzip, timeField, msgField, processLogMessage)
            if err != nil {
                panic(fmt.Errorf("unexpected error: %s", err))
            }
        }
    })
}
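The new parser can be exercised locally with the standard Go tooling; the test and benchmark names below match the files above.

```bash
go test ./app/vlinsert/elasticsearch/ -run TestReadBulkRequest
go test ./app/vlinsert/elasticsearch/ -bench BenchmarkReadBulkRequest -benchmem
```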
app/vlinsert/main.go (new file, 34 lines)

@@ -0,0 +1,34 @@

package vlinsert

import (
    "net/http"
    "strings"

    "github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/elasticsearch"
)

// Init initializes vlinsert
func Init() {
}

// Stop stops vlinsert
func Stop() {
}

// RequestHandler handles insert requests for VictoriaLogs
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
    path := r.URL.Path
    if !strings.HasPrefix(path, "/insert/") {
        return false
    }
    path = strings.TrimPrefix(path, "/insert")
    path = strings.ReplaceAll(path, "//", "/")

    switch {
    case strings.HasPrefix(path, "/elasticsearch/"):
        path = strings.TrimPrefix(path, "/elasticsearch")
        return elasticsearch.RequestHandler(path, w, r)
    default:
        return false
    }
}
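Because this handler strips the `/insert/elasticsearch` prefix before delegating, the fake ElasticSearch ping implemented in `elasticsearch.go` is reachable at the path below; this is what ElasticSearch-compatible log shippers use to detect the server (a sketch, host and port illustrative).

```bash
curl http://localhost:9428/insert/elasticsearch/
# => {"version":{"number":"8.8.0"}}
```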
app/vlselect/logsql/logsql.go (new file, 53 lines)

@@ -0,0 +1,53 @@

package logsql

import (
    "net/http"

    "github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

// ProcessQueryRequest handles /select/logsql/query request
func ProcessQueryRequest(w http.ResponseWriter, r *http.Request, stopCh <-chan struct{}) {
    // Extract tenantID
    tenantID, err := logstorage.GetTenantIDFromRequest(r)
    if err != nil {
        httpserver.Errorf(w, r, "%s", err)
        return
    }

    qStr := r.FormValue("query")
    q, err := logstorage.ParseQuery(qStr)
    if err != nil {
        httpserver.Errorf(w, r, "cannot parse query [%s]: %s", qStr, err)
        return
    }
    w.Header().Set("Content-Type", "application/stream+json; charset=utf-8")

    bw := bufferedwriter.Get(w)
    defer bufferedwriter.Put(bw)

    tenantIDs := []logstorage.TenantID{tenantID}
    vlstorage.RunQuery(tenantIDs, q, stopCh, func(columns []logstorage.BlockColumn) {
        if len(columns) == 0 {
            return
        }
        rowsCount := len(columns[0].Values)

        bb := blockResultPool.Get()
        for rowIdx := 0; rowIdx < rowsCount; rowIdx++ {
            WriteJSONRow(bb, columns, rowIdx)
        }
        // Do not check for error here, since the only valid error is when the client
        // closes the connection during Write() call. There is no need in logging this error,
        // since it may be too verbose and it doesn't give any actionable info.
        _, _ = bw.Write(bb.B)
        blockResultPool.Put(bb)
    })
    _ = bw.Flush()
}

var blockResultPool bytesutil.ByteBufferPool
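A hedged querying example for the endpoint above: the handler reads the LogsQL expression from the `query` form value and streams one JSON object per matching log entry (`application/stream+json`). `error` is an illustrative word filter; see the LogsQL docs for the full syntax.

```bash
curl http://localhost:9428/select/logsql/query -d 'query=error'
# Each response line is a JSON object built by WriteJSONRow from the returned columns.
```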
app/vlselect/logsql/query_response.qtpl (new file, 20 lines)

@@ -0,0 +1,20 @@

{% import (
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
) %}

{% stripspace %}

// JSONRow creates JSON row from the given fields.
{% func JSONRow(columns []logstorage.BlockColumn, rowIdx int) %}
{
    {% code c := &columns[0] %}
    {%q= c.Name %}:{%q= c.Values[rowIdx] %}
    {% code columns = columns[1:] %}
    {% for colIdx := range columns %}
        {% code c := &columns[colIdx] %}
        ,{%q= c.Name %}:{%q= c.Values[rowIdx] %}
    {% endfor %}
}{% newline %}
{% endfunc %}

{% endstripspace %}
app/vlselect/logsql/query_response.qtpl.go (new file, 90 lines)

@@ -0,0 +1,90 @@

// Code generated by qtc from "query_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.

//line app/vlselect/logsql/query_response.qtpl:1
package logsql

//line app/vlselect/logsql/query_response.qtpl:1
import (
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)

// JSONRow creates JSON row from the given fields.

//line app/vlselect/logsql/query_response.qtpl:8
import (
    qtio422016 "io"

    qt422016 "github.com/valyala/quicktemplate"
)

//line app/vlselect/logsql/query_response.qtpl:8
var (
    _ = qtio422016.Copy
    _ = qt422016.AcquireByteBuffer
)

//line app/vlselect/logsql/query_response.qtpl:8
func StreamJSONRow(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/query_response.qtpl:8
    qw422016.N().S(`{`)
//line app/vlselect/logsql/query_response.qtpl:10
    c := &columns[0]

//line app/vlselect/logsql/query_response.qtpl:11
    qw422016.N().Q(c.Name)
//line app/vlselect/logsql/query_response.qtpl:11
    qw422016.N().S(`:`)
//line app/vlselect/logsql/query_response.qtpl:11
    qw422016.N().Q(c.Values[rowIdx])
//line app/vlselect/logsql/query_response.qtpl:12
    columns = columns[1:]

//line app/vlselect/logsql/query_response.qtpl:13
    for colIdx := range columns {
//line app/vlselect/logsql/query_response.qtpl:14
        c := &columns[colIdx]

//line app/vlselect/logsql/query_response.qtpl:14
        qw422016.N().S(`,`)
//line app/vlselect/logsql/query_response.qtpl:15
        qw422016.N().Q(c.Name)
//line app/vlselect/logsql/query_response.qtpl:15
        qw422016.N().S(`:`)
//line app/vlselect/logsql/query_response.qtpl:15
        qw422016.N().Q(c.Values[rowIdx])
//line app/vlselect/logsql/query_response.qtpl:16
    }
//line app/vlselect/logsql/query_response.qtpl:16
    qw422016.N().S(`}`)
//line app/vlselect/logsql/query_response.qtpl:17
    qw422016.N().S(`
`)
//line app/vlselect/logsql/query_response.qtpl:18
}

//line app/vlselect/logsql/query_response.qtpl:18
func WriteJSONRow(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/query_response.qtpl:18
    qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/query_response.qtpl:18
    StreamJSONRow(qw422016, columns, rowIdx)
//line app/vlselect/logsql/query_response.qtpl:18
    qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/query_response.qtpl:18
}

//line app/vlselect/logsql/query_response.qtpl:18
func JSONRow(columns []logstorage.BlockColumn, rowIdx int) string {
//line app/vlselect/logsql/query_response.qtpl:18
    qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/query_response.qtpl:18
    WriteJSONRow(qb422016, columns, rowIdx)
//line app/vlselect/logsql/query_response.qtpl:18
    qs422016 := string(qb422016.B)
//line app/vlselect/logsql/query_response.qtpl:18
    qt422016.ReleaseByteBuffer(qb422016)
//line app/vlselect/logsql/query_response.qtpl:18
    return qs422016
//line app/vlselect/logsql/query_response.qtpl:18
}
app/vlselect/main.go (new file, 140 lines)

@@ -0,0 +1,140 @@

package vlselect

import (
    "flag"
    "fmt"
    "net/http"
    "strings"
    "time"

    "github.com/VictoriaMetrics/VictoriaMetrics/app/vlselect/logsql"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
    "github.com/VictoriaMetrics/metrics"
)

var (
    maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. "+
        "It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. "+
        "See also -search.maxQueueDuration")
    maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the search request waits for execution when -search.maxConcurrentRequests "+
        "limit is reached; see also -search.maxQueryDuration")
    maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution")
)

func getDefaultMaxConcurrentRequests() int {
    n := cgroup.AvailableCPUs()
    if n <= 4 {
        n *= 2
    }
    if n > 16 {
        // A single request can saturate all the CPU cores, so there is no sense
        // in allowing higher number of concurrent requests - they will just contend
        // for unavailable CPU time.
        n = 16
    }
    return n
}

// Init initializes vlselect
func Init() {
    concurrencyLimitCh = make(chan struct{}, *maxConcurrentRequests)
}

// Stop stops vlselect
func Stop() {
}

var concurrencyLimitCh chan struct{}

var (
    concurrencyLimitReached = metrics.NewCounter(`vl_concurrent_select_limit_reached_total`)
    concurrencyLimitTimeout = metrics.NewCounter(`vl_concurrent_select_limit_timeout_total`)

    _ = metrics.NewGauge(`vl_concurrent_select_capacity`, func() float64 {
        return float64(cap(concurrencyLimitCh))
    })
    _ = metrics.NewGauge(`vl_concurrent_select_current`, func() float64 {
        return float64(len(concurrencyLimitCh))
    })
)

// RequestHandler handles select requests for VictoriaLogs
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
    path := r.URL.Path
    if !strings.HasPrefix(path, "/select/") {
        return false
    }
    path = strings.TrimPrefix(path, "/select")
    path = strings.ReplaceAll(path, "//", "/")

    // Limit the number of concurrent queries.
    startTime := time.Now()
    stopCh := r.Context().Done()
    select {
    case concurrencyLimitCh <- struct{}{}:
        defer func() { <-concurrencyLimitCh }()
    default:
        // Sleep for a while until giving up. This should resolve short bursts in requests.
        concurrencyLimitReached.Inc()
        d := getMaxQueryDuration(r)
        if d > *maxQueueDuration {
            d = *maxQueueDuration
        }
        t := timerpool.Get(d)
        select {
        case concurrencyLimitCh <- struct{}{}:
            timerpool.Put(t)
            defer func() { <-concurrencyLimitCh }()
        case <-stopCh:
            timerpool.Put(t)
            remoteAddr := httpserver.GetQuotedRemoteAddr(r)
            requestURI := httpserver.GetRequestURI(r)
            logger.Infof("client has cancelled the request after %.3f seconds: remoteAddr=%s, requestURI: %q",
                time.Since(startTime).Seconds(), remoteAddr, requestURI)
            return true
        case <-t.C:
            timerpool.Put(t)
            concurrencyLimitTimeout.Inc()
            err := &httpserver.ErrorWithStatusCode{
                Err: fmt.Errorf("couldn't start executing the request in %.3f seconds, since -search.maxConcurrentRequests=%d concurrent requests "+
                    "are executed. Possible solutions: to reduce query load; to add more compute resources to the server; "+
                    "to increase -search.maxQueueDuration=%s; to increase -search.maxQueryDuration; to increase -search.maxConcurrentRequests",
                    d.Seconds(), *maxConcurrentRequests, maxQueueDuration),
                StatusCode: http.StatusServiceUnavailable,
            }
            httpserver.Errorf(w, r, "%s", err)
            return true
        }
    }

    switch {
    case path == "/logsql/query":
        logsqlQueryRequests.Inc()
        httpserver.EnableCORS(w, r)
        logsql.ProcessQueryRequest(w, r, stopCh)
        return true
    default:
        return false
    }
}

// getMaxQueryDuration returns the maximum duration for query from r.
func getMaxQueryDuration(r *http.Request) time.Duration {
    dms, err := httputils.GetDuration(r, "timeout", 0)
    if err != nil {
        dms = 0
    }
    d := time.Duration(dms) * time.Millisecond
    if d <= 0 || d > *maxQueryDuration {
        d = *maxQueryDuration
    }
    return d
}

var (
    logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
)
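An illustrative tuning of the limits defined above; the values are examples only. The per-request `timeout` query arg read in getMaxQueryDuration can further lower the effective deadline for a single query.

```bash
bin/victoria-logs \
  -search.maxConcurrentRequests=8 \
  -search.maxQueueDuration=30s \
  -search.maxQueryDuration=1m
```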
app/vlstorage/main.go (new file, 149 lines)

@@ -0,0 +1,149 @@

package vlstorage

import (
    "flag"
    "fmt"
    "sync"
    "time"

    "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
    "github.com/VictoriaMetrics/metrics"
)

var (
    retentionPeriod = flagutil.NewDuration("retentionPeriod", "7d", "Log entries with timestamps older than now-retentionPeriod are automatically deleted; "+
        "log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); "+
        "see https://docs.victoriametrics.com/VictoriaLogs/#retention")
    futureRetention = flagutil.NewDuration("futureRetention", "2d", "Log entries with timestamps bigger than now+futureRetention are rejected during data ingestion; "+
        "see https://docs.victoriametrics.com/VictoriaLogs/#retention")
    storageDataPath = flag.String("storageDataPath", "victoria-logs-data", "Path to directory with the VictoriaLogs data; "+
        "see https://docs.victoriametrics.com/VictoriaLogs/#storage")
    inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
        "The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. "+
        "Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
        "Smaller intervals increase disk IO load. Minimum supported value is 1s")
    logNewStreams = flag.Bool("logNewStreams", false, "Whether to log creation of new streams; this can be useful for debugging of high cardinality issues with log streams; "+
        "see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields ; see also -logIngestedRows")
    logIngestedRows = flag.Bool("logIngestedRows", false, "Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; "+
        "see https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion ; see also -logNewStreams")
)

// Init initializes vlstorage.
//
// Stop must be called when vlstorage is no longer needed
func Init() {
    if strg != nil {
        logger.Panicf("BUG: Init() has been already called")
    }

    if retentionPeriod.Msecs < 24*3600*1000 {
        logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s", retentionPeriod)
    }
    cfg := &logstorage.StorageConfig{
        Retention:       time.Millisecond * time.Duration(retentionPeriod.Msecs),
        FlushInterval:   *inmemoryDataFlushInterval,
        FutureRetention: time.Millisecond * time.Duration(futureRetention.Msecs),
        LogNewStreams:   *logNewStreams,
        LogIngestedRows: *logIngestedRows,
    }
    strg = logstorage.MustOpenStorage(*storageDataPath, cfg)
    storageMetrics = initStorageMetrics(strg)
    metrics.RegisterSet(storageMetrics)
}

// Stop stops vlstorage.
func Stop() {
    metrics.UnregisterSet(storageMetrics)
    storageMetrics = nil

    strg.MustClose()
    strg = nil
}

var strg *logstorage.Storage
var storageMetrics *metrics.Set

// MustAddRows adds lr to vlstorage
func MustAddRows(lr *logstorage.LogRows) {
    strg.MustAddRows(lr)
}

// RunQuery runs the given q and calls processBlock for the returned data blocks
func RunQuery(tenantIDs []logstorage.TenantID, q *logstorage.Query, stopCh <-chan struct{}, processBlock func(columns []logstorage.BlockColumn)) {
    strg.RunQuery(tenantIDs, q, stopCh, processBlock)
}

func initStorageMetrics(strg *logstorage.Storage) *metrics.Set {
    ssCache := &logstorage.StorageStats{}
    var ssCacheLock sync.Mutex
    var lastUpdateTime time.Time

    m := func() *logstorage.StorageStats {
        ssCacheLock.Lock()
        defer ssCacheLock.Unlock()
        if time.Since(lastUpdateTime) < time.Second {
            return ssCache
        }
        var ss logstorage.StorageStats
        strg.UpdateStats(&ss)
        ssCache = &ss
        lastUpdateTime = time.Now()
        return ssCache
    }

    ms := metrics.NewSet()

    ms.NewGauge(fmt.Sprintf(`vl_free_disk_space_bytes{path=%q}`, *storageDataPath), func() float64 {
        return float64(fs.MustGetFreeSpace(*storageDataPath))
    })

    ms.NewGauge(`vl_rows{type="inmemory"}`, func() float64 {
        return float64(m().InmemoryRowsCount)
    })
    ms.NewGauge(`vl_rows{type="file"}`, func() float64 {
        return float64(m().FileRowsCount)
    })
    ms.NewGauge(`vl_parts{type="inmemory"}`, func() float64 {
        return float64(m().InmemoryParts)
    })
    ms.NewGauge(`vl_parts{type="file"}`, func() float64 {
        return float64(m().FileParts)
    })
    ms.NewGauge(`vl_blocks{type="inmemory"}`, func() float64 {
        return float64(m().InmemoryBlocks)
    })
    ms.NewGauge(`vl_blocks{type="file"}`, func() float64 {
        return float64(m().FileBlocks)
    })
    ms.NewGauge(`vl_partitions`, func() float64 {
        return float64(m().PartitionsCount)
    })
    ms.NewGauge(`vl_streams_created_total`, func() float64 {
        return float64(m().StreamsCreatedTotal)
    })

    ms.NewGauge(`vl_compressed_data_size_bytes{type="inmemory"}`, func() float64 {
        return float64(m().CompressedInmemorySize)
    })
    ms.NewGauge(`vl_compressed_data_size_bytes{type="file"}`, func() float64 {
        return float64(m().CompressedFileSize)
    })
    ms.NewGauge(`vl_uncompressed_data_size_bytes{type="inmemory"}`, func() float64 {
        return float64(m().UncompressedInmemorySize)
    })
    ms.NewGauge(`vl_uncompressed_data_size_bytes{type="file"}`, func() float64 {
        return float64(m().UncompressedFileSize)
    })

    ms.NewGauge(`vlinsert_rows_dropped_total{reason="too_big_timestamp"}`, func() float64 {
        return float64(m().RowsDroppedTooBigTimestamp)
    })
    ms.NewGauge(`vlinsert_rows_dropped_total{reason="too_small_timestamp"}`, func() float64 {
        return float64(m().RowsDroppedTooSmallTimestamp)
    })

    return ms
}
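An illustrative use of the storage flags defined above, plus a way to watch the gauges registered by initStorageMetrics; paths and values are examples only.

```bash
bin/victoria-logs \
  -storageDataPath=/var/lib/victoria-logs \
  -retentionPeriod=30d \
  -logNewStreams

curl -s http://localhost:9428/metrics | grep -E '^vl_(rows|parts|free_disk_space_bytes)'
```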
docs/VictoriaLogs/LogsQL.md (new file, 1087 lines)

File diff suppressed because it is too large.
481
docs/VictoriaLogs/README.md
Normal file
481
docs/VictoriaLogs/README.md
Normal file
|
@ -0,0 +1,481 @@
|
|||
# VictoriaLogs
|
||||
|
||||
VictoriaLogs is log management and log analytics system from [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/).
|
||||
|
||||
It provides the following key features:
|
||||
|
||||
- VictoriaLogs can accept logs from popular log collectors, which support
|
||||
[ElasticSearch data ingestion format](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html). See [these docs](#data-ingestion).
|
||||
[Grafana Loki data ingestion format](https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki) will be supported in the near future -
|
||||
see [the Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html).
|
||||
- VictoriaLogs is much easier to set up and operate compared to ElasticSearch and Grafana Loki. See [these docs](#operation).
|
||||
- VictoriaLogs provides an easy yet powerful query language with full-text search capabilities across
|
||||
all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) -
|
||||
see [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html).
|
||||
- VictoriaLogs can be seamlessly combined with good old Unix tools for log analysis such as `grep`, `less`, `sort`, `jq`, etc.
|
||||
See [these docs](#querying-via-command-line) for details.
|
||||
- VictoriaLogs' capacity and performance scale linearly with the available resources (CPU, RAM, disk IO, disk space).
|
||||
It runs smoothly on both a Raspberry Pi and a beefy server with hundreds of CPU cores and terabytes of RAM.
|
||||
- VictoriaLogs can handle much bigger data volumes than ElasticSearch and Grafana Loki when running on comparable hardware.
|
||||
A single-node VictoriaLogs instance can substitute a large ElasticSearch cluster.
|
||||
|
||||
## Operation
|
||||
|
||||
### How to run VictoriaLogs
|
||||
|
||||
Check out the VictoriaLogs source code. It is located in the VictoriaMetrics repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/VictoriaMetrics/VictoriaMetrics
|
||||
cd VictoriaMetrics
|
||||
```
|
||||
|
||||
Then build VictoriaLogs. The build command requires [Go 1.20](https://golang.org/doc/install).
|
||||
|
||||
```bash
|
||||
make victoria-logs
|
||||
```
|
||||
|
||||
Then run the built binary:
|
||||
|
||||
```bash
|
||||
bin/victoria-logs
|
||||
```
|
||||
|
||||
VictoriaLogs is ready to [receive logs](#data-ingestion) and [query logs](#querying) at the TCP port `9428` now!
|
||||
It has no external dependencies, so it may run in various environments without additional setup and configuration.
|
||||
VictoriaLogs automatically adapts to the available CPU and RAM resources. It also automatically creates
|
||||
the needed indexes during [data ingestion](#data-ingestion).
|
||||
|
||||
It is possible to change the TCP port via `-httpListenAddr` command-line flag. For example, the following command
|
||||
starts VictoriaLogs, which accepts incoming requests at port `9200` (aka ElasticSearch HTTP API port):
|
||||
|
||||
```bash
|
||||
/path/to/victoria-logs -httpListenAddr=:9200
|
||||
```
|
||||
|
||||
VictoriaLogs stores the ingested data to the `victoria-logs-data` directory by default. The directory can be changed
|
||||
via `-storageDataPath` command-line flag. See [these docs](#storage) for details.
|
||||
|
||||
By default VictoriaLogs stores log entries with timestamps in the time range `[now-7d, now]`, while dropping logs outside the given time range.
|
||||
I.e., it uses a retention of 7 days. Read [these docs](#retention) on how to control the retention for the [ingested](#data-ingestion) logs.
|
||||
|
||||
It is recommended to set up monitoring of VictoriaLogs according to [these docs](#monitoring).
|
||||
|
||||
### Data ingestion
|
||||
|
||||
VictoriaLogs supports the following data ingestion techniques:
|
||||
|
||||
- Via [Filebeat](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-overview.html). See [these docs](#filebeat-setup).
|
||||
- Via [Logstash](https://www.elastic.co/guide/en/logstash/current/introduction.html). See [these docs](#logstash-setup).
|
||||
|
||||
The ingested log entries can be queried according to [these docs](#querying).
|
||||
|
||||
#### Data ingestion troubleshooting
|
||||
|
||||
VictoriaLogs provides the following command-line flags, which can help debugging data ingestion issues:
|
||||
|
||||
- `-logNewStreams` - if this flag is passed to VictoriaLogs, then it logs all the newly
|
||||
registered [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
|
||||
This may help debugging [high cardinality issues](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality).
|
||||
- `-logIngestedRows` - if this flag is passed to VictoriaLogs, then it logs all the ingested
|
||||
[log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
|
||||
VictoriaLogs exposes various [metrics](#monitoring), which may help debugging data ingestion issues:
|
||||
|
||||
- `vl_rows_ingested_total` - the number of ingested [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
since the last VictoriaLogs restart. If this number increases over time, then logs are successfully ingested into VictoriaLogs.
|
||||
The ingested logs can be inspected in the VictoriaLogs logs by passing the `-logIngestedRows` command-line flag to it.
|
||||
- `vl_streams_created_total` - the number of created [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
|
||||
since the last VictoriaLogs restart. If this metric grows rapidly during extended periods of time, then this may lead
|
||||
to [high cardinality issues](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality).
|
||||
The newly created log streams can be inspected in the VictoriaLogs logs by passing the `-logNewStreams` command-line flag to it.
|
||||
|
||||
#### Filebeat setup
|
||||
|
||||
Specify the [`output.elasticsearch`](https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html) section in the `filebeat.yml`
|
||||
for sending the collected logs to VictoriaLogs:
|
||||
|
||||
```yml
|
||||
output.elasticsearch:
|
||||
hosts: ["http://localhost:9428/insert/elasticsearch/"]
|
||||
parameters:
|
||||
_msg_field: "message"
|
||||
_time_field: "@timestamp"
|
||||
_stream_fields: "host.hostname,log.file.path"
|
||||
```
|
||||
|
||||
Substitute the `localhost:9428` address inside `hosts` section with the real TCP address of VictoriaLogs.
|
||||
|
||||
The `_msg_field` parameter must contain the field name with the log message generated by Filebeat. This is usually the `message` field.
|
||||
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) for details.
|
||||
|
||||
The `_time_field` parameter must contain the field name with the log timestamp generated by Filebeat. This is usually the `@timestamp` field.
|
||||
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) for details.
|
||||
|
||||
It is recommended to specify a comma-separated list of field names, which uniquely identify every log stream collected by Filebeat, in the `_stream_fields` parameter.
|
||||
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) for details.
|
||||
|
||||
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) aren't needed,
|
||||
then VictoriaLogs can be instructed to ignore them during data ingestion - just pass the `ignore_fields` parameter with a comma-separated list of fields to ignore.
|
||||
For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
|
||||
|
||||
```yml
|
||||
output.elasticsearch:
|
||||
hosts: ["http://localhost:9428/insert/elasticsearch/"]
|
||||
parameters:
|
||||
_msg_field: "message"
|
||||
_time_field: "@timestamp"
|
||||
_stream_fields: "host.name,log.file.path"
|
||||
ignore_fields: "log.offset,event.original"
|
||||
```
|
||||
|
||||
When Filebeat ingests logs into VictoriaLogs at a high rate, it may be necessary to tune the `worker` and `bulk_max_size` options.
|
||||
For example, the following config is optimized for higher than usual ingestion rate:
|
||||
|
||||
```yml
|
||||
output.elasticsearch:
|
||||
hosts: ["http://localhost:9428/insert/elasticsearch/"]
|
||||
parameters:
|
||||
_msg_field: "message"
|
||||
_time_field: "@timestamp"
|
||||
_stream_fields: "host.name,log.file.path"
|
||||
worker: 8
|
||||
bulk_max_size: 1000
|
||||
```
|
||||
|
||||
If Filebeat sends logs to VictoriaLogs in another datacenter, then it may be useful to enable data compression via the `compression_level` option.
|
||||
This usually allows saving network bandwidth and costs by up to 5 times:
|
||||
|
||||
```yml
|
||||
output.elasticsearch:
|
||||
hosts: ["http://localhost:9428/insert/elasticsearch/"]
|
||||
parameters:
|
||||
_msg_field: "message"
|
||||
_time_field: "@timestamp"
|
||||
_stream_fields: "host.name,log.file.path"
|
||||
compression_level: 1
|
||||
```
|
||||
|
||||
By default the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy).
|
||||
If you need to store logs in another tenant, then specify the needed tenant via the `headers` option in the `output.elasticsearch` section.
|
||||
For example, the following `filebeat.yml` config instructs Filebeat to store the data to `(AccountID=12, ProjectID=34)` tenant:
|
||||
|
||||
```yml
|
||||
output.elasticsearch:
|
||||
hosts: ["http://localhost:9428/insert/elasticsearch/"]
|
||||
headers:
|
||||
AccountID: 12
|
||||
ProjectID: 34
|
||||
parameters:
|
||||
_msg_field: "message"
|
||||
_time_field: "@timestamp"
|
||||
_stream_fields: "host.name,log.file.path"
|
||||
```
|
||||
|
||||
The ingested log entries can be queried according to [these docs](#querying).
|
||||
|
||||
See also [data ingestion troubleshooting](#data-ingestion-troubleshooting) docs.
|
||||
|
||||
#### Logstash setup
|
||||
|
||||
Specify [`output.elasticsearch`](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html) section in the `logstash.conf` file
|
||||
for sending the collected logs to VictoriaLogs:
|
||||
|
||||
```conf
|
||||
output {
|
||||
elasticsearch {
|
||||
hosts => ["http://localhost:9428/insert/elasticsearch/"]
|
||||
parameters => {
|
||||
"_msg_field" => "message"
|
||||
"_time_field" => "@timestamp"
|
||||
"_stream_fields" => "host.name,process.name"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Substitute `localhost:9428` address inside `hosts` with the real TCP address of VictoriaLogs.
|
||||
|
||||
The `_msg_field` parameter must contain the field name with the log message generated by Logstash. This is usually the `message` field.
|
||||
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) for details.
|
||||
|
||||
The `_time_field` parameter must contain the field name with the log timestamp generated by Logstash. This is usually the `@timestamp` field.
|
||||
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) for details.
|
||||
|
||||
It is recommended to specify a comma-separated list of field names, which uniquely identify every log stream collected by Logstash, in the `_stream_fields` parameter.
|
||||
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) for details.
|
||||
|
||||
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) aren't needed,
|
||||
then VictoriaLogs can be instructed to ignore them during data ingestion - just pass the `ignore_fields` parameter with a comma-separated list of fields to ignore.
|
||||
For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
|
||||
|
||||
```conf
|
||||
output {
|
||||
elasticsearch {
|
||||
hosts => ["http://localhost:9428/insert/elasticsearch/"]
|
||||
parameters => {
|
||||
"_msg_field" => "message"
|
||||
"_time_field" => "@timestamp"
|
||||
"_stream_fields" => "host.hostname,process.name"
|
||||
"ignore_fields" => "log.offset,event.original"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If Logstash sends logs to VictoriaLogs in another datacenter, then it may be useful to enable data compression via the `http_compression => true` option.
|
||||
This usually allows saving network bandwidth and costs by up to 5 times:
|
||||
|
||||
```conf
|
||||
output {
|
||||
elasticsearch {
|
||||
hosts => ["http://localhost:9428/insert/elasticsearch/"]
|
||||
parameters => {
|
||||
"_msg_field" => "message"
|
||||
"_time_field" => "@timestamp"
|
||||
"_stream_fields" => "host.hostname,process.name"
|
||||
}
|
||||
http_compression => true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
By default the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy).
|
||||
If you need to store logs in another tenant, then specify the needed tenant via `custom_headers` in the `output.elasticsearch` section.
|
||||
For example, the following `logstash.conf` config instructs Logstash to store the data to `(AccountID=12, ProjectID=34)` tenant:
|
||||
|
||||
```conf
|
||||
output {
|
||||
elasticsearch {
|
||||
hosts => ["http://localhost:9428/insert/elasticsearch/"]
|
||||
custom_headers => {
|
||||
"AccountID" => "1"
|
||||
"ProjectID" => "2"
|
||||
}
|
||||
parameters => {
|
||||
"_msg_field" => "message"
|
||||
"_time_field" => "@timestamp"
|
||||
"_stream_fields" => "host.hostname,process.name"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The ingested log entries can be queried according to [these docs](#querying).
|
||||
|
||||
See also [data ingestion troubleshooting](#data-ingestion-troubleshooting) docs.
|
||||
|
||||
### Querying
|
||||
|
||||
VictoriaLogs can be queried at the `/select/logsql/query` endpoint. The [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html)
|
||||
query must be passed via `query` argument. For example, the following query returns all the log entries with the `error` word:
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=error'
|
||||
```
|
||||
|
||||
The `query` argument can be passed either in the request url itself (aka HTTP GET request) or via request body
|
||||
with the `x-www-form-urlencoded` encoding (aka HTTP POST request). The HTTP POST is useful for sending long queries
|
||||
when they do not fit the maximum url length of the used clients and proxies.
|
||||
|
||||
See [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) for details on what can be passed to the `query` arg.
|
||||
The `query` arg must be properly encoded with [percent encoding](https://en.wikipedia.org/wiki/URL_encoding) when passing it to `curl`
|
||||
or similar tools.
|
||||
|
||||
The `/select/logsql/query` endpoint returns [a stream of JSON lines](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON),
|
||||
where each line contains JSON-encoded log entry in the form `{field1="value1",...,fieldN="valueN"}`.
|
||||
Example response:
|
||||
|
||||
```
|
||||
{"_msg":"error: disconnect from 19.54.37.22: Auth fail [preauth]","_stream":"{}","_time":"2023-01-01T13:32:13Z"}
|
||||
{"_msg":"some other error","_stream":"{}","_time":"2023-01-01T13:32:15Z"}
|
||||
```
|
||||
|
||||
The matching lines are sent to the response stream as soon as they are found in VictoriaLogs storage.
|
||||
This means that the returned response may contain billions of lines for queries matching too many log entries.
|
||||
The response can be interrupted at any time by closing the connection to VictoriaLogs server.
|
||||
This allows post-processing the returned lines at the client side with the usual Unix commands such as `grep`, `jq`, `less`, `head`, etc.
|
||||
See [these docs](#querying-via-command-line) for more details.
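
The same stream can also be consumed programmatically from any HTTP client. Below is a minimal Go sketch (an illustrative client-side example, not part of VictoriaLogs; the address and the query are assumptions) that reads the returned JSON lines one by one:

```go
package main

import (
	"bufio"
	"fmt"
	"log"
	"net/http"
	"net/url"
)

func main() {
	// Query all the log entries with the `error` word.
	// http.PostForm percent-encodes the `query` arg automatically.
	resp, err := http.PostForm("http://localhost:9428/select/logsql/query", url.Values{
		"query": {"error"},
	})
	if err != nil {
		log.Fatalf("cannot execute query: %s", err)
	}
	defer resp.Body.Close()

	// The response is a stream of JSON lines - one log entry per line.
	// Lines are processed as soon as they arrive from VictoriaLogs.
	scanner := bufio.NewScanner(resp.Body)
	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)
	for scanner.Scan() {
		fmt.Println(scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("cannot read the response stream: %s", err)
	}
}
```

Closing the response body before the stream ends closes the connection, so VictoriaLogs cancels the query in the same way as described above for `curl`.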
|
||||
|
||||
The returned lines aren't sorted by default, since sorting disables the ability to send matching log entries to response stream as soon as they are found.
|
||||
Query results can be sorted either at VictoriaLogs side according [to these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#sorting)
|
||||
or at client side with the usual `sort` command according to [these docs](#querying-via-command-line).
|
||||
|
||||
By default the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy) is queried.
|
||||
If you need to query another tenant, then specify the needed tenant via HTTP request headers. For example, the following query searches
|
||||
for log messages at `(AccountID=12, ProjectID=34)` tenant:
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID: 34' -d 'query=error'
|
||||
```
|
||||
|
||||
The number of requests to `/select/logsql/query` can be [monitored](#monitoring) with `vl_http_requests_total{path="/select/logsql/query"}` metric.
|
||||
|
||||
#### Querying via command-line
|
||||
|
||||
VictoriaLogs provides good integration with `curl` and other command-line tools because of the following features:
|
||||
|
||||
- VictoriaLogs sends the matching log entries to the response stream as soon as they are found.
|
||||
This allows forwarding the response stream to arbitrary [Unix pipes](https://en.wikipedia.org/wiki/Pipeline_(Unix)).
|
||||
- VictoriaLogs automatically adjusts query execution speed to the speed of the client, which reads the response stream.
|
||||
For example, if the response stream is piped to `less` command, then the query is suspended
|
||||
until the `less` command reads the next block from the response stream.
|
||||
- VictoriaLogs automatically cancels query execution when the client closes the response stream.
|
||||
For example, if the query response is piped to `head` command, then VictoriaLogs stops executing the query
|
||||
when the `head` command closes the response stream.
|
||||
|
||||
These features allow executing queries from the command-line interface, which potentially select billions of rows,
|
||||
without the risk of high resource usage (CPU, RAM, disk IO) at VictoriaLogs server.
|
||||
|
||||
For example, the following query can return a very big number of matching log entries (e.g. billions) if VictoriaLogs contains
|
||||
many log messages with the `error` [word](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#word):
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=error'
|
||||
```
|
||||
|
||||
If the command returns "never-ending" response, then just press `ctrl+C` at any time in order to cancel the query.
|
||||
VictoriaLogs notices that the response stream is closed, so it cancels the query and instantly stops consuming CPU, RAM and disk IO for this query.
|
||||
|
||||
Then just use the `head` command for investigating the returned log messages and narrowing down the query:
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=error' | head -10
|
||||
```
|
||||
|
||||
The `head -10` command reads only the first 10 log messages from the response and then closes the response stream.
|
||||
This automatically cancels the query at VictoriaLogs side, so it stops consuming CPU, RAM and disk IO resources.
|
||||
|
||||
Sometimes it may be more convenient to use the `less` command instead of `head` during the investigation of the returned response:
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=error' | less
|
||||
```
|
||||
|
||||
The `less` command reads the response stream on demand, when the user scrolls down the output.
|
||||
VictoriaLogs suspends query execution when `less` stops reading the response stream.
|
||||
It doesn't consume CPU and disk IO resources during this time. It resumes query execution
|
||||
when the `less` continues reading the response stream.
|
||||
|
||||
Suppose that the initial investigation of the returned query results helped determine that the needed log messages contain
|
||||
`cannot open file` [phrase](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#phrase-filter).
|
||||
Then the query can be narrowed down to `error AND "cannot open file"`
|
||||
(see [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#logical-filter) about `AND` operator).
|
||||
Then run the updated command in order to continue the investigation:
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=error AND "cannot open file"' | head
|
||||
```
|
||||
|
||||
Note that the `query` arg must be properly encoded with [percent encoding](https://en.wikipedia.org/wiki/URL_encoding) when passing it to `curl`
|
||||
or similar tools.
|
||||
|
||||
The `pipe the query to "head" or "less" -> investigate the results -> refine the query` iteration
|
||||
can be repeated multiple times until the needed log messages are found.
|
||||
|
||||
The returned VictoriaLogs query response can be post-processed with any combination of Unix commands,
|
||||
which are usually used for log analysis - `grep`, `jq`, `awk`, `sort`, `uniq`, `wc`, etc.
|
||||
|
||||
For example, the following command uses `wc -l` Unix command for counting the number of log messages
|
||||
with the `error` [word](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#word)
|
||||
received from [streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) with `app="nginx"` field
|
||||
during the last 5 minutes:
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=_stream:{app="nginx"} AND _time:[now-5m,now] AND error' | wc -l
|
||||
```
|
||||
|
||||
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter) about `_stream` filter,
|
||||
[these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) about `_time` filter
|
||||
and [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#logical-filter) about `AND` operator.
|
||||
|
||||
The following example shows how to sort query results by the [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field):
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=error' | jq -r '._time + " " + ._msg' | sort | less
|
||||
```
|
||||
|
||||
This command uses `jq` for extracting [`_time`](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field)
|
||||
and [`_msg`](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) fields from the returned results,
|
||||
and piping them to `sort` command.
|
||||
|
||||
Note that the `sort` command needs to read the whole response stream before returning the sorted results. So the command above
|
||||
can take non-trivial amounts of time if the `query` returns too many results. The solution is to narrow down the `query`
|
||||
before sorting the results. See [these tips](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#performance-tips)
|
||||
on how to narrow down query results.
|
||||
|
||||
The following example calculates stats on the number of log messages received during the last 5 minutes
|
||||
grouped by `log.level` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model):
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=_time:[now-5m,now] log.level:*' | jq -r '."log.level"' | sort | uniq -c
|
||||
```
|
||||
|
||||
The query selects all the log messages with non-empty `log.level` field via ["any value" filter](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#any-value-filter),
|
||||
then pipes them to `jq` command, which extracts the `log.level` field value from the returned JSON stream, then the extracted `log.level` values
|
||||
are sorted with `sort` command and, finally, they are passed to `uniq -c` command for calculating the needed stats.
|
||||
|
||||
See also:
|
||||
|
||||
- [Key concepts](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html).
|
||||
- [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html).
|
||||
|
||||
|
||||
### Monitoring
|
||||
|
||||
VictoriaLogs exposes internal metrics in Prometheus exposition format at `http://localhost:9428/metrics` page.
|
||||
It is recommended to set up monitoring of these metrics via VictoriaMetrics
|
||||
(see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter)),
|
||||
vmagent (see [these docs](https://docs.victoriametrics.com/vmagent.html#how-to-collect-metrics-in-prometheus-format)) or via Prometheus.
|
||||
|
||||
VictoriaLogs emits its own logs to stdout. It is recommended to investigate these logs during troubleshooting.
|
||||
|
||||
|
||||
### Retention
|
||||
|
||||
By default VictoriaLogs stores log entries with timestamps in the time range `[now-7d, now]`, while dropping logs outside the given time range.
|
||||
I.e., it uses a retention of 7 days. The retention can be configured with the `-retentionPeriod` command-line flag.
|
||||
This flag accepts values starting from `1d` (one day) up to `100y` (100 years). See [these docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-durations)
|
||||
for the supported duration formats.
|
||||
|
||||
For example, the following command starts VictoriaLogs with the retention of 8 weeks:
|
||||
|
||||
```bash
|
||||
/path/to/victoria-logs -retentionPeriod=8w
|
||||
```
|
||||
|
||||
VictoriaLogs stores the [ingested](#data-ingestion) logs in per-day partition directories. It automatically drops partition directories
|
||||
outside the configured retention.
|
||||
|
||||
VictoriaLogs automatically drops logs at [data ingestion](#data-ingestion) stage if they have timestamps outside the configured retention.
|
||||
A sample of dropped logs is logged with `WARN` message in order to simplify troubleshooting.
|
||||
The `vlinsert_rows_dropped_total` [metric](#monitoring) is incremented each time an ingested log entry is dropped because its timestamp is outside the retention.
|
||||
It is recommended to set up the following alerting rule at [vmalert](https://docs.victoriametrics.com/vmalert.html) in order to be notified
|
||||
when logs with wrong timestamps are ingested into VictoriaLogs:
|
||||
|
||||
```metricsql
|
||||
rate(vlinsert_rows_dropped_total[5m]) > 0
|
||||
```
|
||||
|
||||
By default VictoriaLogs doesn't accept log entries with timestamps bigger than `now+2d`, i.e. 2 days in the future.
|
||||
If you need to accept logs with bigger timestamps, then specify the desired "future retention" via the `-futureRetention` command-line flag.
|
||||
This flag accepts values starting from `1d`. See [these docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-durations)
|
||||
for the supported duration formats.
|
||||
|
||||
For example, the following command starts VictoriaLogs, which accepts logs with timestamps up to a year in the future:
|
||||
|
||||
```bash
|
||||
/path/to/victoria-logs -futureRetention=1y
|
||||
```
|
||||
|
||||
### Storage
|
||||
|
||||
VictoriaLogs stores all its data in a single directory - `victoria-logs-data`. The path to the directory can be changed via `-storageDataPath` command-line flag.
|
||||
For example, the following command starts VictoriaLogs, which stores the data at `/var/lib/victoria-logs`:
|
||||
|
||||
```bash
|
||||
/path/to/victoria-logs -storageDataPath=/var/lib/victoria-logs
|
||||
```
|
||||
|
||||
VictoriaLogs automatically creates the `-storageDataPath` directory on the first run if it is missing.
|
37
docs/VictoriaLogs/Roadmap.md
Normal file
37
docs/VictoriaLogs/Roadmap.md
Normal file
|
@ -0,0 +1,37 @@
|
|||
# VictoriaLogs roadmap
|
||||
|
||||
The VictoriaLogs Preview is ready for evaluation in production. It is recommended to run it alongside the existing solutions
|
||||
such as ElasticSearch and Grafana Loki, and to compare their resource usage and usability.
|
||||
It isn't recommended to migrate from existing solutions to VictoriaLogs Preview yet.
|
||||
|
||||
The following functionality is available in VictoriaLogs Preview:
|
||||
|
||||
- [Data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
|
||||
- [Querying](https://docs.victoriametrics.com/VictoriaLogs/#querying).
|
||||
- [Querying via command-line](https://docs.victoriametrics.com/VictoriaLogs/#querying-via-command-line).
|
||||
|
||||
See [operation docs](https://docs.victoriametrics.com/VictoriaLogs/#operation) for details.
|
||||
|
||||
The following functionality is planned in the future versions of VictoriaLogs:
|
||||
|
||||
- Support for [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) from popular log collectors and formats:
|
||||
- Promtail (aka Grafana Loki)
|
||||
- Vector.dev
|
||||
- Fluentbit
|
||||
- Fluentd
|
||||
- Syslog
|
||||
- Add missing functionality to [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html):
|
||||
- [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context).
|
||||
- [Transformation functions](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#transformations).
|
||||
- [Post-filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#post-filters).
|
||||
- [Stats calculations](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stats).
|
||||
- [Sorting](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#sorting).
|
||||
- [Limiters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#limiters).
|
||||
- The ability to use subqueries inside [in()](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#multi-exact-filter) function.
|
||||
- Live tailing for [LogsQL filters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#filters) aka `tail -f`.
|
||||
- Web UI with the following abilities:
|
||||
- Explore the ingested logs.
|
||||
- Build graphs over time for the ingested logs.
|
||||
- Ability to make instant snapshots and backups in the way [similar to VictoriaMetrics](https://docs.victoriametrics.com/#how-to-work-with-snapshots).
|
||||
- Cluster version of VictoriaLogs.
|
||||
- Ability to store data to object storage (such as S3, GCS, Minio).
|
219
docs/VictoriaLogs/keyConcepts.md
Normal file
219
docs/VictoriaLogs/keyConcepts.md
Normal file
|
@ -0,0 +1,219 @@
|
|||
# VictoriaLogs key concepts
|
||||
|
||||
## Data model
|
||||
|
||||
VictoriaLogs works with structured logs. Every log entry may contain an arbitrary number of `key=value` pairs (aka fields).
|
||||
A single log entry can be expressed as a single-level [JSON](https://www.json.org/json-en.html) object with string keys and values.
|
||||
For example:
|
||||
|
||||
```json
|
||||
{
|
||||
"job": "my-app",
|
||||
"instance": "host123:4567",
|
||||
"level": "error",
|
||||
"client_ip": "1.2.3.4",
|
||||
"trace_id": "1234-56789-abcdef",
|
||||
"_msg": "failed to serve the client request"
|
||||
}
|
||||
```
|
||||
|
||||
VictoriaLogs automatically transforms multi-level JSON (aka nested JSON) into single-level JSON
|
||||
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) according to the following rules:
|
||||
|
||||
- Nested dictionaries are flattened by concatenating dictionary keys with `.` char. For example, the following multi-level JSON
|
||||
is transformed into the following single-level JSON:
|
||||
|
||||
```json
|
||||
{
|
||||
"host": {
|
||||
"name": "foobar"
|
||||
"os": {
|
||||
"version": "1.2.3"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"host.name": "foobar",
|
||||
"host.os.version": "1.2.3"
|
||||
}
|
||||
```
|
||||
|
||||
- Arrays, numbers and boolean values are converted into strings. This simplifies [full-text search](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) over such values.
|
||||
For example, the following JSON with an array, a number and a boolean value is converted into the following JSON with string values:
|
||||
|
||||
```json
|
||||
{
|
||||
"tags": ["foo", "bar"],
|
||||
"offset": 12345,
|
||||
"is_error": false
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"tags": "[\"foo\", \"bar\"]",
|
||||
"offset": "12345",
|
||||
"is_error": "false"
|
||||
}
|
||||
```
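
For illustration, the following Go sketch implements the flattening and stringification rules described above. It is a minimal example only and isn't the actual VictoriaLogs implementation:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// flatten converts a multi-level JSON object into a single-level map:
// nested keys are joined with ".", while arrays, numbers and booleans
// are converted into strings.
func flatten(prefix string, src map[string]any, dst map[string]string) {
	for k, v := range src {
		key := k
		if prefix != "" {
			key = prefix + "." + k
		}
		switch x := v.(type) {
		case map[string]any:
			flatten(key, x, dst)
		case string:
			dst[key] = x
		default:
			b, _ := json.Marshal(x)
			dst[key] = string(b)
		}
	}
}

func main() {
	data := `{"host":{"name":"foobar","os":{"version":"1.2.3"}},"tags":["foo","bar"],"offset":12345,"is_error":false}`
	var src map[string]any
	if err := json.Unmarshal([]byte(data), &src); err != nil {
		panic(err)
	}
	dst := make(map[string]string)
	flatten("", src, dst)
	fmt.Println(dst)
}
```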
|
||||
|
||||
Both label name and label value may contain arbitrary chars. Such chars must be encoded
|
||||
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion)
|
||||
according to [JSON string encoding](https://www.rfc-editor.org/rfc/rfc7159.html#section-7).
|
||||
Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding:
|
||||
|
||||
```json
|
||||
{
|
||||
"label with whitepsace": "value\nwith\nnewlines",
|
||||
"Поле": "价值",
|
||||
}
|
||||
```
|
||||
|
||||
VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) logs.
|
||||
This enables [full-text search](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) across all the fields.
|
||||
|
||||
VictoriaLogs supports the following field types:
|
||||
|
||||
* [`_msg` field](#message-field)
|
||||
* [`_time` field](#time-field)
|
||||
* [`_stream` fields](#stream-fields)
|
||||
* [other fields](#other-fields)
|
||||
|
||||
|
||||
### Message field
|
||||
|
||||
Every ingested [log entry](#data-model) must contain at least a `_msg` field with the actual log message. For example, this is the minimal
|
||||
log entry, which can be ingested into VictoriaLogs:
|
||||
|
||||
```json
|
||||
{
|
||||
"_msg": "some log message"
|
||||
}
|
||||
```
|
||||
|
||||
If the actual log message is stored in a field other than `_msg`, then it is possible to specify the real log message field
|
||||
via `_msg_field` query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
|
||||
For example, if log message is located in the `event.original` field, then specify `_msg_field=event.original` query arg
|
||||
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
|
||||
|
||||
### Time field
|
||||
|
||||
The ingested [log entries](#data-model) may contain `_time` field with the timestamp of the ingested log entry.
|
||||
For example:
|
||||
|
||||
```json
|
||||
{
|
||||
"_msg": "some log message",
|
||||
"_time": "2023-04-12T06:38:11.095Z"
|
||||
}
|
||||
```
|
||||
|
||||
If the actual timestamp is stored in a field other than `_time`, then it is possible to specify the real timestamp
|
||||
field via `_time_field` query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
|
||||
For example, if timestamp is located in the `event.created` field, then specify `_time_field=event.created` query arg
|
||||
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
|
||||
|
||||
If `_time` field is missing, then the data ingestion time is used as log entry timestamp.
|
||||
|
||||
The log entry timestamp allows quickly narrowing down the search to a particular time range.
|
||||
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) for details.
|
||||
|
||||
### Stream fields
|
||||
|
||||
Some [structured logging](#data-model) fields may uniquely identify the application instance, which generates log entries.
|
||||
This may be either a single field such as `instance=host123:456` or a set of fields such as
|
||||
`(datacenter=..., env=..., job=..., instance=...)` or
|
||||
`(kubernetes.namespace=..., kubernetes.node.name=..., kubernetes.pod.name=..., kubernetes.container.name=...)`.
|
||||
|
||||
Log entries received from a single application instance form a log stream in VictoriaLogs.
|
||||
VictoriaLogs optimizes storing and querying of individual log streams. This provides the following benefits:
|
||||
|
||||
- Reduced disk space usage, since a log stream from a single application instance is usually compressed better
|
||||
than a mixed log stream from multiple distinct applications.
|
||||
|
||||
- Increased query performance, since VictoriaLogs needs to scan lower amounts of data
|
||||
when [searching by stream labels](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter).
|
||||
|
||||
VictoriaLogs cannot automatically determine which fields uniquely identify every log stream,
|
||||
so it stores all the received log entries in a single default stream - `{}`.
|
||||
This may lead to not-so-optimal resource usage and query performance.
|
||||
|
||||
Therefore it is recommended to specify stream-level fields via the `_stream_fields` query arg
|
||||
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion).
|
||||
For example, if logs from Kubernetes containers have the following fields:
|
||||
|
||||
```json
|
||||
{
|
||||
"kubernetes.namespace": "some-namespace",
|
||||
"kubernetes.node.name": "some-node",
|
||||
"kubernetes.pod.name": "some-pod",
|
||||
"kubernetes.container.name": "some-container",
|
||||
"_msg": "some log message"
|
||||
}
|
||||
```
|
||||
|
||||
then specify `_stream_fields=kubernetes.namespace,kubernetes.node.name,kubernetes.pod.name,kubernetes.container.name`
|
||||
query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion) in order to properly store
|
||||
per-container logs into distinct streams.
|
||||
|
||||
#### How to determine which fields must be associated with log streams?
|
||||
|
||||
[Log streams](#stream-fields) can be associated with fields, which simultaneously meet the following conditions:
|
||||
|
||||
- Fields, which remain constant across log entries received from a single application instance.
|
||||
- Fields, which uniquely identify the application instance. For example, `instance`, `host`, `container`, etc.
|
||||
|
||||
Sometimes a single application instance may generate multiple log streams and store them into distinct log files.
|
||||
In this case it is OK to associate the log stream with filepath fields such as `log.file.path` in addition to instance-specific fields.
|
||||
|
||||
Structured logs may contain a big number of fields, which do not change across log entries received from a single application instance.
|
||||
There is no need to associate all these fields with a log stream - it is enough to associate only those fields, which uniquely identify
|
||||
the application instance across all the ingested logs. Additionally, some fields such as `datacenter`, `environment`, `namespace`, `job` or `app`,
|
||||
can be associated with log stream in order to optimize searching by these fields with [stream filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter).
|
||||
|
||||
Never associate log streams with fields, which may change across log entries of the same application instance. See [these docs](#high-cardinality) for details.
|
||||
|
||||
#### High cardinality
|
||||
|
||||
Some fields in the [ingested logs](#data-model) may contain a big number of unique values across log entries.
|
||||
For example, fields with names such as `ip`, `user_id` or `trace_id` tend to contain a big number of unique values.
|
||||
VictoriaLogs works perfectly with such fields unless they are associated with [log streams](#stream-fields).
|
||||
|
||||
Never associate high-cardinality fields with [log streams](#stream-fields), since this may result
|
||||
in the following issues:
|
||||
|
||||
- Performance degradation during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion)
|
||||
and [querying](https://docs.victoriametrics.com/VictoriaLogs/#querying)
|
||||
- Increased memory usage
|
||||
- Increased CPU usage
|
||||
- Increased disk space usage
|
||||
- Increased disk read / write IO
|
||||
|
||||
VictoriaLogs exposes `vl_streams_created_total` [metric](https://docs.victoriametrics.com/VictoriaLogs/#monitoring),
|
||||
which shows the number of created streams since the last VictoriaLogs restart. If this metric grows at a rapid rate
|
||||
during a long period of time, then there is a high chance of the high cardinality issues mentioned above.
|
||||
VictoriaLogs can log all the newly registered streams when `-logNewStreams` command-line flag is passed to it.
|
||||
This can help narrow down and eliminate high-cardinality fields from [log streams](#stream-fields).
|
||||
|
||||
### Other fields
|
||||
|
||||
The rest of [structured logging](#data-model) fields are optional. They can be used for simplifying and optimizing search queries.
|
||||
For example, it is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long log message.
|
||||
E.g. the `trace_id:XXXX-YYYY-ZZZZ` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query.
|
||||
|
||||
See [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) for more details.
|
||||
|
||||
## Multitenancy
|
||||
|
||||
VictoriaLogs supports multitenancy. A tenant is identified by an `(AccountID, ProjectID)` pair, where `AccountID` and `ProjectID` are arbitrary 32-bit unsigned integers.
|
||||
The `AccountID` and `ProjectID` fields can be set during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/#data-ingestion)
|
||||
and [querying](https://docs.victoriametrics.com/VictoriaLogs/#querying) via `AccountID` and `ProjectID` request headers.
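
For example, a query against the `(AccountID=12, ProjectID=34)` tenant may look like the following Go sketch (an illustrative client-side example; the address and tenant values are assumptions):

```go
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"
)

func main() {
	req, err := http.NewRequest("POST", "http://localhost:9428/select/logsql/query",
		strings.NewReader("query=error"))
	if err != nil {
		log.Fatalf("cannot create request: %s", err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	// Select the needed tenant via request headers.
	req.Header.Set("AccountID", "12")
	req.Header.Set("ProjectID", "34")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatalf("cannot execute request: %s", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatalf("cannot read response: %s", err)
	}
	fmt.Printf("%s", body)
}
```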
|
||||
|
||||
If `AccountID` and/or `ProjectID` request headers aren't set, then the default `0` value is used.
|
||||
|
||||
VictoriaLogs has very low overhead for per-tenant management, so it is OK to have thousands of tenants in a single VictoriaLogs instance.
|
||||
|
||||
VictoriaLogs doesn't perform per-tenant authorization. Use [vmauth](https://docs.victoriametrics.com/vmauth.html) or similar tools for per-tenant authorization.
|
31
lib/logstorage/arena.go
Normal file
31
lib/logstorage/arena.go
Normal file
|
@ -0,0 +1,31 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
)
|
||||
|
||||
type arena struct {
|
||||
b []byte
|
||||
}
|
||||
|
||||
func (a *arena) reset() {
|
||||
a.b = a.b[:0]
|
||||
}
|
||||
|
||||
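// copyBytes appends a copy of b to the arena and returns the copy.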
func (a *arena) copyBytes(b []byte) []byte {
|
||||
ab := a.b
|
||||
abLen := len(ab)
|
||||
ab = append(ab, b...)
|
||||
result := ab[abLen:]
|
||||
a.b = ab
|
||||
return result
|
||||
}
|
||||
|
||||
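// newBytes grows the arena by size bytes and returns the newly added region.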
func (a *arena) newBytes(size int) []byte {
|
||||
ab := a.b
|
||||
abLen := len(ab)
|
||||
ab = bytesutil.ResizeWithCopyMayOverallocate(ab, abLen+size)
|
||||
result := ab[abLen:]
|
||||
a.b = ab
|
||||
return result
|
||||
}
|
650
lib/logstorage/block.go
Normal file
650
lib/logstorage/block.go
Normal file
|
@ -0,0 +1,650 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// block represents a block of log entries.
|
||||
type block struct {
|
||||
// timestamps contains timestamps for log entries.
|
||||
timestamps []int64
|
||||
|
||||
// columns contains values for fields seen in log entries.
|
||||
columns []column
|
||||
|
||||
// constColumns contains fields with constant values across all the block entries.
|
||||
constColumns []Field
|
||||
}
|
||||
|
||||
func (b *block) reset() {
|
||||
b.timestamps = b.timestamps[:0]
|
||||
|
||||
cs := b.columns
|
||||
for i := range cs {
|
||||
cs[i].reset()
|
||||
}
|
||||
b.columns = cs[:0]
|
||||
|
||||
ccs := b.constColumns
|
||||
for i := range ccs {
|
||||
ccs[i].Reset()
|
||||
}
|
||||
b.constColumns = ccs[:0]
|
||||
}
|
||||
|
||||
// uncompressedSizeBytes returns the total size of the original log entries stored in b.
|
||||
//
|
||||
// It is supposed that every log entry has the following format:
|
||||
//
|
||||
// 2006-01-02T15:04:05.999999999Z07:00 field1=value1 ... fieldN=valueN
|
||||
func (b *block) uncompressedSizeBytes() uint64 {
|
||||
rowsCount := uint64(b.Len())
|
||||
|
||||
// Take into account timestamps
|
||||
n := rowsCount * uint64(len(time.RFC3339Nano))
|
||||
|
||||
// Take into account columns
|
||||
cs := b.columns
|
||||
for i := range cs {
|
||||
c := &cs[i]
|
||||
nameLen := uint64(len(c.name))
|
||||
if nameLen == 0 {
|
||||
nameLen = uint64(len("_msg"))
|
||||
}
|
||||
for _, v := range c.values {
|
||||
if len(v) > 0 {
|
||||
n += nameLen + 2 + uint64(len(v))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Take into account constColumns
|
||||
ccs := b.constColumns
|
||||
for i := range ccs {
|
||||
cc := &ccs[i]
|
||||
nameLen := uint64(len(cc.Name))
|
||||
if nameLen == 0 {
|
||||
nameLen = uint64(len("_msg"))
|
||||
}
|
||||
n += rowsCount * (2 + nameLen + uint64(len(cc.Value)))
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
// uncompressedRowsSizeBytes returns the size of the uncompressed rows.
|
||||
//
|
||||
// It is supposed that every row has the following format:
|
||||
//
|
||||
// 2006-01-02T15:04:05.999999999Z07:00 field1=value1 ... fieldN=valueN
|
||||
func uncompressedRowsSizeBytes(rows [][]Field) uint64 {
|
||||
n := uint64(0)
|
||||
for _, fields := range rows {
|
||||
n += uncompressedRowSizeBytes(fields)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// uncompressedRowSizeBytes returns the size of uncompressed row.
|
||||
//
|
||||
// It is supposed that the row has the following format:
|
||||
//
|
||||
// 2006-01-02T15:04:05.999999999Z07:00 field1=value1 ... fieldN=valueN
|
||||
func uncompressedRowSizeBytes(fields []Field) uint64 {
|
||||
n := uint64(len(time.RFC3339Nano)) // log timestamp
|
||||
for _, f := range fields {
|
||||
nameLen := len(f.Name)
|
||||
if nameLen == 0 {
|
||||
nameLen = len("_msg")
|
||||
}
|
||||
n += uint64(2 + nameLen + len(f.Value))
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// column contains values for the given field name seen in log entries.
|
||||
type column struct {
|
||||
// name is the field name
|
||||
name string
|
||||
|
||||
// values is the values seen for the given log entries.
|
||||
values []string
|
||||
}
|
||||
|
||||
func (c *column) reset() {
|
||||
c.name = ""
|
||||
|
||||
values := c.values
|
||||
for i := range values {
|
||||
values[i] = ""
|
||||
}
|
||||
c.values = values[:0]
|
||||
}
|
||||
|
||||
func (c *column) areSameValues() bool {
|
||||
values := c.values
|
||||
if len(values) < 2 {
|
||||
return true
|
||||
}
|
||||
value := values[0]
|
||||
for _, v := range values[1:] {
|
||||
if value != v {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (c *column) resizeValues(valuesLen int) []string {
|
||||
values := c.values
|
||||
if n := valuesLen - cap(values); n > 0 {
|
||||
values = append(values[:cap(values)], make([]string, n)...)
|
||||
}
|
||||
values = values[:valuesLen]
|
||||
c.values = values
|
||||
return values
|
||||
}
|
||||
|
||||
// mustWriteTo writes c to sw and updates ch accordingly.
|
||||
func (c *column) mustWriteTo(ch *columnHeader, sw *streamWriters) {
|
||||
ch.reset()
|
||||
|
||||
valuesWriter := &sw.fieldValuesWriter
|
||||
bloomFilterWriter := &sw.fieldBloomFilterWriter
|
||||
if c.name == "" {
|
||||
valuesWriter = &sw.messageValuesWriter
|
||||
bloomFilterWriter = &sw.messageBloomFilterWriter
|
||||
}
|
||||
|
||||
ch.name = c.name
|
||||
|
||||
// encode values
|
||||
ve := getValuesEncoder()
|
||||
ch.valueType, ch.minValue, ch.maxValue = ve.encode(c.values, &ch.valuesDict)
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
defer longTermBufPool.Put(bb)
|
||||
|
||||
// marshal values
|
||||
bb.B = marshalStringsBlock(bb.B[:0], ve.values)
|
||||
putValuesEncoder(ve)
|
||||
ch.valuesSize = uint64(len(bb.B))
|
||||
if ch.valuesSize > maxValuesBlockSize {
|
||||
logger.Panicf("BUG: too valuesSize: %d bytes; mustn't exceed %d bytes", ch.valuesSize, maxValuesBlockSize)
|
||||
}
|
||||
ch.valuesOffset = valuesWriter.bytesWritten
|
||||
valuesWriter.MustWrite(bb.B)
|
||||
|
||||
// create and marshal bloom filter for c.values
|
||||
if ch.valueType != valueTypeDict {
|
||||
tokensBuf := getTokensBuf()
|
||||
tokensBuf.A = tokenizeStrings(tokensBuf.A[:0], c.values)
|
||||
bb.B = bloomFilterMarshal(bb.B[:0], tokensBuf.A)
|
||||
putTokensBuf(tokensBuf)
|
||||
} else {
|
||||
// there is no need to encode the bloom filter for the dictionary type,
|
||||
// since it isn't used during querying - all the dictionary values are available in ch.valuesDict
|
||||
bb.B = bb.B[:0]
|
||||
}
|
||||
ch.bloomFilterSize = uint64(len(bb.B))
|
||||
if ch.bloomFilterSize > maxBloomFilterBlockSize {
|
||||
logger.Panicf("BUG: too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", ch.bloomFilterSize, maxBloomFilterBlockSize)
|
||||
}
|
||||
ch.bloomFilterOffset = bloomFilterWriter.bytesWritten
|
||||
bloomFilterWriter.MustWrite(bb.B)
|
||||
}
|
||||
|
||||
func (b *block) assertValid() {
|
||||
// Check that timestamps are in ascending order
|
||||
timestamps := b.timestamps
|
||||
for i := 1; i < len(timestamps); i++ {
|
||||
if timestamps[i-1] > timestamps[i] {
|
||||
logger.Panicf("BUG: log entries must be sorted by timestamp; got the previous entry with bigger timestamp %d than the current entry with timestamp %d",
|
||||
timestamps[i-1], timestamps[i])
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the number of items in each column matches the number of items in the block.
|
||||
itemsCount := len(timestamps)
|
||||
columns := b.columns
|
||||
for _, c := range columns {
|
||||
if len(c.values) != itemsCount {
|
||||
logger.Panicf("BUG: unexpected number of values for column %q: got %d; want %d", c.name, len(c.values), itemsCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MustInitFromRows initializes b from the given timestamps and rows.
|
||||
//
|
||||
// It is expected that timestamps are sorted.
|
||||
func (b *block) MustInitFromRows(timestamps []int64, rows [][]Field) {
|
||||
b.reset()
|
||||
|
||||
assertTimestampsSorted(timestamps)
|
||||
b.timestamps = append(b.timestamps, timestamps...)
|
||||
b.mustInitFromRows(rows)
|
||||
b.sortColumnsByName()
|
||||
}
|
||||
|
||||
func (b *block) mustInitFromRows(rows [][]Field) {
|
||||
rowsLen := len(rows)
|
||||
if rowsLen == 0 {
|
||||
// Nothing to do
|
||||
return
|
||||
}
|
||||
|
||||
if areSameFieldsInRows(rows) {
|
||||
// Fast path - all the log entries have the same fields
|
||||
fields := rows[0]
|
||||
for i := range fields {
|
||||
f := &fields[i]
|
||||
if areSameValuesForColumn(rows, i) {
|
||||
cc := b.extendConstColumns()
|
||||
cc.Name = f.Name
|
||||
cc.Value = f.Value
|
||||
} else {
|
||||
c := b.extendColumns()
|
||||
c.name = f.Name
|
||||
values := c.resizeValues(rowsLen)
|
||||
for j := range rows {
|
||||
values[j] = rows[j][i].Value
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Slow path - log entries contain different set of fields
|
||||
|
||||
// Determine indexes for columns
|
||||
columnIdxs := getColumnIdxs()
|
||||
for i := range rows {
|
||||
fields := rows[i]
|
||||
for j := range fields {
|
||||
name := fields[j].Name
|
||||
if _, ok := columnIdxs[name]; !ok {
|
||||
columnIdxs[name] = len(columnIdxs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize columns
|
||||
cs := b.resizeColumns(len(columnIdxs))
|
||||
for name, idx := range columnIdxs {
|
||||
c := &cs[idx]
|
||||
c.name = name
|
||||
c.resizeValues(rowsLen)
|
||||
}
|
||||
|
||||
// Write rows to block
|
||||
for i := range rows {
|
||||
for _, f := range rows[i] {
|
||||
idx := columnIdxs[f.Name]
|
||||
cs[idx].values[i] = f.Value
|
||||
}
|
||||
}
|
||||
putColumnIdxs(columnIdxs)
|
||||
|
||||
// Detect const columns
|
||||
for i := len(cs) - 1; i >= 0; i-- {
|
||||
c := &cs[i]
|
||||
if !c.areSameValues() {
|
||||
continue
|
||||
}
|
||||
cc := b.extendConstColumns()
|
||||
cc.Name = c.name
|
||||
cc.Value = c.values[0]
|
||||
|
||||
c.reset()
|
||||
if i < len(cs)-1 {
|
||||
swapColumns(c, &cs[len(cs)-1])
|
||||
}
|
||||
cs = cs[:len(cs)-1]
|
||||
}
|
||||
b.columns = cs
|
||||
}
|
||||
|
||||
func swapColumns(a, b *column) {
|
||||
*a, *b = *b, *a
|
||||
}
|
||||
|
||||
func areSameValuesForColumn(rows [][]Field, colIdx int) bool {
|
||||
if len(rows) < 2 {
|
||||
return true
|
||||
}
|
||||
value := rows[0][colIdx].Value
|
||||
rows = rows[1:]
|
||||
for i := range rows {
|
||||
if value != rows[i][colIdx].Value {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func assertTimestampsSorted(timestamps []int64) {
|
||||
for i := range timestamps {
|
||||
if i > 0 && timestamps[i-1] > timestamps[i] {
|
||||
logger.Panicf("BUG: log entries must be sorted by timestamp; got the previous entry with bigger timestamp %d than the current entry with timestamp %d",
|
||||
timestamps[i-1], timestamps[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *block) extendConstColumns() *Field {
|
||||
ccs := b.constColumns
|
||||
if cap(ccs) > len(ccs) {
|
||||
ccs = ccs[:len(ccs)+1]
|
||||
} else {
|
||||
ccs = append(ccs, Field{})
|
||||
}
|
||||
b.constColumns = ccs
|
||||
return &ccs[len(ccs)-1]
|
||||
}
|
||||
|
||||
func (b *block) extendColumns() *column {
|
||||
cs := b.columns
|
||||
if cap(cs) > len(cs) {
|
||||
cs = cs[:len(cs)+1]
|
||||
} else {
|
||||
cs = append(cs, column{})
|
||||
}
|
||||
b.columns = cs
|
||||
return &cs[len(cs)-1]
|
||||
}
|
||||
|
||||
func (b *block) resizeColumns(columnsLen int) []column {
|
||||
cs := b.columns[:0]
|
||||
if n := columnsLen - cap(cs); n > 0 {
|
||||
cs = append(cs[:cap(cs)], make([]column, n)...)
|
||||
}
|
||||
cs = cs[:columnsLen]
|
||||
b.columns = cs
|
||||
return cs
|
||||
}
|
||||
|
||||
func (b *block) sortColumnsByName() {
|
||||
if len(b.columns)+len(b.constColumns) > maxColumnsPerBlock {
|
||||
logger.Panicf("BUG: too big number of columns detected in the block: %d; the number of columns mustn't exceed %d",
|
||||
len(b.columns)+len(b.constColumns), maxColumnsPerBlock)
|
||||
}
|
||||
|
||||
cs := getColumnsSorter()
|
||||
cs.columns = b.columns
|
||||
sort.Sort(cs)
|
||||
putColumnsSorter(cs)
|
||||
|
||||
ccs := getConstColumnsSorter()
|
||||
ccs.columns = b.constColumns
|
||||
sort.Sort(ccs)
|
||||
putConstColumnsSorter(ccs)
|
||||
}
|
||||
|
||||
// Len returns the number of log entries in b.
|
||||
func (b *block) Len() int {
|
||||
return len(b.timestamps)
|
||||
}
|
||||
|
||||
// InitFromBlockData unmarshals bd to b.
|
||||
//
|
||||
// sbu and vd are used as a temporary storage for unmarshaled column values.
|
||||
//
|
||||
// The b becomes outdated after sbu or vd is reset.
|
||||
func (b *block) InitFromBlockData(bd *blockData, sbu *stringsBlockUnmarshaler, vd *valuesDecoder) error {
|
||||
b.reset()
|
||||
|
||||
if bd.rowsCount > maxRowsPerBlock {
|
||||
return fmt.Errorf("too many entries found in the block: %d; mustn't exceed %d", bd.rowsCount, maxRowsPerBlock)
|
||||
}
|
||||
rowsCount := int(bd.rowsCount)
|
||||
|
||||
// unmarshal timestamps
|
||||
td := &bd.timestampsData
|
||||
var err error
|
||||
b.timestamps, err = encoding.UnmarshalTimestamps(b.timestamps[:0], td.data, td.marshalType, td.minTimestamp, rowsCount)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal timestamps: %w", err)
|
||||
}
|
||||
|
||||
// unmarshal columns
|
||||
cds := bd.columnsData
|
||||
cs := b.resizeColumns(len(cds))
|
||||
for i := range cds {
|
||||
cd := &cds[i]
|
||||
c := &cs[i]
|
||||
c.name = cd.name
|
||||
c.values, err = sbu.unmarshal(c.values[:0], cd.valuesData, uint64(rowsCount))
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal column %d: %w", i, err)
|
||||
}
|
||||
if err = vd.decodeInplace(c.values, cd.valueType, &cd.valuesDict); err != nil {
|
||||
return fmt.Errorf("cannot decode column values: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// unmarshal constColumns
|
||||
b.constColumns = append(b.constColumns[:0], bd.constColumns...)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mustWriteTo writes b with the given sid to sw and updates bh accordingly
|
||||
func (b *block) mustWriteTo(sid *streamID, bh *blockHeader, sw *streamWriters) {
|
||||
// Do not store the version used for encoding directly in the block data, since:
|
||||
// - all the blocks in the same part use the same encoding
|
||||
// - the block encoding version can be put in metadata file for the part (aka metadataFilename)
|
||||
|
||||
b.assertValid()
|
||||
bh.reset()
|
||||
|
||||
bh.streamID = *sid
|
||||
bh.uncompressedSizeBytes = b.uncompressedSizeBytes()
|
||||
bh.rowsCount = uint64(b.Len())
|
||||
|
||||
// Marshal timestamps
|
||||
mustWriteTimestampsTo(&bh.timestampsHeader, b.timestamps, sw)
|
||||
|
||||
// Marshal columns
|
||||
cs := b.columns
|
||||
csh := getColumnsHeader()
|
||||
chs := csh.resizeColumnHeaders(len(cs))
|
||||
for i := range cs {
|
||||
cs[i].mustWriteTo(&chs[i], sw)
|
||||
}
|
||||
csh.constColumns = append(csh.constColumns[:0], b.constColumns...)
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
bb.B = csh.marshal(bb.B)
|
||||
putColumnsHeader(csh)
|
||||
bh.columnsHeaderOffset = sw.columnsHeaderWriter.bytesWritten
|
||||
bh.columnsHeaderSize = uint64(len(bb.B))
|
||||
if bh.columnsHeaderSize > maxColumnsHeaderSize {
|
||||
logger.Panicf("BUG: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", bh.columnsHeaderSize, maxColumnsHeaderSize)
|
||||
}
|
||||
sw.columnsHeaderWriter.MustWrite(bb.B)
|
||||
longTermBufPool.Put(bb)
|
||||
}
|
||||
|
||||
// appendRows appends log entries from b to dst.
|
||||
func (b *block) appendRows(dst *rows) {
|
||||
// copy timestamps
|
||||
dst.timestamps = append(dst.timestamps, b.timestamps...)
|
||||
|
||||
// copy columns
|
||||
fieldsBuf := dst.fieldsBuf
|
||||
ccs := b.constColumns
|
||||
cs := b.columns
|
||||
for i := range b.timestamps {
|
||||
fieldsLen := len(fieldsBuf)
|
||||
// copy const columns
|
||||
for j := range ccs {
|
||||
cc := &ccs[j]
|
||||
fieldsBuf = append(fieldsBuf, Field{
|
||||
Name: cc.Name,
|
||||
Value: cc.Value,
|
||||
})
|
||||
}
|
||||
// copy other columns
|
||||
for j := range cs {
|
||||
c := &cs[j]
|
||||
value := c.values[i]
|
||||
if len(value) == 0 {
|
||||
continue
|
||||
}
|
||||
fieldsBuf = append(fieldsBuf, Field{
|
||||
Name: c.name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
dst.rows = append(dst.rows, fieldsBuf[fieldsLen:])
|
||||
}
|
||||
dst.fieldsBuf = fieldsBuf
|
||||
}
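// Editorial sketch, not part of the original commit: for every timestamp appendRows
// emits the const columns first and then the non-empty per-row column values.
// A hypothetical block with one const column and one regular column:
func exampleAppendRowsSketch() {
	b := &block{
		timestamps:   []int64{1, 2, 3},
		columns:      []column{{name: "level", values: []string{"info", "", "warn"}}},
		constColumns: []Field{{Name: "host", Value: "a"}},
	}
	var rs rows
	b.appendRows(&rs)
	// rs.rows now contains 3 rows; the second row holds only the const "host" field,
	// since the empty "level" value was skipped above.
	_ = rs
}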
|
||||
|
||||
func areSameFieldsInRows(rows [][]Field) bool {
|
||||
if len(rows) < 2 {
|
||||
return true
|
||||
}
|
||||
fields := rows[0]
|
||||
rows = rows[1:]
|
||||
for i := range rows {
|
||||
leFields := rows[i]
|
||||
if len(fields) != len(leFields) {
|
||||
return false
|
||||
}
|
||||
for j := range leFields {
|
||||
if leFields[j].Name != fields[j].Name {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
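// Editorial sketch, not part of the original commit: areSameFieldsInRows reports whether
// every row carries exactly the same field names in the same order, e.g.:
func exampleAreSameFieldsInRowsSketch() bool {
	rs := [][]Field{
		{{Name: "host", Value: "a"}, {Name: "level", Value: "info"}},
		{{Name: "host", Value: "b"}, {Name: "level", Value: "warn"}},
	}
	return areSameFieldsInRows(rs) // true: the field names and their order match
}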
|
||||
|
||||
var columnIdxsPool sync.Pool
|
||||
|
||||
func getColumnIdxs() map[string]int {
|
||||
v := columnIdxsPool.Get()
|
||||
if v == nil {
|
||||
return make(map[string]int)
|
||||
}
|
||||
return v.(map[string]int)
|
||||
}
|
||||
|
||||
func putColumnIdxs(m map[string]int) {
|
||||
for k := range m {
|
||||
delete(m, k)
|
||||
}
|
||||
columnIdxsPool.Put(m)
|
||||
}
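// Editorial sketch, not part of the original commit: getColumnIdxs/putColumnIdxs follow
// the sync.Pool pattern used throughout this file - take a cleared object from the pool,
// use it, then return it so the allocation can be reused by subsequent callers:
func exampleColumnIdxsPoolingSketch(names []string) {
	m := getColumnIdxs()
	for i, name := range names {
		m[name] = i
	}
	// ... look up column indexes by name via m ...
	putColumnIdxs(m) // clears m and returns it to the pool
}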
|
||||
|
||||
func getBlock() *block {
|
||||
v := blockPool.Get()
|
||||
if v == nil {
|
||||
return &block{}
|
||||
}
|
||||
return v.(*block)
|
||||
}
|
||||
|
||||
func putBlock(b *block) {
|
||||
b.reset()
|
||||
blockPool.Put(b)
|
||||
}
|
||||
|
||||
var blockPool sync.Pool
|
||||
|
||||
type columnsSorter struct {
|
||||
columns []column
|
||||
}
|
||||
|
||||
func (cs *columnsSorter) reset() {
|
||||
cs.columns = nil
|
||||
}
|
||||
|
||||
func (cs *columnsSorter) Len() int {
|
||||
return len(cs.columns)
|
||||
}
|
||||
|
||||
func (cs *columnsSorter) Less(i, j int) bool {
|
||||
columns := cs.columns
|
||||
return columns[i].name < columns[j].name
|
||||
}
|
||||
|
||||
func (cs *columnsSorter) Swap(i, j int) {
|
||||
columns := cs.columns
|
||||
columns[i], columns[j] = columns[j], columns[i]
|
||||
}
|
||||
|
||||
func getColumnsSorter() *columnsSorter {
|
||||
v := columnsSorterPool.Get()
|
||||
if v == nil {
|
||||
return &columnsSorter{}
|
||||
}
|
||||
return v.(*columnsSorter)
|
||||
}
|
||||
|
||||
func putColumnsSorter(cs *columnsSorter) {
|
||||
cs.reset()
|
||||
columnsSorterPool.Put(cs)
|
||||
}
|
||||
|
||||
var columnsSorterPool sync.Pool
|
||||
|
||||
type constColumnsSorter struct {
|
||||
columns []Field
|
||||
}
|
||||
|
||||
func (ccs *constColumnsSorter) reset() {
|
||||
ccs.columns = nil
|
||||
}
|
||||
|
||||
func (ccs *constColumnsSorter) Len() int {
|
||||
return len(ccs.columns)
|
||||
}
|
||||
|
||||
func (ccs *constColumnsSorter) Less(i, j int) bool {
|
||||
columns := ccs.columns
|
||||
return columns[i].Name < columns[j].Name
|
||||
}
|
||||
|
||||
func (ccs *constColumnsSorter) Swap(i, j int) {
|
||||
columns := ccs.columns
|
||||
columns[i], columns[j] = columns[j], columns[i]
|
||||
}
|
||||
|
||||
func getConstColumnsSorter() *constColumnsSorter {
|
||||
v := constColumnsSorterPool.Get()
|
||||
if v == nil {
|
||||
return &constColumnsSorter{}
|
||||
}
|
||||
return v.(*constColumnsSorter)
|
||||
}
|
||||
|
||||
func putConstColumnsSorter(ccs *constColumnsSorter) {
|
||||
ccs.reset()
|
||||
constColumnsSorterPool.Put(ccs)
|
||||
}
|
||||
|
||||
var constColumnsSorterPool sync.Pool
|
||||
|
||||
// mustWriteTimestampsTo writes timestamps to sw and updates th accordingly
|
||||
func mustWriteTimestampsTo(th *timestampsHeader, timestamps []int64, sw *streamWriters) {
|
||||
th.reset()
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
bb.B, th.marshalType, th.minTimestamp = encoding.MarshalTimestamps(bb.B[:0], timestamps, 64)
|
||||
if len(bb.B) > maxTimestampsBlockSize {
|
||||
logger.Panicf("BUG: too big block with timestamps: %d bytes; the maximum supported size is %d bytes", len(bb.B), maxTimestampsBlockSize)
|
||||
}
|
||||
th.maxTimestamp = timestamps[len(timestamps)-1]
|
||||
th.blockOffset = sw.timestampsWriter.bytesWritten
|
||||
th.blockSize = uint64(len(bb.B))
|
||||
sw.timestampsWriter.MustWrite(bb.B)
|
||||
longTermBufPool.Put(bb)
|
||||
}
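// Editorial sketch, not part of the original commit: the timestamps block written above
// is decoded back in block.InitFromBlockData via encoding.UnmarshalTimestamps. A round
// trip with the same helpers (64 requests full 64-bit precision; assumes a non-empty
// timestamps slice) could look like this:
func exampleTimestampsRoundTripSketch(timestamps []int64) ([]int64, error) {
	data, marshalType, minTimestamp := encoding.MarshalTimestamps(nil, timestamps, 64)
	return encoding.UnmarshalTimestamps(nil, data, marshalType, minTimestamp, len(timestamps))
}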
|
lib/logstorage/block_data.go (new file, 383 lines)
@@ -0,0 +1,383 @@
package logstorage
|
||||
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// blockData contains packed data for a single block.
|
||||
//
|
||||
// The main purpose of this struct is to reduce the work needed during background merge of parts.
|
||||
// If the block is full, then the blockData can be written to the destination part
|
||||
// without the need to unpack it.
|
||||
type blockData struct {
|
||||
// streamID is id of the stream for the data
|
||||
streamID streamID
|
||||
|
||||
// uncompressedSizeBytes is the original (uncompressed) size of log entries stored in the block
|
||||
uncompressedSizeBytes uint64
|
||||
|
||||
// rowsCount is the number of log entries in the block
|
||||
rowsCount uint64
|
||||
|
||||
// timestampsData contains the encoded timestamps data for the block
|
||||
timestampsData timestampsData
|
||||
|
||||
// columnsData contains packed per-column data.
|
||||
columnsData []columnData
|
||||
|
||||
// constColumns contains data for const columns across the block.
|
||||
constColumns []Field
|
||||
|
||||
// a is used for storing byte slices for timestamps and columns.
|
||||
//
|
||||
// It reduces fragmentation for them.
|
||||
a arena
|
||||
}
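// Editorial sketch, not part of the original commit: the doc comment above describes the
// intended merge fast path - a packed blockData can be copied to the destination part
// as-is, and only blocks that still need merging are unpacked into rows. A hypothetical
// merge step (the real "is the block full?" criterion may differ) could look like:
func mergeBlockDataSketch(bd *blockData, bh *blockHeader, sw *streamWriters,
	dst *rows, sbu *stringsBlockUnmarshaler, vd *valuesDecoder) error {
	if bd.rowsCount >= maxRowsPerBlock {
		// fast path: write the packed block without unpacking it
		bd.mustWriteTo(bh, sw)
		return nil
	}
	// slow path: unpack into rows, so they can be merged with rows from other blocks
	return bd.unmarshalRows(dst, sbu, vd)
}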
|
||||
|
||||
// reset resets bd for subsequent re-use
|
||||
func (bd *blockData) reset() {
|
||||
bd.streamID.reset()
|
||||
bd.uncompressedSizeBytes = 0
|
||||
bd.rowsCount = 0
|
||||
bd.timestampsData.reset()
|
||||
|
||||
cds := bd.columnsData
|
||||
for i := range cds {
|
||||
cds[i].reset()
|
||||
}
|
||||
bd.columnsData = cds[:0]
|
||||
|
||||
ccs := bd.constColumns
|
||||
for i := range ccs {
|
||||
ccs[i].Reset()
|
||||
}
|
||||
bd.constColumns = ccs[:0]
|
||||
|
||||
bd.a.reset()
|
||||
}
|
||||
|
||||
func (bd *blockData) resizeColumnsData(columnsDataLen int) []columnData {
|
||||
cds := bd.columnsData
|
||||
if n := columnsDataLen - cap(cds); n > 0 {
|
||||
cds = append(cds[:cap(cds)], make([]columnData, n)...)
|
||||
}
|
||||
cds = cds[:columnsDataLen]
|
||||
bd.columnsData = cds
|
||||
return cds
|
||||
}
|
||||
|
||||
// copyFrom copies src to bd.
|
||||
func (bd *blockData) copyFrom(src *blockData) {
|
||||
bd.reset()
|
||||
|
||||
bd.streamID = src.streamID
|
||||
bd.uncompressedSizeBytes = src.uncompressedSizeBytes
|
||||
bd.rowsCount = src.rowsCount
|
||||
bd.timestampsData.copyFrom(&src.timestampsData, &bd.a)
|
||||
|
||||
cdsSrc := src.columnsData
|
||||
cds := bd.resizeColumnsData(len(cdsSrc))
|
||||
for i := range cds {
|
||||
cds[i].copyFrom(&cdsSrc[i], &bd.a)
|
||||
}
|
||||
bd.columnsData = cds
|
||||
|
||||
bd.constColumns = append(bd.constColumns[:0], src.constColumns...)
|
||||
}
|
||||
|
||||
// unmarshalRows appends the log entries unmarshaled from bd to dst.
//
// The returned log entries remain valid only as long as sbu and vd are valid.
|
||||
func (bd *blockData) unmarshalRows(dst *rows, sbu *stringsBlockUnmarshaler, vd *valuesDecoder) error {
|
||||
b := getBlock()
|
||||
defer putBlock(b)
|
||||
|
||||
if err := b.InitFromBlockData(bd, sbu, vd); err != nil {
|
||||
return err
|
||||
}
|
||||
b.appendRows(dst)
|
||||
return nil
|
||||
}
|
||||
|
||||
// mustWriteTo writes bd with the given sid to sw and updates bh accordingly
|
||||
func (bd *blockData) mustWriteTo(bh *blockHeader, sw *streamWriters) {
|
||||
// Do not store the version used for encoding directly in the block data, since:
|
||||
// - all the blocks in the same part use the same encoding
|
||||
// - the block encoding version can be put in metadata file for the part (aka metadataFilename)
|
||||
|
||||
bh.reset()
|
||||
|
||||
bh.streamID = bd.streamID
|
||||
bh.uncompressedSizeBytes = bd.uncompressedSizeBytes
|
||||
bh.rowsCount = bd.rowsCount
|
||||
|
||||
// Marshal timestamps
|
||||
bd.timestampsData.mustWriteTo(&bh.timestampsHeader, sw)
|
||||
|
||||
// Marshal columns
|
||||
cds := bd.columnsData
|
||||
csh := getColumnsHeader()
|
||||
chs := csh.resizeColumnHeaders(len(cds))
|
||||
for i := range cds {
|
||||
cds[i].mustWriteTo(&chs[i], sw)
|
||||
}
|
||||
csh.constColumns = append(csh.constColumns[:0], bd.constColumns...)
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
bb.B = csh.marshal(bb.B)
|
||||
putColumnsHeader(csh)
|
||||
bh.columnsHeaderOffset = sw.columnsHeaderWriter.bytesWritten
|
||||
bh.columnsHeaderSize = uint64(len(bb.B))
|
||||
if bh.columnsHeaderSize > maxColumnsHeaderSize {
|
||||
logger.Panicf("BUG: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", bh.columnsHeaderSize, maxColumnsHeaderSize)
|
||||
}
|
||||
sw.columnsHeaderWriter.MustWrite(bb.B)
|
||||
longTermBufPool.Put(bb)
|
||||
}
|
||||
|
||||
// mustReadFrom reads block data associated with bh from sr to bd.
|
||||
func (bd *blockData) mustReadFrom(bh *blockHeader, sr *streamReaders) {
|
||||
bd.reset()
|
||||
|
||||
bd.streamID = bh.streamID
|
||||
bd.uncompressedSizeBytes = bh.uncompressedSizeBytes
|
||||
bd.rowsCount = bh.rowsCount
|
||||
|
||||
// Read timestamps
|
||||
bd.timestampsData.mustReadFrom(&bh.timestampsHeader, sr, &bd.a)
|
||||
|
||||
// Read columns
|
||||
if bh.columnsHeaderOffset != sr.columnsHeaderReader.bytesRead {
|
||||
logger.Panicf("FATAL: %s: unexpected columnsHeaderOffset=%d; must equal to the number of bytes read: %d",
|
||||
sr.columnsHeaderReader.Path(), bh.columnsHeaderOffset, sr.columnsHeaderReader.bytesRead)
|
||||
}
|
||||
columnsHeaderSize := bh.columnsHeaderSize
|
||||
if columnsHeaderSize > maxColumnsHeaderSize {
|
||||
logger.Panicf("BUG: %s: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", sr.columnsHeaderReader.Path(), columnsHeaderSize, maxColumnsHeaderSize)
|
||||
}
|
||||
bb := longTermBufPool.Get()
|
||||
bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(columnsHeaderSize))
|
||||
sr.columnsHeaderReader.MustReadFull(bb.B)
|
||||
|
||||
csh := getColumnsHeader()
|
||||
if err := csh.unmarshal(bb.B); err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot unmarshal columnsHeader: %s", sr.columnsHeaderReader.Path(), err)
|
||||
}
|
||||
longTermBufPool.Put(bb)
|
||||
chs := csh.columnHeaders
|
||||
cds := bd.resizeColumnsData(len(chs))
|
||||
for i := range chs {
|
||||
cds[i].mustReadFrom(&chs[i], sr, &bd.a)
|
||||
}
|
||||
bd.constColumns = append(bd.constColumns[:0], csh.constColumns...)
|
||||
putColumnsHeader(csh)
|
||||
}
|
||||
|
||||
// timestampsData contains the encoded timestamps data.
|
||||
type timestampsData struct {
|
||||
// data contains packed timestamps data.
|
||||
data []byte
|
||||
|
||||
// marshalType is the marshal type for timestamps
|
||||
marshalType encoding.MarshalType
|
||||
|
||||
// minTimestamp is the minimum timestamp in the timestamps data
|
||||
minTimestamp int64
|
||||
|
||||
// maxTimestamp is the maximum timestamp in the timestamps data
|
||||
maxTimestamp int64
|
||||
}
|
||||
|
||||
// reset resets td for subsequent re-use
|
||||
func (td *timestampsData) reset() {
|
||||
td.data = nil
|
||||
td.marshalType = 0
|
||||
td.minTimestamp = 0
|
||||
td.maxTimestamp = 0
|
||||
}
|
||||
|
||||
// copyFrom copies src to td.
|
||||
func (td *timestampsData) copyFrom(src *timestampsData, a *arena) {
|
||||
td.reset()
|
||||
|
||||
td.data = a.copyBytes(src.data)
|
||||
td.marshalType = src.marshalType
|
||||
td.minTimestamp = src.minTimestamp
|
||||
td.maxTimestamp = src.maxTimestamp
|
||||
}
|
||||
|
||||
// mustWriteTo writes td to sw and updates th accordingly
|
||||
func (td *timestampsData) mustWriteTo(th *timestampsHeader, sw *streamWriters) {
|
||||
th.reset()
|
||||
|
||||
th.marshalType = td.marshalType
|
||||
th.minTimestamp = td.minTimestamp
|
||||
th.maxTimestamp = td.maxTimestamp
|
||||
th.blockOffset = sw.timestampsWriter.bytesWritten
|
||||
th.blockSize = uint64(len(td.data))
|
||||
if th.blockSize > maxTimestampsBlockSize {
|
||||
logger.Panicf("BUG: too big timestampsHeader.blockSize: %d bytes; mustn't exceed %d bytes", th.blockSize, maxTimestampsBlockSize)
|
||||
}
|
||||
sw.timestampsWriter.MustWrite(td.data)
|
||||
}
|
||||
|
||||
// mustReadFrom reads timestamps data associated with th from sr to td.
|
||||
func (td *timestampsData) mustReadFrom(th *timestampsHeader, sr *streamReaders, a *arena) {
|
||||
td.reset()
|
||||
|
||||
td.marshalType = th.marshalType
|
||||
td.minTimestamp = th.minTimestamp
|
||||
td.maxTimestamp = th.maxTimestamp
|
||||
|
||||
timestampsReader := &sr.timestampsReader
|
||||
if th.blockOffset != timestampsReader.bytesRead {
|
||||
logger.Panicf("FATAL: %s: unexpected timestampsHeader.blockOffset=%d; must equal to the number of bytes read: %d",
|
||||
timestampsReader.Path(), th.blockOffset, timestampsReader.bytesRead)
|
||||
}
|
||||
timestampsBlockSize := th.blockSize
|
||||
if timestampsBlockSize > maxTimestampsBlockSize {
|
||||
logger.Panicf("FATAL: %s: too big timestamps block with %d bytes; the maximum supported block size is %d bytes",
|
||||
timestampsReader.Path(), timestampsBlockSize, maxTimestampsBlockSize)
|
||||
}
|
||||
td.data = a.newBytes(int(timestampsBlockSize))
|
||||
timestampsReader.MustReadFull(td.data)
|
||||
}
|
||||
|
||||
// columnData contains packed data for a single column.
|
||||
type columnData struct {
|
||||
// name is the column name
|
||||
name string
|
||||
|
||||
// valueType is the type of values stored in valuesData
|
||||
valueType valueType
|
||||
|
||||
// minValue is the minimum encoded uint* or float64 value in the columnHeader
|
||||
//
|
||||
// It is used for fast detection of whether the given columnHeader contains values in the given range
|
||||
minValue uint64
|
||||
|
||||
// maxValue is the maximum encoded uint* or float64 value in the columnHeader
|
||||
//
|
||||
// It is used for fast detection of whether the given columnHeader contains values in the given range
|
||||
maxValue uint64
|
||||
|
||||
// valuesDict contains unique values for valueType = valueTypeDict
|
||||
valuesDict valuesDict
|
||||
|
||||
// valuesData contains packed values data for the given column
|
||||
valuesData []byte
|
||||
|
||||
// bloomFilterData contains packed bloomFilter data for the given column
|
||||
bloomFilterData []byte
|
||||
}
|
||||
|
||||
// reset resets cd for subsequent re-use
|
||||
func (cd *columnData) reset() {
|
||||
cd.name = ""
|
||||
cd.valueType = 0
|
||||
|
||||
cd.minValue = 0
|
||||
cd.maxValue = 0
|
||||
cd.valuesDict.reset()
|
||||
|
||||
cd.valuesData = nil
|
||||
cd.bloomFilterData = nil
|
||||
}
|
||||
|
||||
// copyFrom copies src to cd.
|
||||
func (cd *columnData) copyFrom(src *columnData, a *arena) {
|
||||
cd.reset()
|
||||
|
||||
cd.name = src.name
|
||||
cd.valueType = src.valueType
|
||||
|
||||
cd.minValue = src.minValue
|
||||
cd.maxValue = src.maxValue
|
||||
cd.valuesDict.copyFrom(&src.valuesDict)
|
||||
|
||||
cd.valuesData = a.copyBytes(src.valuesData)
|
||||
cd.bloomFilterData = a.copyBytes(src.bloomFilterData)
|
||||
}
|
||||
|
||||
// mustWriteTo writes cd to sw and updates ch accordingly.
|
||||
func (cd *columnData) mustWriteTo(ch *columnHeader, sw *streamWriters) {
|
||||
ch.reset()
|
||||
|
||||
valuesWriter := &sw.fieldValuesWriter
|
||||
bloomFilterWriter := &sw.fieldBloomFilterWriter
|
||||
if cd.name == "" {
|
||||
valuesWriter = &sw.messageValuesWriter
|
||||
bloomFilterWriter = &sw.messageBloomFilterWriter
|
||||
}
|
||||
|
||||
ch.name = cd.name
|
||||
ch.valueType = cd.valueType
|
||||
|
||||
ch.minValue = cd.minValue
|
||||
ch.maxValue = cd.maxValue
|
||||
ch.valuesDict.copyFrom(&cd.valuesDict)
|
||||
|
||||
// marshal values
|
||||
ch.valuesSize = uint64(len(cd.valuesData))
|
||||
if ch.valuesSize > maxValuesBlockSize {
|
||||
logger.Panicf("BUG: too big valuesSize: %d bytes; mustn't exceed %d bytes", ch.valuesSize, maxValuesBlockSize)
|
||||
}
|
||||
ch.valuesOffset = valuesWriter.bytesWritten
|
||||
valuesWriter.MustWrite(cd.valuesData)
|
||||
|
||||
// marshal bloom filter
|
||||
ch.bloomFilterSize = uint64(len(cd.bloomFilterData))
|
||||
if ch.bloomFilterSize > maxBloomFilterBlockSize {
|
||||
logger.Panicf("BUG: too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", ch.bloomFilterSize, maxBloomFilterBlockSize)
|
||||
}
|
||||
ch.bloomFilterOffset = bloomFilterWriter.bytesWritten
|
||||
bloomFilterWriter.MustWrite(cd.bloomFilterData)
|
||||
}
|
||||
|
||||
// mustReadFrom reads columns data associated with ch from sr to cd.
|
||||
func (cd *columnData) mustReadFrom(ch *columnHeader, sr *streamReaders, a *arena) {
|
||||
cd.reset()
|
||||
|
||||
valuesReader := &sr.fieldValuesReader
|
||||
bloomFilterReader := &sr.fieldBloomFilterReader
|
||||
if ch.name == "" {
|
||||
valuesReader = &sr.messageValuesReader
|
||||
bloomFilterReader = &sr.messageBloomFilterReader
|
||||
}
|
||||
|
||||
cd.name = ch.name
|
||||
cd.valueType = ch.valueType
|
||||
|
||||
cd.minValue = ch.minValue
|
||||
cd.maxValue = ch.maxValue
|
||||
cd.valuesDict.copyFrom(&ch.valuesDict)
|
||||
|
||||
// read values
|
||||
if ch.valuesOffset != valuesReader.bytesRead {
|
||||
logger.Panicf("FATAL: %s: unexpected columnHeader.valuesOffset=%d; must equal to the number of bytes read: %d",
|
||||
valuesReader.Path(), ch.valuesOffset, valuesReader.bytesRead)
|
||||
}
|
||||
valuesSize := ch.valuesSize
|
||||
if valuesSize > maxValuesBlockSize {
|
||||
logger.Panicf("FATAL: %s: values block size cannot exceed %d bytes; got %d bytes", valuesReader.Path(), maxValuesBlockSize, valuesSize)
|
||||
}
|
||||
cd.valuesData = a.newBytes(int(valuesSize))
|
||||
valuesReader.MustReadFull(cd.valuesData)
|
||||
|
||||
// read bloom filter
|
||||
// The bloom filter is missing for valueTypeDict columns.
|
||||
if ch.valueType != valueTypeDict {
|
||||
if ch.bloomFilterOffset != bloomFilterReader.bytesRead {
|
||||
logger.Panicf("FATAL: %s: unexpected columnHeader.bloomFilterOffset=%d; must equal to the number of bytes read: %d",
|
||||
bloomFilterReader.Path(), ch.bloomFilterOffset, bloomFilterReader.bytesRead)
|
||||
}
|
||||
bloomFilterSize := ch.bloomFilterSize
|
||||
if bloomFilterSize > maxBloomFilterBlockSize {
|
||||
logger.Panicf("FATAL: %s: bloom filter block size cannot exceed %d bytes; got %d bytes", bloomFilterReader.Path(), maxBloomFilterBlockSize, bloomFilterSize)
|
||||
}
|
||||
cd.bloomFilterData = a.newBytes(int(bloomFilterSize))
|
||||
bloomFilterReader.MustReadFull(cd.bloomFilterData)
|
||||
}
|
||||
}
|
lib/logstorage/block_data_test.go (new file, 106 lines)
@@ -0,0 +1,106 @@
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
)
|
||||
|
||||
func TestBlockDataReset(t *testing.T) {
|
||||
bd := &blockData{
|
||||
streamID: streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 432,
|
||||
},
|
||||
},
|
||||
uncompressedSizeBytes: 2344,
|
||||
rowsCount: 134,
|
||||
timestampsData: timestampsData{
|
||||
data: []byte("foo"),
|
||||
marshalType: encoding.MarshalTypeDeltaConst,
|
||||
minTimestamp: 1234,
|
||||
maxTimestamp: 23443,
|
||||
},
|
||||
columnsData: []columnData{
|
||||
{
|
||||
name: "foo",
|
||||
valueType: valueTypeUint16,
|
||||
valuesData: []byte("aaa"),
|
||||
bloomFilterData: []byte("bsdf"),
|
||||
},
|
||||
},
|
||||
constColumns: []Field{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
}
|
||||
bd.reset()
|
||||
bdZero := &blockData{
|
||||
columnsData: []columnData{},
|
||||
constColumns: []Field{},
|
||||
}
|
||||
if !reflect.DeepEqual(bd, bdZero) {
|
||||
t.Fatalf("unexpected non-zero blockData after reset: %v", bd)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlockDataCopyFrom(t *testing.T) {
|
||||
f := func(bd *blockData) {
|
||||
t.Helper()
|
||||
var bd2 blockData
|
||||
bd2.copyFrom(bd)
|
||||
bd2.a.b = nil
|
||||
if !reflect.DeepEqual(bd, &bd2) {
|
||||
t.Fatalf("unexpected blockData copy\ngot\n%v\nwant\n%v", &bd2, bd)
|
||||
}
|
||||
|
||||
// Try copying it again to the same destination
|
||||
bd2.copyFrom(bd)
|
||||
bd2.a.b = nil
|
||||
if !reflect.DeepEqual(bd, &bd2) {
|
||||
t.Fatalf("unexpected blockData copy to the same destination\ngot\n%v\nwant\n%v", &bd2, bd)
|
||||
}
|
||||
}
|
||||
f(&blockData{})
|
||||
|
||||
bd := &blockData{
|
||||
streamID: streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 432,
|
||||
},
|
||||
},
|
||||
uncompressedSizeBytes: 8943,
|
||||
rowsCount: 134,
|
||||
timestampsData: timestampsData{
|
||||
data: []byte("foo"),
|
||||
marshalType: encoding.MarshalTypeDeltaConst,
|
||||
minTimestamp: 1234,
|
||||
maxTimestamp: 23443,
|
||||
},
|
||||
columnsData: []columnData{
|
||||
{
|
||||
name: "foo",
|
||||
valueType: valueTypeUint16,
|
||||
valuesData: []byte("aaa"),
|
||||
bloomFilterData: []byte("bsdf"),
|
||||
},
|
||||
{
|
||||
name: "bar",
|
||||
valuesData: []byte("aaa"),
|
||||
bloomFilterData: []byte("bsdf"),
|
||||
},
|
||||
},
|
||||
constColumns: []Field{
|
||||
{
|
||||
Name: "foobar",
|
||||
Value: "baz",
|
||||
},
|
||||
},
|
||||
}
|
||||
f(bd)
|
||||
}
|
lib/logstorage/block_header.go (new file, 766 lines)
@@ -0,0 +1,766 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// blockHeader contains information about a single block.
|
||||
//
|
||||
// blockHeader is stored in the indexFilename file.
|
||||
type blockHeader struct {
|
||||
// streamID is a stream id for entries in the block
|
||||
streamID streamID
|
||||
|
||||
// uncompressedSizeBytes is the original (uncompressed) size of log entries stored in the block
|
||||
uncompressedSizeBytes uint64
|
||||
|
||||
// rowsCount is the number of log entries stored in the block
|
||||
rowsCount uint64
|
||||
|
||||
// timestampsHeader contains information about timestamps for log entries in the block
|
||||
timestampsHeader timestampsHeader
|
||||
|
||||
// columnsHeaderOffset is the offset of columnsHeader at columnsHeaderFilename
|
||||
columnsHeaderOffset uint64
|
||||
|
||||
// columnsHeaderSize is the size of columnsHeader at columnsHeaderFilename
|
||||
columnsHeaderSize uint64
|
||||
}
|
||||
|
||||
// reset resets bh, so it can be re-used.
|
||||
func (bh *blockHeader) reset() {
|
||||
bh.streamID.reset()
|
||||
bh.uncompressedSizeBytes = 0
|
||||
bh.rowsCount = 0
|
||||
bh.timestampsHeader.reset()
|
||||
bh.columnsHeaderOffset = 0
|
||||
bh.columnsHeaderSize = 0
|
||||
}
|
||||
|
||||
func (bh *blockHeader) copyFrom(src *blockHeader) {
|
||||
bh.reset()
|
||||
|
||||
bh.streamID = src.streamID
|
||||
bh.uncompressedSizeBytes = src.uncompressedSizeBytes
|
||||
bh.rowsCount = src.rowsCount
|
||||
bh.timestampsHeader.copyFrom(&src.timestampsHeader)
|
||||
bh.columnsHeaderOffset = src.columnsHeaderOffset
|
||||
bh.columnsHeaderSize = src.columnsHeaderSize
|
||||
}
|
||||
|
||||
// marshal appends the marshaled bh to dst and returns the result.
|
||||
func (bh *blockHeader) marshal(dst []byte) []byte {
|
||||
// Do not store the version used for encoding directly in the block header, since:
|
||||
// - all the block headers in the same part use the same encoding
|
||||
// - the block header encoding version can be put in metadata file for the part (aka metadataFilename)
|
||||
|
||||
dst = bh.streamID.marshal(dst)
|
||||
dst = encoding.MarshalVarUint64(dst, bh.uncompressedSizeBytes)
|
||||
dst = encoding.MarshalVarUint64(dst, bh.rowsCount)
|
||||
dst = bh.timestampsHeader.marshal(dst)
|
||||
dst = encoding.MarshalVarUint64(dst, bh.columnsHeaderOffset)
|
||||
dst = encoding.MarshalVarUint64(dst, bh.columnsHeaderSize)
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
// unmarshal unmarshals bh from src and returns the remaining tail.
|
||||
func (bh *blockHeader) unmarshal(src []byte) ([]byte, error) {
|
||||
bh.reset()
|
||||
|
||||
srcOrig := src
|
||||
|
||||
// unmarshal bh.streamID
|
||||
tail, err := bh.streamID.unmarshal(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal streamID: %w", err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
// unmarshal bh.uncompressedSizeBytes
|
||||
tail, n, err := encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal uncompressedSizeBytes: %w", err)
|
||||
}
|
||||
bh.uncompressedSizeBytes = n
|
||||
src = tail
|
||||
|
||||
// unmarshal bh.rowsCount
|
||||
tail, n, err = encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal rowsCount: %w", err)
|
||||
}
|
||||
if n > maxRowsPerBlock {
|
||||
return srcOrig, fmt.Errorf("too big value for rowsCount: %d; mustn't exceed %d", n, maxRowsPerBlock)
|
||||
}
|
||||
bh.rowsCount = n
|
||||
src = tail
|
||||
|
||||
// unmarshal bh.timestampsHeader
|
||||
tail, err = bh.timestampsHeader.unmarshal(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal timestampsHeader: %w", err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
// unmarshal columnsHeaderOffset
|
||||
tail, n, err = encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal columnsHeaderOffset: %w", err)
|
||||
}
|
||||
bh.columnsHeaderOffset = n
|
||||
src = tail
|
||||
|
||||
// unmarshal columnsHeaderSize
|
||||
tail, n, err = encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal columnsHeaderSize: %w", err)
|
||||
}
|
||||
if n > maxColumnsHeaderSize {
|
||||
return srcOrig, fmt.Errorf("too big value for columnsHeaderSize: %d; mustn't exceed %d", n, maxColumnsHeaderSize)
|
||||
}
|
||||
bh.columnsHeaderSize = n
|
||||
src = tail
|
||||
|
||||
return src, nil
|
||||
}
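// Editorial sketch, not part of the original commit: the numeric blockHeader fields are
// stored with variable-length encoding, so small values occupy a single byte. Round trip:
func exampleVarUint64RoundTripSketch(v uint64) (uint64, error) {
	buf := encoding.MarshalVarUint64(nil, v)
	_, n, err := encoding.UnmarshalVarUint64(buf)
	return n, err // n == v when err == nil
}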
|
||||
|
||||
func getBlockHeader() *blockHeader {
|
||||
v := blockHeaderPool.Get()
|
||||
if v == nil {
|
||||
return &blockHeader{}
|
||||
}
|
||||
return v.(*blockHeader)
|
||||
}
|
||||
|
||||
func putBlockHeader(bh *blockHeader) {
|
||||
bh.reset()
|
||||
blockHeaderPool.Put(bh)
|
||||
}
|
||||
|
||||
var blockHeaderPool sync.Pool
|
||||
|
||||
// unmarshalBlockHeaders appends blockHeader entries unmarshaled from src to dst and returns the result.
|
||||
func unmarshalBlockHeaders(dst []blockHeader, src []byte) ([]blockHeader, error) {
|
||||
dstOrig := dst
|
||||
for len(src) > 0 {
|
||||
if len(dst) < cap(dst) {
|
||||
dst = dst[:len(dst)+1]
|
||||
} else {
|
||||
dst = append(dst, blockHeader{})
|
||||
}
|
||||
bh := &dst[len(dst)-1]
|
||||
tail, err := bh.unmarshal(src)
|
||||
if err != nil {
|
||||
return dstOrig, fmt.Errorf("cannot unmarshal blockHeader entries: %w", err)
|
||||
}
|
||||
src = tail
|
||||
}
|
||||
if err := validateBlockHeaders(dst[len(dstOrig):]); err != nil {
|
||||
return dstOrig, err
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
func validateBlockHeaders(bhs []blockHeader) error {
|
||||
for i := 1; i < len(bhs); i++ {
|
||||
bhCurr := &bhs[i]
|
||||
bhPrev := &bhs[i-1]
|
||||
if bhCurr.streamID.less(&bhPrev.streamID) {
|
||||
return fmt.Errorf("unexpected blockHeader with smaller streamID=%s after bigger streamID=%s at position %d", &bhCurr.streamID, &bhPrev.streamID, i)
|
||||
}
|
||||
if !bhCurr.streamID.equal(&bhPrev.streamID) {
|
||||
continue
|
||||
}
|
||||
thCurr := bhCurr.timestampsHeader
|
||||
thPrev := bhPrev.timestampsHeader
|
||||
if thCurr.minTimestamp < thPrev.minTimestamp {
|
||||
return fmt.Errorf("unexpected blockHeader with smaller timestamp=%d after bigger timestamp=%d at position %d", thCurr.minTimestamp, thPrev.minTimestamp, i)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
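// Editorial sketch, not part of the original commit: block headers must be ordered by
// streamID and, within a single stream, by non-decreasing minTimestamp. For example:
func exampleValidateBlockHeadersOrderingSketch(sid streamID) error {
	bhs := []blockHeader{
		{streamID: sid, timestampsHeader: timestampsHeader{minTimestamp: 100}},
		{streamID: sid, timestampsHeader: timestampsHeader{minTimestamp: 50}},
	}
	// The second header has a smaller minTimestamp for the same stream,
	// so validateBlockHeaders returns a non-nil error here.
	return validateBlockHeaders(bhs)
}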
|
||||
|
||||
func resetBlockHeaders(bhs []blockHeader) []blockHeader {
|
||||
for i := range bhs {
|
||||
bhs[i].reset()
|
||||
}
|
||||
return bhs[:0]
|
||||
}
|
||||
|
||||
func getColumnsHeader() *columnsHeader {
|
||||
v := columnsHeaderPool.Get()
|
||||
if v == nil {
|
||||
return &columnsHeader{}
|
||||
}
|
||||
return v.(*columnsHeader)
|
||||
}
|
||||
|
||||
func putColumnsHeader(csh *columnsHeader) {
|
||||
csh.reset()
|
||||
columnsHeaderPool.Put(csh)
|
||||
}
|
||||
|
||||
var columnsHeaderPool sync.Pool
|
||||
|
||||
// columnsHeader contains information about columns in a single block.
|
||||
//
|
||||
// columnsHeader is stored in the columnsHeaderFilename file.
|
||||
type columnsHeader struct {
|
||||
// columnHeaders contains the information about every column seen in the block.
|
||||
columnHeaders []columnHeader
|
||||
|
||||
// constColumns contain fields with constant values across all the block entries.
|
||||
constColumns []Field
|
||||
}
|
||||
|
||||
func (csh *columnsHeader) reset() {
|
||||
chs := csh.columnHeaders
|
||||
for i := range chs {
|
||||
chs[i].reset()
|
||||
}
|
||||
csh.columnHeaders = chs[:0]
|
||||
|
||||
ccs := csh.constColumns
|
||||
for i := range ccs {
|
||||
ccs[i].Reset()
|
||||
}
|
||||
csh.constColumns = ccs[:0]
|
||||
}
|
||||
|
||||
func (csh *columnsHeader) getConstColumnValue(name string) string {
|
||||
if name == "_msg" {
|
||||
name = ""
|
||||
}
|
||||
ccs := csh.constColumns
|
||||
for i := range ccs {
|
||||
cc := &ccs[i]
|
||||
if cc.Name == name {
|
||||
return cc.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (csh *columnsHeader) getColumnHeader(name string) *columnHeader {
|
||||
if name == "_msg" {
|
||||
name = ""
|
||||
}
|
||||
chs := csh.columnHeaders
|
||||
for i := range chs {
|
||||
ch := &chs[i]
|
||||
if ch.name == name {
|
||||
return ch
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
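// Editorial sketch, not part of the original commit: both lookups above treat "_msg" as
// an alias for the unnamed message column, so callers may use either name:
func exampleMsgAliasSketch(csh *columnsHeader) {
	chByAlias := csh.getColumnHeader("_msg")
	chByEmptyName := csh.getColumnHeader("")
	// Both calls return the same column header (or nil if the block has no message column).
	_, _ = chByAlias, chByEmptyName
}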
|
||||
|
||||
func (csh *columnsHeader) resizeConstColumns(columnsLen int) []Field {
|
||||
ccs := csh.constColumns
|
||||
if n := columnsLen - cap(ccs); n > 0 {
|
||||
ccs = append(ccs[:cap(ccs)], make([]Field, n)...)
|
||||
}
|
||||
ccs = ccs[:columnsLen]
|
||||
csh.constColumns = ccs
|
||||
return ccs
|
||||
}
|
||||
|
||||
func (csh *columnsHeader) resizeColumnHeaders(columnHeadersLen int) []columnHeader {
|
||||
chs := csh.columnHeaders
|
||||
if n := columnHeadersLen - cap(chs); n > 0 {
|
||||
chs = append(chs[:cap(chs)], make([]columnHeader, n)...)
|
||||
}
|
||||
chs = chs[:columnHeadersLen]
|
||||
csh.columnHeaders = chs
|
||||
return chs
|
||||
}
|
||||
|
||||
func (csh *columnsHeader) marshal(dst []byte) []byte {
|
||||
chs := csh.columnHeaders
|
||||
dst = encoding.MarshalVarUint64(dst, uint64(len(chs)))
|
||||
for i := range chs {
|
||||
dst = chs[i].marshal(dst)
|
||||
}
|
||||
|
||||
ccs := csh.constColumns
|
||||
dst = encoding.MarshalVarUint64(dst, uint64(len(ccs)))
|
||||
for i := range ccs {
|
||||
dst = ccs[i].marshal(dst)
|
||||
}
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
func (csh *columnsHeader) unmarshal(src []byte) error {
|
||||
csh.reset()
|
||||
|
||||
// unmarshal columnHeaders
|
||||
tail, n, err := encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal columnHeaders len: %w", err)
|
||||
}
|
||||
if n > maxColumnsPerBlock {
|
||||
return fmt.Errorf("too many column headers: %d; mustn't exceed %d", n, maxColumnsPerBlock)
|
||||
}
|
||||
src = tail
|
||||
chs := csh.resizeColumnHeaders(int(n))
|
||||
for i := range chs {
|
||||
tail, err = chs[i].unmarshal(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal columnHeader %d out of %d columnHeaders: %w", i, len(chs), err)
|
||||
}
|
||||
src = tail
|
||||
}
|
||||
csh.columnHeaders = chs
|
||||
|
||||
// unmarshal constColumns
|
||||
tail, n, err = encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal constColumns len: %w", err)
|
||||
}
|
||||
if n+uint64(len(csh.columnHeaders)) > maxColumnsPerBlock {
|
||||
return fmt.Errorf("too many columns: %d; mustn't exceed %d", n+uint64(len(csh.columnHeaders)), maxColumnsPerBlock)
|
||||
}
|
||||
src = tail
|
||||
ccs := csh.resizeConstColumns(int(n))
|
||||
for i := range ccs {
|
||||
tail, err = ccs[i].unmarshal(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal constColumn %d out of %d columns: %w", i, len(ccs), err)
|
||||
}
|
||||
src = tail
|
||||
}
|
||||
|
||||
// Verify that the src is empty
|
||||
if len(src) > 0 {
|
||||
return fmt.Errorf("unexpected non-empty tail left after unmarshaling columnsHeader: len(tail)=%d", len(src))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// columnHeader contains information about the values which belong to a single label in a single block.
//
// The main column with an empty name is stored in messageValuesFilename,
// while the rest of the columns are stored in fieldValuesFilename.
// This allows minimizing disk read IO when filtering by non-message columns.
//
// Every block column also contains a bloom filter for all the tokens stored in the column.
// This bloom filter is used for quickly determining whether the given block may contain the given tokens.
//
// Tokens in the bloom filter depend on valueType:
//
// - valueTypeString stores lowercased tokens seen in all the values
// - valueTypeDict doesn't store anything in the bloom filter, since all the encoded values
//   are available directly in the valuesDict field
// - valueTypeUint8, valueTypeUint16, valueTypeUint32 and valueTypeUint64 store encoded uint values
// - valueTypeFloat64 stores encoded float64 values
// - valueTypeIPv4 stores IPs encoded into uint32
// - valueTypeTimestampISO8601 stores timestamps encoded into uint64
//
// The bloom filter for the main column with an empty name is stored in messageBloomFilename,
// while bloom filters for the rest of the columns are stored in fieldBloomFilename.
|
||||
type columnHeader struct {
|
||||
// name contains column name aka label name
|
||||
name string
|
||||
|
||||
// valueType is the type of values stored in the block
|
||||
valueType valueType
|
||||
|
||||
// minValue is the minimum encoded value for uint*, ipv4, timestamp and float64 value in the columnHeader
|
||||
//
|
||||
// It is used for fast detection of whether the given columnHeader contains values in the given range
|
||||
minValue uint64
|
||||
|
||||
// maxValue is the maximum encoded value for uint*, ipv4, timestamp and float64 value in the columnHeader
|
||||
//
|
||||
// It is used for fast detection of whether the given columnHeader contains values in the given range
|
||||
maxValue uint64
|
||||
|
||||
// valuesDict contains unique values for valueType = valueTypeDict
|
||||
valuesDict valuesDict
|
||||
|
||||
// valuesOffset contains the offset of the block in either messageValuesFilename or fieldValuesFilename
|
||||
valuesOffset uint64
|
||||
|
||||
// valuesSize contains the size of the block in either messageValuesFilename or fieldValuesFilename
|
||||
valuesSize uint64
|
||||
|
||||
// bloomFilterOffset contains the offset of the bloom filter in either messageBloomFilename or fieldBloomFilename
|
||||
bloomFilterOffset uint64
|
||||
|
||||
// bloomFilterSize contains the size of the bloom filter in either messageBloomFilename or fieldBloomFilename
|
||||
bloomFilterSize uint64
|
||||
}
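// Editorial sketch, not part of the original commit: minValue/maxValue allow skipping
// blocks cheaply during queries. For uint-encoded columns a filter could do roughly:
func uintColumnMayContainSketch(ch *columnHeader, v uint64) bool {
	// Note: for valueTypeFloat64 the bounds are stored via math.Float64bits and must be
	// converted back with math.Float64frombits before comparing (see marshal/unmarshal below).
	return v >= ch.minValue && v <= ch.maxValue
}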
|
||||
|
||||
// reset resets ch
|
||||
func (ch *columnHeader) reset() {
|
||||
ch.name = ""
|
||||
ch.valueType = 0
|
||||
|
||||
ch.minValue = 0
|
||||
ch.maxValue = 0
|
||||
ch.valuesDict.reset()
|
||||
|
||||
ch.valuesOffset = 0
|
||||
ch.valuesSize = 0
|
||||
|
||||
ch.bloomFilterOffset = 0
|
||||
ch.bloomFilterSize = 0
|
||||
}
|
||||
|
||||
// marshal appends marshaled ch to dst and returns the result.
|
||||
func (ch *columnHeader) marshal(dst []byte) []byte {
|
||||
// check minValue/maxValue
|
||||
if ch.valueType == valueTypeFloat64 {
|
||||
minValue := math.Float64frombits(ch.minValue)
|
||||
maxValue := math.Float64frombits(ch.maxValue)
|
||||
if minValue > maxValue {
|
||||
logger.Panicf("BUG: minValue=%g must be smaller than maxValue=%g", minValue, maxValue)
|
||||
}
|
||||
} else {
|
||||
if ch.minValue > ch.maxValue {
|
||||
logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d", ch.minValue, ch.maxValue)
|
||||
}
|
||||
}
|
||||
|
||||
// Encode common fields - ch.name and ch.valueType
|
||||
dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(ch.name))
|
||||
dst = append(dst, byte(ch.valueType))
|
||||
|
||||
// Encode other fields depending on ch.valueType
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
case valueTypeDict:
|
||||
dst = ch.valuesDict.marshal(dst)
|
||||
dst = ch.marshalValues(dst)
|
||||
case valueTypeUint8:
|
||||
dst = append(dst, byte(ch.minValue))
|
||||
dst = append(dst, byte(ch.maxValue))
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
case valueTypeUint16:
|
||||
dst = encoding.MarshalUint16(dst, uint16(ch.minValue))
|
||||
dst = encoding.MarshalUint16(dst, uint16(ch.maxValue))
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
case valueTypeUint32:
|
||||
dst = encoding.MarshalUint32(dst, uint32(ch.minValue))
|
||||
dst = encoding.MarshalUint32(dst, uint32(ch.maxValue))
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
case valueTypeUint64:
|
||||
dst = encoding.MarshalUint64(dst, ch.minValue)
|
||||
dst = encoding.MarshalUint64(dst, ch.maxValue)
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
case valueTypeFloat64:
|
||||
// float64 values are encoded as uint64 via math.Float64bits()
|
||||
dst = encoding.MarshalUint64(dst, ch.minValue)
|
||||
dst = encoding.MarshalUint64(dst, ch.maxValue)
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
case valueTypeIPv4:
|
||||
dst = encoding.MarshalUint32(dst, uint32(ch.minValue))
|
||||
dst = encoding.MarshalUint32(dst, uint32(ch.maxValue))
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
case valueTypeTimestampISO8601:
|
||||
// timestamps are encoded in nanoseconds
|
||||
dst = encoding.MarshalUint64(dst, ch.minValue)
|
||||
dst = encoding.MarshalUint64(dst, ch.maxValue)
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
default:
|
||||
logger.Panicf("BUG: unknown valueType=%d", ch.valueType)
|
||||
}
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
func (ch *columnHeader) marshalValuesAndBloomFilters(dst []byte) []byte {
|
||||
dst = ch.marshalValues(dst)
|
||||
dst = ch.marshalBloomFilters(dst)
|
||||
return dst
|
||||
}
|
||||
|
||||
func (ch *columnHeader) marshalValues(dst []byte) []byte {
|
||||
dst = encoding.MarshalVarUint64(dst, ch.valuesOffset)
|
||||
dst = encoding.MarshalVarUint64(dst, ch.valuesSize)
|
||||
return dst
|
||||
}
|
||||
|
||||
func (ch *columnHeader) marshalBloomFilters(dst []byte) []byte {
|
||||
dst = encoding.MarshalVarUint64(dst, ch.bloomFilterOffset)
|
||||
dst = encoding.MarshalVarUint64(dst, ch.bloomFilterSize)
|
||||
return dst
|
||||
}
|
||||
|
||||
// unmarshal unmarshals ch from src and returns the tail left after unmarshaling.
|
||||
func (ch *columnHeader) unmarshal(src []byte) ([]byte, error) {
|
||||
ch.reset()
|
||||
|
||||
srcOrig := src
|
||||
|
||||
// Unmarshal column name
|
||||
tail, data, err := encoding.UnmarshalBytes(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal column name: %w", err)
|
||||
}
|
||||
// Do not use bytesutil.InternBytes(data) here, since it is slower than string(data) in production
|
||||
ch.name = string(data)
|
||||
src = tail
|
||||
|
||||
// Unmarshal value type
|
||||
if len(src) < 1 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal valueType from 0 bytes for column %q; need at least 1 byte", ch.name)
|
||||
}
|
||||
ch.valueType = valueType(src[0])
|
||||
src = src[1:]
|
||||
|
||||
// Unmarshal the rest of data depending on valueType
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
tail, err = ch.unmarshalValuesAndBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeString for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeDict:
|
||||
tail, err = ch.valuesDict.unmarshal(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal dict at valueTypeDict for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
tail, err = ch.unmarshalValues(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values at valueTypeDict for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeUint8:
|
||||
if len(src) < 2 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint8 from %d bytes for column %q; need at least 2 bytes", len(src), ch.name)
|
||||
}
|
||||
ch.minValue = uint64(src[0])
|
||||
ch.maxValue = uint64(src[1])
|
||||
src = src[2:]
|
||||
|
||||
tail, err = ch.unmarshalValuesAndBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint8 for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeUint16:
|
||||
if len(src) < 4 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint16 from %d bytes for column %q; need at least 4 bytes", len(src), ch.name)
|
||||
}
|
||||
ch.minValue = uint64(encoding.UnmarshalUint16(src))
|
||||
ch.maxValue = uint64(encoding.UnmarshalUint16(src[2:]))
|
||||
src = src[4:]
|
||||
|
||||
tail, err = ch.unmarshalValuesAndBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint16 for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeUint32:
|
||||
if len(src) < 8 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint32 from %d bytes for column %q; need at least 8 bytes", len(src), ch.name)
|
||||
}
|
||||
ch.minValue = uint64(encoding.UnmarshalUint32(src))
|
||||
ch.maxValue = uint64(encoding.UnmarshalUint32(src[4:]))
|
||||
src = src[8:]
|
||||
|
||||
tail, err = ch.unmarshalValuesAndBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint32 for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeUint64:
|
||||
if len(src) < 16 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeUint64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name)
|
||||
}
|
||||
ch.minValue = encoding.UnmarshalUint64(src)
|
||||
ch.maxValue = encoding.UnmarshalUint64(src[8:])
|
||||
src = src[16:]
|
||||
|
||||
tail, err = ch.unmarshalValuesAndBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint64 for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeFloat64:
|
||||
if len(src) < 16 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeFloat64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name)
|
||||
}
|
||||
// min and max values must be converted to real values with math.Float64frombits() during querying.
|
||||
ch.minValue = encoding.UnmarshalUint64(src)
|
||||
ch.maxValue = encoding.UnmarshalUint64(src[8:])
|
||||
src = src[16:]
|
||||
|
||||
tail, err = ch.unmarshalValuesAndBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeFloat64 for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeIPv4:
|
||||
if len(src) < 8 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeIPv4 from %d bytes for column %q; need at least 8 bytes", len(src), ch.name)
|
||||
}
|
||||
ch.minValue = uint64(encoding.UnmarshalUint32(src))
|
||||
ch.maxValue = uint64(encoding.UnmarshalUint32(src[4:]))
|
||||
src = src[8:]
|
||||
|
||||
tail, err = ch.unmarshalValuesAndBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeIPv4 for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeTimestampISO8601:
|
||||
if len(src) < 16 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeTimestampISO8601 from %d bytes for column %q; need at least 16 bytes",
|
||||
len(src), ch.name)
|
||||
}
|
||||
ch.minValue = encoding.UnmarshalUint64(src)
|
||||
ch.maxValue = encoding.UnmarshalUint64(src[8:])
|
||||
src = src[16:]
|
||||
|
||||
tail, err = ch.unmarshalValuesAndBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeTimestampISO8601 for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
default:
|
||||
return srcOrig, fmt.Errorf("unexpected valueType=%d for column %q", ch.valueType, ch.name)
|
||||
}
|
||||
|
||||
return src, nil
|
||||
}
|
||||
|
||||
func (ch *columnHeader) unmarshalValuesAndBloomFilters(src []byte) ([]byte, error) {
|
||||
srcOrig := src
|
||||
|
||||
tail, err := ch.unmarshalValues(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values: %w", err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
tail, err = ch.unmarshalBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal bloom filters: %w", err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
return src, nil
|
||||
}
|
||||
|
||||
func (ch *columnHeader) unmarshalValues(src []byte) ([]byte, error) {
|
||||
srcOrig := src
|
||||
|
||||
tail, n, err := encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal valuesOffset: %w", err)
|
||||
}
|
||||
ch.valuesOffset = n
|
||||
src = tail
|
||||
|
||||
tail, n, err = encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal valuesSize: %w", err)
|
||||
}
|
||||
if n > maxValuesBlockSize {
|
||||
return srcOrig, fmt.Errorf("too big valuesSize: %d bytes; mustn't exceed %d bytes", n, maxValuesBlockSize)
|
||||
}
|
||||
ch.valuesSize = n
|
||||
src = tail
|
||||
|
||||
return src, nil
|
||||
}
|
||||
|
||||
func (ch *columnHeader) unmarshalBloomFilters(src []byte) ([]byte, error) {
|
||||
srcOrig := src
|
||||
|
||||
tail, n, err := encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal bloomFilterOffset: %w", err)
|
||||
}
|
||||
ch.bloomFilterOffset = n
|
||||
src = tail
|
||||
|
||||
tail, n, err = encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal bloomFilterSize: %w", err)
|
||||
}
|
||||
if n > maxBloomFilterBlockSize {
|
||||
return srcOrig, fmt.Errorf("too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", n, maxBloomFilterBlockSize)
|
||||
}
|
||||
ch.bloomFilterSize = n
|
||||
src = tail
|
||||
|
||||
return src, nil
|
||||
}
|
||||
|
||||
// timestampsHeader contains the information about timestamps block.
|
||||
type timestampsHeader struct {
|
||||
// blockOffset is an offset of timestamps block inside timestampsFilename file
|
||||
blockOffset uint64
|
||||
|
||||
// blockSize is the size of the timestamps block inside timestampsFilename file
|
||||
blockSize uint64
|
||||
|
||||
// minTimestamp is the minimum timestamp seen in the block
|
||||
minTimestamp int64
|
||||
|
||||
// maxTimestamp is the maximum timestamp seen in the block
|
||||
maxTimestamp int64
|
||||
|
||||
// marshalType is the type used for encoding the timestamps block
|
||||
marshalType encoding.MarshalType
|
||||
}
|
||||
|
||||
// reset resets th, so it can be reused
|
||||
func (th *timestampsHeader) reset() {
|
||||
th.blockOffset = 0
|
||||
th.blockSize = 0
|
||||
th.minTimestamp = 0
|
||||
th.maxTimestamp = 0
|
||||
th.marshalType = 0
|
||||
}
|
||||
|
||||
func (th *timestampsHeader) copyFrom(src *timestampsHeader) {
|
||||
th.blockOffset = src.blockOffset
|
||||
th.blockSize = src.blockSize
|
||||
th.minTimestamp = src.minTimestamp
|
||||
th.maxTimestamp = src.maxTimestamp
|
||||
th.marshalType = src.marshalType
|
||||
}
|
||||
|
||||
// marshal appends marshaled th to dst and returns the result.
|
||||
func (th *timestampsHeader) marshal(dst []byte) []byte {
|
||||
dst = encoding.MarshalUint64(dst, th.blockOffset)
|
||||
dst = encoding.MarshalUint64(dst, th.blockSize)
|
||||
dst = encoding.MarshalUint64(dst, uint64(th.minTimestamp))
|
||||
dst = encoding.MarshalUint64(dst, uint64(th.maxTimestamp))
|
||||
dst = append(dst, byte(th.marshalType))
|
||||
return dst
|
||||
}
|
||||
|
||||
// unmarshal unmarshals th from src and returns the tail left after the unmarshaling.
|
||||
func (th *timestampsHeader) unmarshal(src []byte) ([]byte, error) {
|
||||
th.reset()
|
||||
|
||||
if len(src) < 33 {
|
||||
return src, fmt.Errorf("cannot unmarshal timestampsHeader from %d bytes; need at least 33 bytes", len(src))
|
||||
}
|
||||
|
||||
th.blockOffset = encoding.UnmarshalUint64(src)
|
||||
th.blockSize = encoding.UnmarshalUint64(src[8:])
|
||||
th.minTimestamp = int64(encoding.UnmarshalUint64(src[16:]))
|
||||
th.maxTimestamp = int64(encoding.UnmarshalUint64(src[24:]))
|
||||
th.marshalType = encoding.MarshalType(src[32])
|
||||
|
||||
return src[33:], nil
|
||||
}
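// Editorial sketch, not part of the original commit: timestampsHeader uses a fixed-size
// encoding - four 8-byte fields plus a single byte for the marshal type:
const timestampsHeaderEncodedSizeSketch = 8 + 8 + 8 + 8 + 1 // = 33 bytes, matching the check above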
|
lib/logstorage/block_header_test.go (new file, 454 lines)
@@ -0,0 +1,454 @@
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
)
|
||||
|
||||
func TestBlockHeaderMarshalUnmarshal(t *testing.T) {
|
||||
f := func(bh *blockHeader, marshaledLen int) {
|
||||
t.Helper()
|
||||
data := bh.marshal(nil)
|
||||
if len(data) != marshaledLen {
|
||||
t.Fatalf("unexpected lengths of the marshaled blockHeader; got %d; want %d", len(data), marshaledLen)
|
||||
}
|
||||
bh2 := &blockHeader{}
|
||||
tail, err := bh2.unmarshal(data)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in unmarshal: %s", err)
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
t.Fatalf("unexpected non-empty tail after unmarshal: %X", tail)
|
||||
}
|
||||
if !reflect.DeepEqual(bh, bh2) {
|
||||
t.Fatalf("unexpected blockHeader unmarshaled\ngot\n%v\nwant\n%v", bh2, bh)
|
||||
}
|
||||
}
|
||||
f(&blockHeader{}, 61)
|
||||
f(&blockHeader{
|
||||
streamID: streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
},
|
||||
id: u128{
|
||||
lo: 3443,
|
||||
hi: 23434,
|
||||
},
|
||||
},
|
||||
uncompressedSizeBytes: 4344,
|
||||
rowsCount: 1234,
|
||||
timestampsHeader: timestampsHeader{
|
||||
blockOffset: 13234,
|
||||
blockSize: 8843,
|
||||
minTimestamp: -4334,
|
||||
maxTimestamp: 23434,
|
||||
marshalType: encoding.MarshalTypeNearestDelta2,
|
||||
},
|
||||
columnsHeaderOffset: 4384,
|
||||
columnsHeaderSize: 894,
|
||||
}, 65)
|
||||
}
|
||||
|
||||
func TestColumnsHeaderMarshalUnmarshal(t *testing.T) {
|
||||
f := func(csh *columnsHeader, marshaledLen int) {
|
||||
t.Helper()
|
||||
data := csh.marshal(nil)
|
||||
if len(data) != marshaledLen {
|
||||
t.Fatalf("unexpected lengths of the marshaled columnsHeader; got %d; want %d", len(data), marshaledLen)
|
||||
}
|
||||
csh2 := &columnsHeader{}
|
||||
err := csh2.unmarshal(data)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in unmarshal: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(csh, csh2) {
|
||||
t.Fatalf("unexpected blockHeader unmarshaled\ngot\n%v\nwant\n%v", csh2, csh)
|
||||
}
|
||||
}
|
||||
f(&columnsHeader{}, 2)
|
||||
f(&columnsHeader{
|
||||
columnHeaders: []columnHeader{
|
||||
{
|
||||
name: "foobar",
|
||||
valueType: valueTypeString,
|
||||
valuesOffset: 12345,
|
||||
valuesSize: 23434,
|
||||
bloomFilterOffset: 89843,
|
||||
bloomFilterSize: 8934,
|
||||
},
|
||||
{
|
||||
name: "message",
|
||||
valueType: valueTypeUint16,
|
||||
minValue: 123,
|
||||
maxValue: 456,
|
||||
valuesOffset: 3412345,
|
||||
valuesSize: 234434,
|
||||
bloomFilterOffset: 83,
|
||||
bloomFilterSize: 34,
|
||||
},
|
||||
},
|
||||
constColumns: []Field{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
}, 50)
|
||||
}
|
||||
|
||||
func TestBlockHeaderUnmarshalFailure(t *testing.T) {
|
||||
f := func(data []byte) {
|
||||
t.Helper()
|
||||
dataOrig := append([]byte{}, data...)
|
||||
bh := getBlockHeader()
|
||||
defer putBlockHeader(bh)
|
||||
tail, err := bh.unmarshal(data)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if string(tail) != string(dataOrig) {
|
||||
t.Fatalf("unexpected tail;\ngot\n%q\nwant\n%q", tail, dataOrig)
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
f([]byte("foo"))
|
||||
|
||||
bh := blockHeader{
|
||||
streamID: streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
},
|
||||
id: u128{
|
||||
lo: 3443,
|
||||
hi: 23434,
|
||||
},
|
||||
},
|
||||
uncompressedSizeBytes: 4344,
|
||||
rowsCount: 1234,
|
||||
timestampsHeader: timestampsHeader{
|
||||
blockOffset: 13234,
|
||||
blockSize: 8843,
|
||||
minTimestamp: -4334,
|
||||
maxTimestamp: 23434,
|
||||
marshalType: encoding.MarshalTypeNearestDelta2,
|
||||
},
|
||||
columnsHeaderOffset: 4384,
|
||||
columnsHeaderSize: 894,
|
||||
}
|
||||
data := bh.marshal(nil)
|
||||
for len(data) > 0 {
|
||||
data = data[:len(data)-1]
|
||||
f(data)
|
||||
}
|
||||
}
|
||||
|
||||
func TestColumnsHeaderUnmarshalFailure(t *testing.T) {
|
||||
f := func(data []byte) {
|
||||
t.Helper()
|
||||
csh := getColumnsHeader()
|
||||
defer putColumnsHeader(csh)
|
||||
err := csh.unmarshal(data)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
f([]byte("foo"))
|
||||
|
||||
csh := columnsHeader{
|
||||
columnHeaders: []columnHeader{
|
||||
{
|
||||
name: "foobar",
|
||||
valueType: valueTypeString,
|
||||
valuesOffset: 12345,
|
||||
valuesSize: 23434,
|
||||
bloomFilterOffset: 89843,
|
||||
bloomFilterSize: 8934,
|
||||
},
|
||||
{
|
||||
name: "message",
|
||||
valueType: valueTypeUint16,
|
||||
minValue: 123,
|
||||
maxValue: 456,
|
||||
valuesOffset: 3412345,
|
||||
valuesSize: 234434,
|
||||
bloomFilterOffset: 83,
|
||||
bloomFilterSize: 34,
|
||||
},
|
||||
},
|
||||
constColumns: []Field{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
}
|
||||
data := csh.marshal(nil)
|
||||
for len(data) > 0 {
|
||||
data = data[:len(data)-1]
|
||||
f(data)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlockHeaderReset(t *testing.T) {
|
||||
bh := &blockHeader{
|
||||
streamID: streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
},
|
||||
id: u128{
|
||||
lo: 3443,
|
||||
hi: 23434,
|
||||
},
|
||||
},
|
||||
uncompressedSizeBytes: 8984,
|
||||
rowsCount: 1234,
|
||||
timestampsHeader: timestampsHeader{
|
||||
blockOffset: 13234,
|
||||
blockSize: 8843,
|
||||
minTimestamp: -4334,
|
||||
maxTimestamp: 23434,
|
||||
marshalType: encoding.MarshalTypeNearestDelta2,
|
||||
},
|
||||
columnsHeaderOffset: 12332,
|
||||
columnsHeaderSize: 234,
|
||||
}
|
||||
bh.reset()
|
||||
bhZero := &blockHeader{}
|
||||
if !reflect.DeepEqual(bh, bhZero) {
|
||||
t.Fatalf("unexpected non-zero blockHeader after reset: %v", bh)
|
||||
}
|
||||
}
|
||||
|
||||
func TestColumnsHeaderReset(t *testing.T) {
|
||||
csh := &columnsHeader{
|
||||
columnHeaders: []columnHeader{
|
||||
{
|
||||
name: "foobar",
|
||||
valueType: valueTypeString,
|
||||
valuesOffset: 12345,
|
||||
valuesSize: 23434,
|
||||
bloomFilterOffset: 89843,
|
||||
bloomFilterSize: 8934,
|
||||
},
|
||||
{
|
||||
name: "message",
|
||||
valueType: valueTypeUint16,
|
||||
minValue: 123,
|
||||
maxValue: 456,
|
||||
valuesOffset: 3412345,
|
||||
valuesSize: 234434,
|
||||
bloomFilterOffset: 83,
|
||||
bloomFilterSize: 34,
|
||||
},
|
||||
},
|
||||
constColumns: []Field{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
}
|
||||
csh.reset()
|
||||
cshZero := &columnsHeader{
|
||||
columnHeaders: []columnHeader{},
|
||||
constColumns: []Field{},
|
||||
}
|
||||
if !reflect.DeepEqual(csh, cshZero) {
|
||||
t.Fatalf("unexpected non-zero columnsHeader after reset: %v", csh)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarshalUnmarshalBlockHeaders(t *testing.T) {
|
||||
f := func(bhs []blockHeader, marshaledLen int) {
|
||||
t.Helper()
|
||||
var data []byte
|
||||
for i := range bhs {
|
||||
data = bhs[i].marshal(data)
|
||||
}
|
||||
if len(data) != marshaledLen {
|
||||
t.Fatalf("unexpected length for marshaled blockHeader entries; got %d; want %d", len(data), marshaledLen)
|
||||
}
|
||||
bhs2, err := unmarshalBlockHeaders(nil, data)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when unmarshaling blockHeader entries: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(bhs, bhs2) {
|
||||
t.Fatalf("unexpected blockHeader entries unmarshaled\ngot\n%v\nwant\n%v", bhs2, bhs)
|
||||
}
|
||||
}
|
||||
f(nil, 0)
|
||||
f([]blockHeader{{}}, 61)
|
||||
f([]blockHeader{
|
||||
{},
|
||||
{
|
||||
streamID: streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
},
|
||||
id: u128{
|
||||
lo: 3443,
|
||||
hi: 23434,
|
||||
},
|
||||
},
|
||||
uncompressedSizeBytes: 89894,
|
||||
rowsCount: 1234,
|
||||
timestampsHeader: timestampsHeader{
|
||||
blockOffset: 13234,
|
||||
blockSize: 8843,
|
||||
minTimestamp: -4334,
|
||||
maxTimestamp: 23434,
|
||||
marshalType: encoding.MarshalTypeNearestDelta2,
|
||||
},
|
||||
columnsHeaderOffset: 12332,
|
||||
columnsHeaderSize: 234,
|
||||
},
|
||||
}, 127)
|
||||
}
|
||||
|
||||
func TestColumnHeaderMarshalUnmarshal(t *testing.T) {
|
||||
f := func(ch *columnHeader, marshaledLen int) {
|
||||
t.Helper()
|
||||
data := ch.marshal(nil)
|
||||
if len(data) != marshaledLen {
|
||||
t.Fatalf("unexpected marshaled length of columnHeader; got %d; want %d", len(data), marshaledLen)
|
||||
}
|
||||
var ch2 columnHeader
|
||||
tail, err := ch2.unmarshal(data)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in umarshal(%v): %s", ch, err)
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
t.Fatalf("unexpected non-empty tail after unmarshal(%v): %X", ch, tail)
|
||||
}
|
||||
if !reflect.DeepEqual(ch, &ch2) {
|
||||
t.Fatalf("unexpected columnHeader after unmarshal;\ngot\n%v\nwant\n%v", &ch2, ch)
|
||||
}
|
||||
}
|
||||
f(&columnHeader{
|
||||
name: "foo",
|
||||
valueType: valueTypeUint8,
|
||||
}, 11)
|
||||
ch := &columnHeader{
|
||||
name: "foobar",
|
||||
valueType: valueTypeDict,
|
||||
|
||||
valuesOffset: 12345,
|
||||
valuesSize: 254452,
|
||||
}
|
||||
ch.valuesDict.getOrAdd("abc")
|
||||
f(ch, 18)
|
||||
}
|
||||
|
||||
func TestColumnHeaderUnmarshalFailure(t *testing.T) {
|
||||
f := func(data []byte) {
|
||||
t.Helper()
|
||||
dataOrig := append([]byte{}, data...)
|
||||
var ch columnHeader
|
||||
tail, err := ch.unmarshal(data)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if string(tail) != string(dataOrig) {
|
||||
t.Fatalf("unexpected tail left; got %q; want %q", tail, dataOrig)
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
f([]byte("foo"))
|
||||
|
||||
ch := &columnHeader{
|
||||
name: "abc",
|
||||
valueType: valueTypeUint16,
|
||||
bloomFilterSize: 3244,
|
||||
}
|
||||
data := ch.marshal(nil)
|
||||
f(data[:len(data)-1])
|
||||
}
|
||||
|
||||
func TestColumnHeaderReset(t *testing.T) {
|
||||
ch := &columnHeader{
|
||||
name: "foobar",
|
||||
valueType: valueTypeUint16,
|
||||
|
||||
valuesOffset: 12345,
|
||||
valuesSize: 254452,
|
||||
|
||||
bloomFilterOffset: 34898234,
|
||||
bloomFilterSize: 873434,
|
||||
}
|
||||
ch.valuesDict.getOrAdd("abc")
|
||||
ch.reset()
|
||||
chZero := &columnHeader{}
|
||||
chZero.valuesDict.values = []string{}
|
||||
if !reflect.DeepEqual(ch, chZero) {
|
||||
t.Fatalf("unexpected non-zero columnHeader after reset: %v", ch)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTimestampsHeaderMarshalUnmarshal(t *testing.T) {
|
||||
f := func(th *timestampsHeader, marshaledLen int) {
|
||||
t.Helper()
|
||||
data := th.marshal(nil)
|
||||
if len(data) != marshaledLen {
|
||||
t.Fatalf("unexpected length of marshaled timestampsHeader; got %d; want %d", len(data), marshaledLen)
|
||||
}
|
||||
var th2 timestampsHeader
|
||||
tail, err := th2.unmarshal(data)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in unmarshal(%v): %s", th, err)
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
t.Fatalf("unexpected non-nil tail after unmarshal(%v): %X", th, tail)
|
||||
}
|
||||
if !reflect.DeepEqual(th, &th2) {
|
||||
t.Fatalf("unexpected timestampsHeader after unmarshal; got\n%v\nwant\n%v", &th2, th)
|
||||
}
|
||||
}
|
||||
f(&timestampsHeader{}, 33)
|
||||
|
||||
f(&timestampsHeader{
|
||||
blockOffset: 12345,
|
||||
blockSize: 3424834,
|
||||
minTimestamp: -123443,
|
||||
maxTimestamp: 234343,
|
||||
marshalType: encoding.MarshalTypeZSTDNearestDelta,
|
||||
}, 33)
|
||||
}
|
||||
|
||||
func TestTimestampsHeaderUnmarshalFailure(t *testing.T) {
|
||||
f := func(data []byte) {
|
||||
t.Helper()
|
||||
dataOrig := append([]byte{}, data...)
|
||||
var th timestampsHeader
|
||||
tail, err := th.unmarshal(data)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if string(tail) != string(dataOrig) {
|
||||
t.Fatalf("unexpected tail left; got %q; want %q", tail, dataOrig)
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
f([]byte("foo"))
|
||||
}
|
||||
|
||||
func TestTimestampsHeaderReset(t *testing.T) {
|
||||
th := &timestampsHeader{
|
||||
blockOffset: 12345,
|
||||
blockSize: 3424834,
|
||||
minTimestamp: -123443,
|
||||
maxTimestamp: 234343,
|
||||
marshalType: encoding.MarshalTypeZSTDNearestDelta,
|
||||
}
|
||||
th.reset()
|
||||
thZero := &timestampsHeader{}
|
||||
if !reflect.DeepEqual(th, thZero) {
|
||||
t.Fatalf("unexpected non-zero timestampsHeader after reset: %v", th)
|
||||
}
|
||||
}
|
645
lib/logstorage/block_search.go
Normal file
|
@ -0,0 +1,645 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
type blockSearchWork struct {
|
||||
// p is the part where the block belongs to.
|
||||
p *part
|
||||
|
||||
// so contains search options for the block search
|
||||
so *searchOptions
|
||||
|
||||
// bh is the header of the block to search.
|
||||
bh blockHeader
|
||||
}
|
||||
|
||||
func newBlockSearchWork(p *part, so *searchOptions, bh *blockHeader) *blockSearchWork {
|
||||
var bsw blockSearchWork
|
||||
bsw.p = p
|
||||
bsw.so = so
|
||||
bsw.bh.copyFrom(bh)
|
||||
return &bsw
|
||||
}
|
||||
|
||||
func getBlockSearch() *blockSearch {
|
||||
v := blockSearchPool.Get()
|
||||
if v == nil {
|
||||
return &blockSearch{}
|
||||
}
|
||||
return v.(*blockSearch)
|
||||
}
|
||||
|
||||
func putBlockSearch(bs *blockSearch) {
|
||||
bs.reset()
|
||||
blockSearchPool.Put(bs)
|
||||
}
|
||||
|
||||
var blockSearchPool sync.Pool
|
||||
|
||||
type blockSearch struct {
|
||||
// bsw is the actual work to be performed on the block pointed to by bsw.bh
|
||||
bsw *blockSearchWork
|
||||
|
||||
// br contains result for the search in the block after search() call
|
||||
br blockResult
|
||||
|
||||
// timestampsCache contains cached timestamps for the given block.
|
||||
timestampsCache *encoding.Int64s
|
||||
|
||||
// bloomFilterCache contains cached bloom filters for requested columns in the given block
|
||||
bloomFilterCache map[string]*bloomFilter
|
||||
|
||||
// valuesCache contains cached values for requested columns in the given block
|
||||
valuesCache map[string]*stringBucket
|
||||
|
||||
// sbu is used for unmarshaling local columns
|
||||
sbu stringsBlockUnmarshaler
|
||||
|
||||
// csh is the columnsHeader associated with the given block
|
||||
csh columnsHeader
|
||||
}
|
||||
|
||||
func (bs *blockSearch) reset() {
|
||||
bs.bsw = nil
|
||||
bs.br.reset()
|
||||
|
||||
if bs.timestampsCache != nil {
|
||||
encoding.PutInt64s(bs.timestampsCache)
|
||||
bs.timestampsCache = nil
|
||||
}
|
||||
|
||||
bloomFilterCache := bs.bloomFilterCache
|
||||
for k, bf := range bloomFilterCache {
|
||||
putBloomFilter(bf)
|
||||
delete(bloomFilterCache, k)
|
||||
}
|
||||
|
||||
valuesCache := bs.valuesCache
|
||||
for k, values := range valuesCache {
|
||||
putStringBucket(values)
|
||||
delete(valuesCache, k)
|
||||
}
|
||||
|
||||
bs.sbu.reset()
|
||||
bs.csh.reset()
|
||||
}
|
||||
|
||||
func (bs *blockSearch) partPath() string {
|
||||
return bs.bsw.p.path
|
||||
}
|
||||
|
||||
func (bs *blockSearch) search(bsw *blockSearchWork) {
|
||||
bs.reset()
|
||||
|
||||
bs.bsw = bsw
|
||||
|
||||
bs.csh.initFromBlockHeader(bsw.p, &bsw.bh)
|
||||
|
||||
// search rows matching the given filter
|
||||
bm := getFilterBitmap(int(bsw.bh.rowsCount))
|
||||
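// All the bits are set initially, so every row is considered matching; the filter below clears the bits for non-matching rows.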
bm.setBits()
|
||||
bs.bsw.so.filter.apply(bs, bm)
|
||||
|
||||
bs.br.mustInit(bs, bm)
|
||||
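// Stop early if the filter matched no rows in the block.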
if bm.isZero() {
|
||||
putFilterBitmap(bm)
|
||||
return
|
||||
}
|
||||
|
||||
// fetch the requested columns to bs.br.
|
||||
for _, columnName := range bs.bsw.so.resultColumnNames {
|
||||
switch columnName {
|
||||
case "_stream":
|
||||
bs.br.addStreamColumn(bs)
|
||||
case "_time":
|
||||
bs.br.addTimeColumn(bs)
|
||||
default:
|
||||
v := bs.csh.getConstColumnValue(columnName)
|
||||
if v != "" {
|
||||
bs.br.addConstColumn(v)
|
||||
continue
|
||||
}
|
||||
ch := bs.csh.getColumnHeader(columnName)
|
||||
if ch == nil {
|
||||
bs.br.addConstColumn("")
|
||||
} else {
|
||||
bs.br.addColumn(bs, ch, bm)
|
||||
}
|
||||
}
|
||||
}
|
||||
putFilterBitmap(bm)
|
||||
}
|
||||
|
||||
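// initFromBlockHeader reads the columnsHeader data for bh from p and unmarshals it into csh.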
func (csh *columnsHeader) initFromBlockHeader(p *part, bh *blockHeader) {
|
||||
bb := longTermBufPool.Get()
|
||||
columnsHeaderSize := bh.columnsHeaderSize
|
||||
if columnsHeaderSize > maxColumnsHeaderSize {
|
||||
logger.Panicf("FATAL: %s: columns header size cannot exceed %d bytes; got %d bytes", p.path, maxColumnsHeaderSize, columnsHeaderSize)
|
||||
}
|
||||
bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(columnsHeaderSize))
|
||||
p.columnsHeaderFile.MustReadAt(bb.B, int64(bh.columnsHeaderOffset))
|
||||
|
||||
if err := csh.unmarshal(bb.B); err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", p.path, err)
|
||||
}
|
||||
longTermBufPool.Put(bb)
|
||||
}
|
||||
|
||||
// getBloomFilterForColumn returns bloom filter for the given ch.
|
||||
//
|
||||
// The returned bloom filter belongs to bs, so it becomes invalid after bs reset.
|
||||
func (bs *blockSearch) getBloomFilterForColumn(ch *columnHeader) *bloomFilter {
|
||||
bf := bs.bloomFilterCache[ch.name]
|
||||
if bf != nil {
|
||||
return bf
|
||||
}
|
||||
|
||||
p := bs.bsw.p
|
||||
|
||||
bloomFilterFile := p.fieldBloomFilterFile
|
||||
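// An empty column name denotes the message column, whose data is stored in the message* files.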
if ch.name == "" {
|
||||
bloomFilterFile = p.messageBloomFilterFile
|
||||
}
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
bloomFilterSize := ch.bloomFilterSize
|
||||
if bloomFilterSize > maxBloomFilterBlockSize {
|
||||
logger.Panicf("FATAL: %s: bloom filter block size cannot exceed %d bytes; got %d bytes", bs.partPath(), maxBloomFilterBlockSize, bloomFilterSize)
|
||||
}
|
||||
bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(bloomFilterSize))
|
||||
bloomFilterFile.MustReadAt(bb.B, int64(ch.bloomFilterOffset))
|
||||
bf = getBloomFilter()
|
||||
if err := bf.unmarshal(bb.B); err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot unmarshal bloom filter: %s", bs.partPath(), err)
|
||||
}
|
||||
longTermBufPool.Put(bb)
|
||||
|
||||
if bs.bloomFilterCache == nil {
|
||||
bs.bloomFilterCache = make(map[string]*bloomFilter)
|
||||
}
|
||||
bs.bloomFilterCache[ch.name] = bf
|
||||
return bf
|
||||
}
|
||||
|
||||
// getValuesForColumn returns block values for the given ch.
|
||||
//
|
||||
// The returned values belong to bs, so they become invalid after bs reset.
|
||||
func (bs *blockSearch) getValuesForColumn(ch *columnHeader) []string {
|
||||
values := bs.valuesCache[ch.name]
|
||||
if values != nil {
|
||||
return values.a
|
||||
}
|
||||
|
||||
p := bs.bsw.p
|
||||
|
||||
valuesFile := p.fieldValuesFile
|
||||
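// Values for the column with an empty name (the message column) are stored in the message* files.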
if ch.name == "" {
|
||||
valuesFile = p.messageValuesFile
|
||||
}
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
valuesSize := ch.valuesSize
|
||||
if valuesSize > maxValuesBlockSize {
|
||||
logger.Panicf("FATAL: %s: values block size cannot exceed %d bytes; got %d bytes", bs.partPath(), maxValuesBlockSize, valuesSize)
|
||||
}
|
||||
bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(valuesSize))
|
||||
valuesFile.MustReadAt(bb.B, int64(ch.valuesOffset))
|
||||
|
||||
values = getStringBucket()
|
||||
var err error
|
||||
values.a, err = bs.sbu.unmarshal(values.a[:0], bb.B, bs.bsw.bh.rowsCount)
|
||||
longTermBufPool.Put(bb)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot unmarshal column %q: %s", bs.partPath(), ch.name, err)
|
||||
}
|
||||
|
||||
if bs.valuesCache == nil {
|
||||
bs.valuesCache = make(map[string]*stringBucket)
|
||||
}
|
||||
bs.valuesCache[ch.name] = values
|
||||
return values.a
|
||||
}
|
||||
|
||||
// getTimestamps returns timestamps for the given bs.
|
||||
//
|
||||
// The returned timestamps belong to bs, so they become invalid after bs reset.
|
||||
func (bs *blockSearch) getTimestamps() []int64 {
|
||||
timestamps := bs.timestampsCache
|
||||
if timestamps != nil {
|
||||
return timestamps.A
|
||||
}
|
||||
|
||||
p := bs.bsw.p
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
th := &bs.bsw.bh.timestampsHeader
|
||||
blockSize := th.blockSize
|
||||
if blockSize > maxTimestampsBlockSize {
|
||||
logger.Panicf("FATAL: %s: timestamps block size cannot exceed %d bytes; got %d bytes", bs.partPath(), maxTimestampsBlockSize, blockSize)
|
||||
}
|
||||
bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(blockSize))
|
||||
p.timestampsFile.MustReadAt(bb.B, int64(th.blockOffset))
|
||||
|
||||
rowsCount := int(bs.bsw.bh.rowsCount)
|
||||
timestamps = encoding.GetInt64s(rowsCount)
|
||||
var err error
|
||||
timestamps.A, err = encoding.UnmarshalTimestamps(timestamps.A[:0], bb.B, th.marshalType, th.minTimestamp, rowsCount)
|
||||
longTermBufPool.Put(bb)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot unmarshal timestamps: %s", bs.partPath(), err)
|
||||
}
|
||||
bs.timestampsCache = timestamps
|
||||
return timestamps.A
|
||||
}
|
||||
|
||||
// mustReadBlockHeaders reads the block headers for ih from p, appends them to dst and returns the result.
|
||||
func (ih *indexBlockHeader) mustReadBlockHeaders(dst []blockHeader, p *part) []blockHeader {
|
||||
bbCompressed := longTermBufPool.Get()
|
||||
indexBlockSize := ih.indexBlockSize
|
||||
if indexBlockSize > maxIndexBlockSize {
|
||||
logger.Panicf("FATAL: %s: index block size cannot exceed %d bytes; got %d bytes", p.indexFile.Path(), maxIndexBlockSize, indexBlockSize)
|
||||
}
|
||||
bbCompressed.B = bytesutil.ResizeNoCopyMayOverallocate(bbCompressed.B, int(indexBlockSize))
|
||||
p.indexFile.MustReadAt(bbCompressed.B, int64(ih.indexBlockOffset))
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
var err error
|
||||
bb.B, err = encoding.DecompressZSTD(bb.B, bbCompressed.B)
|
||||
longTermBufPool.Put(bbCompressed)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot decompress indexBlock read at offset %d with size %d: %s", p.indexFile.Path(), ih.indexBlockOffset, ih.indexBlockSize, err)
|
||||
}
|
||||
|
||||
dst, err = unmarshalBlockHeaders(dst, bb.B)
|
||||
longTermBufPool.Put(bb)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot unmarshal block headers read at offset %d with size %d: %s", p.indexFile.Path(), ih.indexBlockOffset, ih.indexBlockSize, err)
|
||||
}
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
type blockResult struct {
|
||||
buf []byte
|
||||
valuesBuf []string
|
||||
|
||||
// streamID is streamID for the given blockResult
|
||||
streamID streamID
|
||||
|
||||
// cs contains values for result columns
|
||||
cs []blockResultColumn
|
||||
|
||||
// timestamps contain timestamps for the selected log entries
|
||||
timestamps []int64
|
||||
}
|
||||
|
||||
func (br *blockResult) reset() {
|
||||
br.buf = br.buf[:0]
|
||||
|
||||
vb := br.valuesBuf
|
||||
for i := range vb {
|
||||
vb[i] = ""
|
||||
}
|
||||
br.valuesBuf = vb[:0]
|
||||
|
||||
br.streamID.reset()
|
||||
|
||||
cs := br.cs
|
||||
for i := range cs {
|
||||
cs[i].reset()
|
||||
}
|
||||
br.cs = cs[:0]
|
||||
|
||||
br.timestamps = br.timestamps[:0]
|
||||
}
|
||||
|
||||
func (br *blockResult) RowsCount() int {
|
||||
return len(br.timestamps)
|
||||
}
|
||||
|
||||
func (br *blockResult) mustInit(bs *blockSearch, bm *filterBitmap) {
|
||||
br.reset()
|
||||
|
||||
br.streamID = bs.bsw.bh.streamID
|
||||
|
||||
if !bm.isZero() {
|
||||
// Initialize timestamps, since they are used for determining the number of rows in br.RowsCount()
|
||||
srcTimestamps := bs.getTimestamps()
|
||||
dstTimestamps := br.timestamps[:0]
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
ts := srcTimestamps[idx]
|
||||
dstTimestamps = append(dstTimestamps, ts)
|
||||
return true
|
||||
})
|
||||
br.timestamps = dstTimestamps
|
||||
}
|
||||
}
|
||||
|
||||
func (br *blockResult) addColumn(bs *blockSearch, ch *columnHeader, bm *filterBitmap) {
|
||||
buf := br.buf
|
||||
valuesBuf := br.valuesBuf
|
||||
valuesBufLen := len(valuesBuf)
|
||||
var dictValues []string
|
||||
|
||||
appendValue := func(v string) {
|
||||
bufLen := len(buf)
|
||||
buf = append(buf, v...)
|
||||
s := bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
valuesBuf = append(valuesBuf, s)
|
||||
}
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
appendValue(v)
|
||||
return true
|
||||
})
|
||||
case valueTypeDict:
|
||||
dictValues = ch.valuesDict.values
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
if len(v) != 1 {
|
||||
logger.Panicf("FATAL: %s: unexpected dict value size for column %q; got %d bytes; want 1 byte", bs.partPath(), ch.name, len(v))
|
||||
}
|
||||
dictIdx := v[0]
|
||||
if int(dictIdx) >= len(dictValues) {
|
||||
logger.Panicf("FATAL: %s: too big dict index for column %q: %d; should be smaller than %d", bs.partPath(), ch.name, dictIdx, len(dictValues))
|
||||
}
|
||||
appendValue(v)
|
||||
return true
|
||||
})
|
||||
case valueTypeUint8:
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
if len(v) != 1 {
|
||||
logger.Panicf("FATAL: %s: unexpected size for uint8 column %q; got %d bytes; want 1 byte", bs.partPath(), ch.name, len(v))
|
||||
}
|
||||
appendValue(v)
|
||||
return true
|
||||
})
|
||||
case valueTypeUint16:
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
if len(v) != 2 {
|
||||
logger.Panicf("FATAL: %s: unexpected size for uint16 column %q; got %d bytes; want 2 bytes", bs.partPath(), ch.name, len(v))
|
||||
}
|
||||
appendValue(v)
|
||||
return true
|
||||
})
|
||||
case valueTypeUint32:
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
if len(v) != 4 {
|
||||
logger.Panicf("FATAL: %s: unexpected size for uint32 column %q; got %d bytes; want 4 bytes", bs.partPath(), ch.name, len(v))
|
||||
}
|
||||
appendValue(v)
|
||||
return true
|
||||
})
|
||||
case valueTypeUint64:
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected size for uint64 column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v))
|
||||
}
|
||||
appendValue(v)
|
||||
return true
|
||||
})
|
||||
case valueTypeFloat64:
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected size for float64 column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v))
|
||||
}
|
||||
appendValue(v)
|
||||
return true
|
||||
})
|
||||
case valueTypeIPv4:
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
if len(v) != 4 {
|
||||
logger.Panicf("FATAL: %s: unexpected size for ipv4 column %q; got %d bytes; want 4 bytes", bs.partPath(), ch.name, len(v))
|
||||
}
|
||||
appendValue(v)
|
||||
return true
|
||||
})
|
||||
case valueTypeTimestampISO8601:
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected size for timestmap column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v))
|
||||
}
|
||||
appendValue(v)
|
||||
return true
|
||||
})
|
||||
default:
|
||||
logger.Panicf("FATAL: %s: unknown valueType=%d for column %q", bs.partPath(), ch.valueType, ch.name)
|
||||
}
|
||||
|
||||
encodedValues := valuesBuf[valuesBufLen:]
|
||||
|
||||
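// Copy dict values into buffers owned by br, so the resulting column doesn't reference ch-owned memory.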
valuesBufLen = len(valuesBuf)
|
||||
for _, v := range dictValues {
|
||||
appendValue(v)
|
||||
}
|
||||
dictValues = valuesBuf[valuesBufLen:]
|
||||
|
||||
br.cs = append(br.cs, blockResultColumn{
|
||||
valueType: ch.valueType,
|
||||
dictValues: dictValues,
|
||||
encodedValues: encodedValues,
|
||||
})
|
||||
br.buf = buf
|
||||
br.valuesBuf = valuesBuf
|
||||
}
|
||||
|
||||
func (br *blockResult) addTimeColumn(bs *blockSearch) {
|
||||
br.cs = append(br.cs, blockResultColumn{
|
||||
isTime: true,
|
||||
})
|
||||
}
|
||||
|
||||
func (br *blockResult) addStreamColumn(bs *blockSearch) {
|
||||
bb := bbPool.Get()
|
||||
bb.B = bs.bsw.p.pt.appendStreamTagsByStreamID(bb.B[:0], &br.streamID)
|
||||
if len(bb.B) > 0 {
|
||||
st := GetStreamTags()
|
||||
mustUnmarshalStreamTags(st, bb.B)
|
||||
bb.B = st.marshalString(bb.B[:0])
|
||||
PutStreamTags(st)
|
||||
}
|
||||
s := bytesutil.ToUnsafeString(bb.B)
|
||||
br.addConstColumn(s)
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func (br *blockResult) addConstColumn(value string) {
|
||||
buf := br.buf
|
||||
bufLen := len(buf)
|
||||
buf = append(buf, value...)
|
||||
s := bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
br.buf = buf
|
||||
|
||||
valuesBuf := br.valuesBuf
|
||||
valuesBufLen := len(valuesBuf)
|
||||
valuesBuf = append(valuesBuf, s)
|
||||
br.valuesBuf = valuesBuf
|
||||
|
||||
br.cs = append(br.cs, blockResultColumn{
|
||||
isConst: true,
|
||||
valueType: valueTypeUnknown,
|
||||
encodedValues: valuesBuf[valuesBufLen:],
|
||||
})
|
||||
}
|
||||
|
||||
// getColumnValues returns values for the column with the given idx.
|
||||
//
|
||||
// The returned values are valid until br.reset() is called.
|
||||
func (br *blockResult) getColumnValues(idx int) []string {
|
||||
c := &br.cs[idx]
|
||||
if c.values != nil {
|
||||
return c.values
|
||||
}
|
||||
|
||||
buf := br.buf
|
||||
valuesBuf := br.valuesBuf
|
||||
valuesBufLen := len(valuesBuf)
|
||||
|
||||
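// A const column stores its single value in encodedValues[0] - replicate it for every selected row.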
if c.isConst {
|
||||
v := c.encodedValues[0]
|
||||
for range br.timestamps {
|
||||
valuesBuf = append(valuesBuf, v)
|
||||
}
|
||||
c.values = valuesBuf[valuesBufLen:]
|
||||
br.valuesBuf = valuesBuf
|
||||
return c.values
|
||||
}
|
||||
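// The _time column is materialized from br.timestamps in RFC3339Nano format.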
if c.isTime {
|
||||
for _, timestamp := range br.timestamps {
|
||||
t := time.Unix(0, timestamp).UTC()
|
||||
bufLen := len(buf)
|
||||
buf = t.AppendFormat(buf, time.RFC3339Nano)
|
||||
s := bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
valuesBuf = append(valuesBuf, s)
|
||||
}
|
||||
c.values = valuesBuf[valuesBufLen:]
|
||||
br.buf = buf
|
||||
br.valuesBuf = valuesBuf
|
||||
return c.values
|
||||
}
|
||||
|
||||
appendValue := func(v string) {
|
||||
bufLen := len(buf)
|
||||
buf = append(buf, v...)
|
||||
s := bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
valuesBuf = append(valuesBuf, s)
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
c.values = c.encodedValues
|
||||
return c.values
|
||||
case valueTypeDict:
|
||||
dictValues := c.dictValues
|
||||
for _, v := range c.encodedValues {
|
||||
dictIdx := v[0]
|
||||
appendValue(dictValues[dictIdx])
|
||||
}
|
||||
case valueTypeUint8:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.encodedValues {
|
||||
n := uint64(v[0])
|
||||
bb.B = strconv.AppendUint(bb.B[:0], n, 10)
|
||||
appendValue(bytesutil.ToUnsafeString(bb.B))
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint16:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.encodedValues {
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := uint64(encoding.UnmarshalUint16(b))
|
||||
bb.B = strconv.AppendUint(bb.B[:0], n, 10)
|
||||
appendValue(bytesutil.ToUnsafeString(bb.B))
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint32:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.encodedValues {
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := uint64(encoding.UnmarshalUint32(b))
|
||||
bb.B = strconv.AppendUint(bb.B[:0], n, 10)
|
||||
appendValue(bytesutil.ToUnsafeString(bb.B))
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint64:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.encodedValues {
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := encoding.UnmarshalUint64(b)
|
||||
bb.B = strconv.AppendUint(bb.B[:0], n, 10)
|
||||
appendValue(bytesutil.ToUnsafeString(bb.B))
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
case valueTypeFloat64:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.encodedValues {
|
||||
bb.B = toFloat64String(bb.B[:0], v)
|
||||
appendValue(bytesutil.ToUnsafeString(bb.B))
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
case valueTypeIPv4:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.encodedValues {
|
||||
bb.B = toIPv4String(bb.B[:0], v)
|
||||
appendValue(bytesutil.ToUnsafeString(bb.B))
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
case valueTypeTimestampISO8601:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.encodedValues {
|
||||
bb.B = toTimestampISO8601String(bb.B[:0], v)
|
||||
appendValue(bytesutil.ToUnsafeString(bb.B))
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
default:
|
||||
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
|
||||
c.values = valuesBuf[valuesBufLen:]
|
||||
br.buf = buf
|
||||
br.valuesBuf = valuesBuf
|
||||
|
||||
return c.values
|
||||
}
|
||||
|
||||
type blockResultColumn struct {
|
||||
// isConst is set to true if the column is const.
|
||||
//
|
||||
// The column value is stored in encodedValues[0]
|
||||
isConst bool
|
||||
|
||||
// isTime is set to true if the column contains _time values.
|
||||
//
|
||||
// The column values are stored in blockResult.timestamps
|
||||
isTime bool
|
||||
|
||||
// valueType is the type of non-const values
|
||||
valueType valueType
|
||||
|
||||
// dictValues contain dictionary values for valueTypeDict column
|
||||
dictValues []string
|
||||
|
||||
// encodedValues contain encoded values for non-const column
|
||||
encodedValues []string
|
||||
|
||||
// values contain decoded values after getColumnValues() call for the given column
|
||||
values []string
|
||||
}
|
||||
|
||||
func (c *blockResultColumn) reset() {
|
||||
c.isConst = false
|
||||
c.isTime = false
|
||||
c.valueType = valueTypeUnknown
|
||||
c.dictValues = nil
|
||||
c.encodedValues = nil
|
||||
c.values = nil
|
||||
}
|
288
lib/logstorage/block_stream_merger.go
Normal file
|
@ -0,0 +1,288 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// mustMergeBlockStreams merges bsrs to bsw and updates ph accordingly.
|
||||
//
|
||||
// Finalize() is guaranteed to be called on bsrs and bsw before returning from the func.
|
||||
func mustMergeBlockStreams(ph *partHeader, bsw *blockStreamWriter, bsrs []*blockStreamReader, stopCh <-chan struct{}) {
|
||||
bsm := getBlockStreamMerger()
|
||||
bsm.mustInit(bsw, bsrs)
|
||||
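// Repeatedly pick the reader with the smallest (streamID, minTimestamp) block and write its block to bsw.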
for len(bsm.readersHeap) > 0 {
|
||||
if needStop(stopCh) {
|
||||
break
|
||||
}
|
||||
bsr := bsm.readersHeap[0]
|
||||
bsm.mustWriteBlock(&bsr.blockData, bsw)
|
||||
if bsr.NextBlock() {
|
||||
heap.Fix(&bsm.readersHeap, 0)
|
||||
} else {
|
||||
heap.Pop(&bsm.readersHeap)
|
||||
}
|
||||
}
|
||||
bsm.mustFlushRows()
|
||||
putBlockStreamMerger(bsm)
|
||||
|
||||
bsw.Finalize(ph)
|
||||
mustCloseBlockStreamReaders(bsrs)
|
||||
}
|
||||
|
||||
// blockStreamMerger merges block streams
|
||||
type blockStreamMerger struct {
|
||||
// bsw is the block stream writer to write the merged blocks.
|
||||
bsw *blockStreamWriter
|
||||
|
||||
// bsrs contains the original readers passed to mustInit().
|
||||
// They are used by ReadersPaths()
|
||||
bsrs []*blockStreamReader
|
||||
|
||||
// readersHeap contains a heap of readers to read blocks to merge.
|
||||
readersHeap blockStreamReadersHeap
|
||||
|
||||
// streamID is the stream ID for the pending data.
|
||||
streamID streamID
|
||||
|
||||
// sbu is the unmarshaler for strings in rows and rowsTmp.
|
||||
sbu *stringsBlockUnmarshaler
|
||||
|
||||
// vd is the decoder for unmarshaled strings.
|
||||
vd *valuesDecoder
|
||||
|
||||
// bd is the pending blockData.
|
||||
// bd is unpacked into rows when needed.
|
||||
bd blockData
|
||||
|
||||
// rows is pending log entries.
|
||||
rows rows
|
||||
|
||||
// rowsTmp is temporary storage for log entries during merge.
|
||||
rowsTmp rows
|
||||
|
||||
// uncompressedRowsSizeBytes is the current size of uncompressed rows.
|
||||
//
|
||||
// It is used for flushing rows to blocks when their size reaches maxUncompressedBlockSize
|
||||
uncompressedRowsSizeBytes uint64
|
||||
}
|
||||
|
||||
func (bsm *blockStreamMerger) reset() {
|
||||
bsm.bsw = nil
|
||||
|
||||
rhs := bsm.readersHeap
|
||||
for i := range rhs {
|
||||
rhs[i] = nil
|
||||
}
|
||||
bsm.readersHeap = rhs[:0]
|
||||
|
||||
bsm.streamID.reset()
|
||||
bsm.resetRows()
|
||||
}
|
||||
|
||||
func (bsm *blockStreamMerger) resetRows() {
|
||||
if bsm.sbu != nil {
|
||||
putStringsBlockUnmarshaler(bsm.sbu)
|
||||
bsm.sbu = nil
|
||||
}
|
||||
if bsm.vd != nil {
|
||||
putValuesDecoder(bsm.vd)
|
||||
bsm.vd = nil
|
||||
}
|
||||
bsm.bd.reset()
|
||||
|
||||
bsm.rows.reset()
|
||||
bsm.rowsTmp.reset()
|
||||
|
||||
bsm.uncompressedRowsSizeBytes = 0
|
||||
}
|
||||
|
||||
func (bsm *blockStreamMerger) mustInit(bsw *blockStreamWriter, bsrs []*blockStreamReader) {
|
||||
bsm.reset()
|
||||
|
||||
bsm.bsw = bsw
|
||||
bsm.bsrs = bsrs
|
||||
|
||||
rsh := bsm.readersHeap[:0]
|
||||
for _, bsr := range bsrs {
|
||||
if bsr.NextBlock() {
|
||||
rsh = append(rsh, bsr)
|
||||
}
|
||||
}
|
||||
bsm.readersHeap = rsh
|
||||
heap.Init(&bsm.readersHeap)
|
||||
}
|
||||
|
||||
// mustWriteBlock writes bd to bsm
|
||||
func (bsm *blockStreamMerger) mustWriteBlock(bd *blockData, bsw *blockStreamWriter) {
|
||||
bsm.checkNextBlock(bd)
|
||||
switch {
|
||||
case !bd.streamID.equal(&bsm.streamID):
|
||||
// The bd contains another streamID.
|
||||
// Write the current log entries under the current streamID, then process the bd.
|
||||
bsm.mustFlushRows()
|
||||
bsm.streamID = bd.streamID
|
||||
if bd.uncompressedSizeBytes >= maxUncompressedBlockSize {
|
||||
// Fast path - write full bd to the output without extracting log entries from it.
|
||||
bsw.MustWriteBlockData(bd)
|
||||
} else {
|
||||
// Slow path - copy the bd to the curr bd.
|
||||
bsm.bd.copyFrom(bd)
|
||||
}
|
||||
case bd.uncompressedSizeBytes >= maxUncompressedBlockSize:
|
||||
// The bd contains the same streamID and it is full,
|
||||
// so it can be written next after the current log entries
|
||||
// without the need to merge the bd with the current log entries.
|
||||
// Write the current log entries and then the bd.
|
||||
bsm.mustFlushRows()
|
||||
bsw.MustWriteBlockData(bd)
|
||||
default:
|
||||
// The bd contains the same streamID and it isn't full,
|
||||
// so it must be merged with the current log entries.
|
||||
bsm.mustMergeRows(bd)
|
||||
}
|
||||
}
|
||||
|
||||
// checkNextBlock checks whether the bd can be written next after the current data.
|
||||
func (bsm *blockStreamMerger) checkNextBlock(bd *blockData) {
|
||||
if len(bsm.rows.timestamps) > 0 && bsm.bd.rowsCount > 0 {
|
||||
logger.Panicf("BUG: bsm.bd must be empty when bsm.rows isn't empty! got %d log entries in bsm.bd", bsm.bd.rowsCount)
|
||||
}
|
||||
if bd.streamID.less(&bsm.streamID) {
|
||||
logger.Panicf("FATAL: cannot merge %s: the streamID=%s for the next block is smaller than the streamID=%s for the current block",
|
||||
bsm.ReadersPaths(), &bd.streamID, &bsm.streamID)
|
||||
}
|
||||
if !bd.streamID.equal(&bsm.streamID) {
|
||||
return
|
||||
}
|
||||
// The streamID at bd equals the streamID at bsm. Check that the minTimestamp in bd is bigger than or equal to the minTimestamp at bsm.
|
||||
if bd.rowsCount == 0 {
|
||||
return
|
||||
}
|
||||
nextMinTimestamp := bd.timestampsData.minTimestamp
|
||||
if len(bsm.rows.timestamps) == 0 {
|
||||
if bsm.bd.rowsCount == 0 {
|
||||
return
|
||||
}
|
||||
minTimestamp := bsm.bd.timestampsData.minTimestamp
|
||||
if nextMinTimestamp < minTimestamp {
|
||||
logger.Panicf("FATAL: cannot merge %s: the next block's minTimestamp=%d is smaller than the minTimestamp=%d for the current block",
|
||||
bsm.ReadersPaths(), nextMinTimestamp, minTimestamp)
|
||||
}
|
||||
return
|
||||
}
|
||||
minTimestamp := bsm.rows.timestamps[0]
|
||||
if nextMinTimestamp < minTimestamp {
|
||||
logger.Panicf("FATAL: cannot merge %s: the next block's minTimestamp=%d is smaller than the minTimestamp=%d for log entries for the current block",
|
||||
bsm.ReadersPaths(), nextMinTimestamp, minTimestamp)
|
||||
}
|
||||
}
|
||||
|
||||
// ReadersPaths returns paths for input blockStreamReaders
|
||||
func (bsm *blockStreamMerger) ReadersPaths() string {
|
||||
paths := make([]string, len(bsm.bsrs))
|
||||
for i, bsr := range bsm.bsrs {
|
||||
paths[i] = bsr.Path()
|
||||
}
|
||||
return fmt.Sprintf("[%s]", strings.Join(paths, ","))
|
||||
}
|
||||
|
||||
// mustMergeRows merges the current log entries inside bsm with bd log entries.
|
||||
func (bsm *blockStreamMerger) mustMergeRows(bd *blockData) {
|
||||
if bsm.bd.rowsCount > 0 {
|
||||
// Unmarshal log entries from bsm.bd
|
||||
bsm.mustUnmarshalRows(&bsm.bd)
|
||||
bsm.bd.reset()
|
||||
}
|
||||
|
||||
// Unmarshal log entries from bd
|
||||
rowsLen := len(bsm.rows.timestamps)
|
||||
bsm.mustUnmarshalRows(bd)
|
||||
|
||||
// Merge unmarshaled log entries
|
||||
timestamps := bsm.rows.timestamps
|
||||
rows := bsm.rows.rows
|
||||
bsm.rowsTmp.mergeRows(timestamps[:rowsLen], timestamps[rowsLen:], rows[:rowsLen], rows[rowsLen:])
|
||||
bsm.rows, bsm.rowsTmp = bsm.rowsTmp, bsm.rows
|
||||
bsm.rowsTmp.reset()
|
||||
|
||||
if bsm.uncompressedRowsSizeBytes >= maxUncompressedBlockSize {
|
||||
bsm.mustFlushRows()
|
||||
}
|
||||
}
|
||||
|
||||
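// mustUnmarshalRows appends log entries from bd to bsm.rows and updates bsm.uncompressedRowsSizeBytes accordingly.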
func (bsm *blockStreamMerger) mustUnmarshalRows(bd *blockData) {
|
||||
rowsLen := len(bsm.rows.timestamps)
|
||||
if bsm.sbu == nil {
|
||||
bsm.sbu = getStringsBlockUnmarshaler()
|
||||
}
|
||||
if bsm.vd == nil {
|
||||
bsm.vd = getValuesDecoder()
|
||||
}
|
||||
if err := bd.unmarshalRows(&bsm.rows, bsm.sbu, bsm.vd); err != nil {
|
||||
logger.Panicf("FATAL: cannot merge %s: cannot unmarshal log entries from blockData: %s", bsm.ReadersPaths(), err)
|
||||
}
|
||||
bsm.uncompressedRowsSizeBytes += uncompressedRowsSizeBytes(bsm.rows.rows[rowsLen:])
|
||||
}
|
||||
|
||||
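// mustFlushRows writes the pending data (bsm.bd if no rows are buffered, bsm.rows otherwise) to bsm.bsw and resets the pending state.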
func (bsm *blockStreamMerger) mustFlushRows() {
|
||||
if len(bsm.rows.timestamps) == 0 {
|
||||
bsm.bsw.MustWriteBlockData(&bsm.bd)
|
||||
} else {
|
||||
bsm.bsw.MustWriteRows(&bsm.streamID, bsm.rows.timestamps, bsm.rows.rows)
|
||||
}
|
||||
bsm.resetRows()
|
||||
}
|
||||
|
||||
func getBlockStreamMerger() *blockStreamMerger {
|
||||
v := blockStreamMergerPool.Get()
|
||||
if v == nil {
|
||||
return &blockStreamMerger{}
|
||||
}
|
||||
return v.(*blockStreamMerger)
|
||||
}
|
||||
|
||||
func putBlockStreamMerger(bsm *blockStreamMerger) {
|
||||
bsm.reset()
|
||||
blockStreamMergerPool.Put(bsm)
|
||||
}
|
||||
|
||||
var blockStreamMergerPool sync.Pool
|
||||
|
||||
type blockStreamReadersHeap []*blockStreamReader
|
||||
|
||||
func (h *blockStreamReadersHeap) Len() int {
|
||||
return len(*h)
|
||||
}
|
||||
|
||||
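// Less orders readers by (streamID, minTimestamp) of their current block, matching the order verified by checkNextBlock().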
func (h *blockStreamReadersHeap) Less(i, j int) bool {
|
||||
x := *h
|
||||
a := &x[i].blockData
|
||||
b := &x[j].blockData
|
||||
if !a.streamID.equal(&b.streamID) {
|
||||
return a.streamID.less(&b.streamID)
|
||||
}
|
||||
return a.timestampsData.minTimestamp < b.timestampsData.minTimestamp
|
||||
}
|
||||
|
||||
func (h *blockStreamReadersHeap) Swap(i, j int) {
|
||||
x := *h
|
||||
x[i], x[j] = x[j], x[i]
|
||||
}
|
||||
|
||||
func (h *blockStreamReadersHeap) Push(v interface{}) {
|
||||
bsr := v.(*blockStreamReader)
|
||||
*h = append(*h, bsr)
|
||||
}
|
||||
|
||||
func (h *blockStreamReadersHeap) Pop() interface{} {
|
||||
x := *h
|
||||
bsr := x[len(x)-1]
|
||||
x[len(x)-1] = nil
|
||||
*h = x[:len(x)-1]
|
||||
return bsr
|
||||
}
|
383
lib/logstorage/block_stream_reader.go
Normal file
|
@ -0,0 +1,383 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
type readerWithStats struct {
|
||||
r filestream.ReadCloser
|
||||
bytesRead uint64
|
||||
}
|
||||
|
||||
func (r *readerWithStats) reset() {
|
||||
r.r = nil
|
||||
r.bytesRead = 0
|
||||
}
|
||||
|
||||
func (r *readerWithStats) init(rc filestream.ReadCloser) {
|
||||
r.reset()
|
||||
|
||||
r.r = rc
|
||||
}
|
||||
|
||||
// Path returns the path to r file
|
||||
func (r *readerWithStats) Path() string {
|
||||
return r.r.Path()
|
||||
}
|
||||
|
||||
// MustReadFull reads len(data) bytes from r into data.
|
||||
func (r *readerWithStats) MustReadFull(data []byte) {
|
||||
fs.MustReadData(r.r, data)
|
||||
r.bytesRead += uint64(len(data))
|
||||
}
|
||||
|
||||
func (r *readerWithStats) Read(p []byte) (int, error) {
|
||||
n, err := r.r.Read(p)
|
||||
r.bytesRead += uint64(n)
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (r *readerWithStats) MustClose() {
|
||||
r.r.MustClose()
|
||||
r.r = nil
|
||||
}
|
||||
|
||||
// streamReaders contains readers for blockStreamReader
|
||||
type streamReaders struct {
|
||||
metaindexReader readerWithStats
|
||||
indexReader readerWithStats
|
||||
columnsHeaderReader readerWithStats
|
||||
timestampsReader readerWithStats
|
||||
fieldValuesReader readerWithStats
|
||||
fieldBloomFilterReader readerWithStats
|
||||
messageValuesReader readerWithStats
|
||||
messageBloomFilterReader readerWithStats
|
||||
}
|
||||
|
||||
func (sr *streamReaders) reset() {
|
||||
sr.metaindexReader.reset()
|
||||
sr.indexReader.reset()
|
||||
sr.columnsHeaderReader.reset()
|
||||
sr.timestampsReader.reset()
|
||||
sr.fieldValuesReader.reset()
|
||||
sr.fieldBloomFilterReader.reset()
|
||||
sr.messageValuesReader.reset()
|
||||
sr.messageBloomFilterReader.reset()
|
||||
}
|
||||
|
||||
func (sr *streamReaders) init(metaindexReader, indexReader, columnsHeaderReader, timestampsReader, fieldValuesReader, fieldBloomFilterReader,
|
||||
messageValuesReader, messageBloomFilterReader filestream.ReadCloser,
|
||||
) {
|
||||
sr.metaindexReader.init(metaindexReader)
|
||||
sr.indexReader.init(indexReader)
|
||||
sr.columnsHeaderReader.init(columnsHeaderReader)
|
||||
sr.timestampsReader.init(timestampsReader)
|
||||
sr.fieldValuesReader.init(fieldValuesReader)
|
||||
sr.fieldBloomFilterReader.init(fieldBloomFilterReader)
|
||||
sr.messageValuesReader.init(messageValuesReader)
|
||||
sr.messageBloomFilterReader.init(messageBloomFilterReader)
|
||||
}
|
||||
|
||||
func (sr *streamReaders) totalBytesRead() uint64 {
|
||||
n := uint64(0)
|
||||
n += sr.metaindexReader.bytesRead
|
||||
n += sr.indexReader.bytesRead
|
||||
n += sr.columnsHeaderReader.bytesRead
|
||||
n += sr.timestampsReader.bytesRead
|
||||
n += sr.fieldValuesReader.bytesRead
|
||||
n += sr.fieldBloomFilterReader.bytesRead
|
||||
n += sr.messageValuesReader.bytesRead
|
||||
n += sr.messageBloomFilterReader.bytesRead
|
||||
return n
|
||||
}
|
||||
|
||||
func (sr *streamReaders) MustClose() {
|
||||
sr.metaindexReader.MustClose()
|
||||
sr.indexReader.MustClose()
|
||||
sr.columnsHeaderReader.MustClose()
|
||||
sr.timestampsReader.MustClose()
|
||||
sr.fieldValuesReader.MustClose()
|
||||
sr.fieldBloomFilterReader.MustClose()
|
||||
sr.messageValuesReader.MustClose()
|
||||
sr.messageBloomFilterReader.MustClose()
|
||||
}
|
||||
|
||||
// blockStreamReader is used for reading blocks in streaming manner from a part.
|
||||
type blockStreamReader struct {
|
||||
// blockData contains the data for the last read block
|
||||
blockData blockData
|
||||
|
||||
// ph is the header for the part
|
||||
ph partHeader
|
||||
|
||||
// streamReaders contains data readers in stream mode
|
||||
streamReaders streamReaders
|
||||
|
||||
// indexBlockHeaders contains the list of all the indexBlockHeader entries for the part
|
||||
indexBlockHeaders []indexBlockHeader
|
||||
|
||||
// blockHeaders contains the list of blockHeader entries for the current indexBlockHeader pointed by nextIndexBlockIdx
|
||||
blockHeaders []blockHeader
|
||||
|
||||
// nextIndexBlockIdx is the index of the next item to read from indexBlockHeaders
|
||||
nextIndexBlockIdx int
|
||||
|
||||
// nextBlockIdx is the index of the next item to read from blockHeaders
|
||||
nextBlockIdx int
|
||||
|
||||
// globalUncompressedSizeBytes is the total size of log entries seen in the part
|
||||
globalUncompressedSizeBytes uint64
|
||||
|
||||
// globalRowsCount is the number of log entries seen in the part
|
||||
globalRowsCount uint64
|
||||
|
||||
// globalBlocksCount is the number of blocks seen in the part
|
||||
globalBlocksCount uint64
|
||||
|
||||
// sidLast is the stream id for the previously read block
|
||||
sidLast streamID
|
||||
|
||||
// minTimestampLast is the minimum timestamp for the previously read block
|
||||
minTimestampLast int64
|
||||
}
|
||||
|
||||
// reset resets bsr, so it can be re-used
|
||||
func (bsr *blockStreamReader) reset() {
|
||||
bsr.blockData.reset()
|
||||
bsr.ph.reset()
|
||||
bsr.streamReaders.reset()
|
||||
|
||||
ihs := bsr.indexBlockHeaders
|
||||
if len(ihs) > 10e3 {
|
||||
// The length of ihs is unbounded, so it is better to drop too long indexBlockHeaders in order to reduce memory usage
|
||||
ihs = nil
|
||||
}
|
||||
for i := range ihs {
|
||||
ihs[i].reset()
|
||||
}
|
||||
bsr.indexBlockHeaders = ihs[:0]
|
||||
|
||||
bhs := bsr.blockHeaders
|
||||
for i := range bhs {
|
||||
bhs[i].reset()
|
||||
}
|
||||
bsr.blockHeaders = bhs[:0]
|
||||
|
||||
bsr.nextIndexBlockIdx = 0
|
||||
bsr.nextBlockIdx = 0
|
||||
bsr.globalUncompressedSizeBytes = 0
|
||||
bsr.globalRowsCount = 0
|
||||
bsr.globalBlocksCount = 0
|
||||
|
||||
bsr.sidLast.reset()
|
||||
bsr.minTimestampLast = 0
|
||||
}
|
||||
|
||||
// Path returns part path for bsr (e.g. file path, url or in-memory reference)
|
||||
func (bsr *blockStreamReader) Path() string {
|
||||
path := bsr.streamReaders.metaindexReader.Path()
|
||||
return filepath.Dir(path)
|
||||
}
|
||||
|
||||
// MustInitFromInmemoryPart initializes bsr from mp.
|
||||
func (bsr *blockStreamReader) MustInitFromInmemoryPart(mp *inmemoryPart) {
|
||||
bsr.reset()
|
||||
|
||||
bsr.ph = mp.ph
|
||||
|
||||
// Initialize streamReaders
|
||||
metaindexReader := mp.metaindex.NewReader()
|
||||
indexReader := mp.index.NewReader()
|
||||
columnsHeaderReader := mp.columnsHeader.NewReader()
|
||||
timestampsReader := mp.timestamps.NewReader()
|
||||
fieldValuesReader := mp.fieldValues.NewReader()
|
||||
fieldBloomFilterReader := mp.fieldBloomFilter.NewReader()
|
||||
messageValuesReader := mp.messageValues.NewReader()
|
||||
messageBloomFilterReader := mp.messageBloomFilter.NewReader()
|
||||
|
||||
bsr.streamReaders.init(metaindexReader, indexReader, columnsHeaderReader, timestampsReader,
|
||||
fieldValuesReader, fieldBloomFilterReader, messageValuesReader, messageBloomFilterReader)
|
||||
|
||||
// Read metaindex data
|
||||
bsr.indexBlockHeaders = mustReadIndexBlockHeaders(bsr.indexBlockHeaders[:0], &bsr.streamReaders.metaindexReader)
|
||||
}
|
||||
|
||||
// MustInitFromFilePart initializes bsr from file part at the given path.
|
||||
func (bsr *blockStreamReader) MustInitFromFilePart(path string) {
|
||||
bsr.reset()
|
||||
|
||||
// Files in the part are always read without OS cache pollution,
|
||||
// since they are usually deleted after the merge.
|
||||
const nocache = true
|
||||
|
||||
metaindexPath := filepath.Join(path, metaindexFilename)
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
|
||||
timestampsPath := filepath.Join(path, timestampsFilename)
|
||||
fieldValuesPath := filepath.Join(path, fieldValuesFilename)
|
||||
fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename)
|
||||
messageValuesPath := filepath.Join(path, messageValuesFilename)
|
||||
messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
|
||||
|
||||
bsr.ph.mustReadMetadata(path)
|
||||
|
||||
// Open data readers
|
||||
metaindexReader := filestream.MustOpen(metaindexPath, nocache)
|
||||
indexReader := filestream.MustOpen(indexPath, nocache)
|
||||
columnsHeaderReader := filestream.MustOpen(columnsHeaderPath, nocache)
|
||||
timestampsReader := filestream.MustOpen(timestampsPath, nocache)
|
||||
fieldValuesReader := filestream.MustOpen(fieldValuesPath, nocache)
|
||||
fieldBloomFilterReader := filestream.MustOpen(fieldBloomFilterPath, nocache)
|
||||
messageValuesReader := filestream.MustOpen(messageValuesPath, nocache)
|
||||
messageBloomFilterReader := filestream.MustOpen(messageBloomFilterPath, nocache)
|
||||
|
||||
// Initialize streamReaders
|
||||
bsr.streamReaders.init(metaindexReader, indexReader, columnsHeaderReader, timestampsReader,
|
||||
fieldValuesReader, fieldBloomFilterReader, messageValuesReader, messageBloomFilterReader)
|
||||
|
||||
// Read metaindex data
|
||||
bsr.indexBlockHeaders = mustReadIndexBlockHeaders(bsr.indexBlockHeaders[:0], &bsr.streamReaders.metaindexReader)
|
||||
}
|
||||
|
||||
// NextBlock reads the next block from bsr and puts it into bsr.blockData.
|
||||
//
|
||||
// false is returned if there are no other blocks.
|
||||
func (bsr *blockStreamReader) NextBlock() bool {
|
||||
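// Advance to the next index block until an unread blockHeader becomes available.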
for bsr.nextBlockIdx >= len(bsr.blockHeaders) {
|
||||
if !bsr.nextIndexBlock() {
|
||||
return false
|
||||
}
|
||||
}
|
||||
ih := &bsr.indexBlockHeaders[bsr.nextIndexBlockIdx-1]
|
||||
bh := &bsr.blockHeaders[bsr.nextBlockIdx]
|
||||
th := &bh.timestampsHeader
|
||||
|
||||
// Validate bh
|
||||
if bh.streamID.less(&bsr.sidLast) {
|
||||
logger.Panicf("FATAL: %s: blockHeader.streamID=%s cannot be smaller than the streamID from the previously read block: %s", bsr.Path(), &bh.streamID, &bsr.sidLast)
|
||||
}
|
||||
if bh.streamID.equal(&bsr.sidLast) && th.minTimestamp < bsr.minTimestampLast {
|
||||
logger.Panicf("FATAL: %s: timestamps.minTimestamp=%d cannot be smaller than the minTimestamp for the previously read block for the same streamID: %d",
|
||||
bsr.Path(), th.minTimestamp, bsr.minTimestampLast)
|
||||
}
|
||||
bsr.minTimestampLast = th.minTimestamp
|
||||
bsr.sidLast = bh.streamID
|
||||
if th.minTimestamp < ih.minTimestamp {
|
||||
logger.Panicf("FATAL: %s: timestampsHeader.minTimestamp=%d cannot be smaller than indexBlockHeader.minTimestamp=%d", bsr.Path(), th.minTimestamp, ih.minTimestamp)
|
||||
}
|
||||
if th.maxTimestamp > ih.maxTimestamp {
|
||||
logger.Panicf("FATAL: %s: timestampsHeader.maxTimestamp=%d cannot be bigger than indexBlockHeader.maxTimestamp=%d", bsr.Path(), th.maxTimestamp, ih.minTimestamp)
|
||||
}
|
||||
|
||||
// Read bsr.blockData
|
||||
bsr.blockData.mustReadFrom(bh, &bsr.streamReaders)
|
||||
|
||||
bsr.globalUncompressedSizeBytes += bh.uncompressedSizeBytes
|
||||
bsr.globalRowsCount += bh.rowsCount
|
||||
bsr.globalBlocksCount++
|
||||
if bsr.globalUncompressedSizeBytes > bsr.ph.UncompressedSizeBytes {
|
||||
logger.Panicf("FATAL: %s: too big size of entries read: %d; mustn't exceed partHeader.UncompressedSizeBytes=%d",
|
||||
bsr.Path(), bsr.globalUncompressedSizeBytes, bsr.ph.UncompressedSizeBytes)
|
||||
}
|
||||
if bsr.globalRowsCount > bsr.ph.RowsCount {
|
||||
logger.Panicf("FATAL: %s: too many log entries read so far: %d; mustn't exceed partHeader.RowsCount=%d", bsr.Path(), bsr.globalRowsCount, bsr.ph.RowsCount)
|
||||
}
|
||||
if bsr.globalBlocksCount > bsr.ph.BlocksCount {
|
||||
logger.Panicf("FATAL: %s: too many blocks read so far: %d; mustn't exceed partHeader.BlocksCount=%d", bsr.Path(), bsr.globalBlocksCount, bsr.ph.BlocksCount)
|
||||
}
|
||||
|
||||
// The block has been successfully read
|
||||
bsr.nextBlockIdx++
|
||||
return true
|
||||
}
|
||||
|
||||
func (bsr *blockStreamReader) nextIndexBlock() bool {
|
||||
// Advance to the next indexBlockHeader
|
||||
if bsr.nextIndexBlockIdx >= len(bsr.indexBlockHeaders) {
|
||||
// No more blocks left
|
||||
// Validate bsr.ph
|
||||
totalBytesRead := bsr.streamReaders.totalBytesRead()
|
||||
if bsr.ph.CompressedSizeBytes != totalBytesRead {
|
||||
logger.Panicf("FATAL: %s: partHeader.CompressedSizeBytes=%d must match the size of data read: %d", bsr.Path(), bsr.ph.CompressedSizeBytes, totalBytesRead)
|
||||
}
|
||||
if bsr.ph.UncompressedSizeBytes != bsr.globalUncompressedSizeBytes {
|
||||
logger.Panicf("FATAL: %s: partHeader.UncompressedSizeBytes=%d must match the size of entries read: %d",
|
||||
bsr.Path(), bsr.ph.UncompressedSizeBytes, bsr.globalUncompressedSizeBytes)
|
||||
}
|
||||
if bsr.ph.RowsCount != bsr.globalRowsCount {
|
||||
logger.Panicf("FATAL: %s: partHeader.RowsCount=%d must match the number of log entries read: %d", bsr.Path(), bsr.ph.RowsCount, bsr.globalRowsCount)
|
||||
}
|
||||
if bsr.ph.BlocksCount != bsr.globalBlocksCount {
|
||||
logger.Panicf("FATAL: %s: partHeader.BlocksCount=%d must match the number of blocks read: %d", bsr.Path(), bsr.ph.BlocksCount, bsr.globalBlocksCount)
|
||||
}
|
||||
return false
|
||||
}
|
||||
ih := &bsr.indexBlockHeaders[bsr.nextIndexBlockIdx]
|
||||
|
||||
// Validate ih
|
||||
metaindexReader := &bsr.streamReaders.metaindexReader
|
||||
if ih.minTimestamp < bsr.ph.MinTimestamp {
|
||||
logger.Panicf("FATAL: %s: indexBlockHeader.minTimestamp=%d cannot be smaller than partHeader.MinTimestamp=%d",
|
||||
metaindexReader.Path(), ih.minTimestamp, bsr.ph.MinTimestamp)
|
||||
}
|
||||
if ih.maxTimestamp > bsr.ph.MaxTimestamp {
|
||||
logger.Panicf("FATAL: %s: indexBlockHeader.maxTimestamp=%d cannot be bigger than partHeader.MaxTimestamp=%d",
|
||||
metaindexReader.Path(), ih.maxTimestamp, bsr.ph.MaxTimestamp)
|
||||
}
|
||||
|
||||
// Read indexBlock for the given ih
|
||||
bb := longTermBufPool.Get()
|
||||
bb.B = ih.mustReadNextIndexBlock(bb.B[:0], &bsr.streamReaders)
|
||||
bsr.blockHeaders = resetBlockHeaders(bsr.blockHeaders)
|
||||
var err error
|
||||
bsr.blockHeaders, err = unmarshalBlockHeaders(bsr.blockHeaders[:0], bb.B)
|
||||
longTermBufPool.Put(bb)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot unmarshal blockHeader entries: %s", bsr.streamReaders.indexReader.Path(), err)
|
||||
}
|
||||
|
||||
bsr.nextIndexBlockIdx++
|
||||
bsr.nextBlockIdx = 0
|
||||
return true
|
||||
}
|
||||
|
||||
// MustClose closes bsr.
|
||||
func (bsr *blockStreamReader) MustClose() {
|
||||
bsr.streamReaders.MustClose()
|
||||
bsr.reset()
|
||||
}
|
||||
|
||||
// getBlockStreamReader returns blockStreamReader.
|
||||
//
|
||||
// The returned blockStreamReader must be initialized via MustInitFromInmemoryPart() or MustInitFromFilePart().
|
||||
// Call putBlockStreamReader() when the returned blockStreamReader is no longer needed.
|
||||
func getBlockStreamReader() *blockStreamReader {
|
||||
v := blockStreamReaderPool.Get()
|
||||
if v == nil {
|
||||
v = &blockStreamReader{}
|
||||
}
|
||||
bsr := v.(*blockStreamReader)
|
||||
return bsr
|
||||
}
|
||||
|
||||
// putBlockStreamReader returns bsr to the pool.
|
||||
//
|
||||
// bsr cannot be used after returning to the pool.
|
||||
func putBlockStreamReader(bsr *blockStreamReader) {
|
||||
bsr.reset()
|
||||
blockStreamReaderPool.Put(bsr)
|
||||
}
|
||||
|
||||
var blockStreamReaderPool sync.Pool
|
||||
|
||||
// mustCloseBlockStreamReaders calls MustClose() on the given bsrs.
|
||||
func mustCloseBlockStreamReaders(bsrs []*blockStreamReader) {
|
||||
for _, bsr := range bsrs {
|
||||
bsr.MustClose()
|
||||
}
|
||||
}
|
362
lib/logstorage/block_stream_writer.go
Normal file
|
@ -0,0 +1,362 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// writerWithStats writes data to w and tracks the total amount of data written in bytesWritten.
|
||||
type writerWithStats struct {
|
||||
w filestream.WriteCloser
|
||||
bytesWritten uint64
|
||||
}
|
||||
|
||||
func (w *writerWithStats) reset() {
|
||||
w.w = nil
|
||||
w.bytesWritten = 0
|
||||
}
|
||||
|
||||
func (w *writerWithStats) init(wc filestream.WriteCloser) {
|
||||
w.reset()
|
||||
|
||||
w.w = wc
|
||||
}
|
||||
|
||||
func (w *writerWithStats) Path() string {
|
||||
return w.w.Path()
|
||||
}
|
||||
|
||||
func (w *writerWithStats) MustWrite(data []byte) {
|
||||
fs.MustWriteData(w.w, data)
|
||||
w.bytesWritten += uint64(len(data))
|
||||
}
|
||||
|
||||
// MustClose closes the underlying w.
|
||||
func (w *writerWithStats) MustClose() {
|
||||
w.w.MustClose()
|
||||
}
|
||||
|
||||
// streamWriters contains writers for blockStreamWriter
|
||||
type streamWriters struct {
|
||||
metaindexWriter writerWithStats
|
||||
indexWriter writerWithStats
|
||||
columnsHeaderWriter writerWithStats
|
||||
timestampsWriter writerWithStats
|
||||
fieldValuesWriter writerWithStats
|
||||
fieldBloomFilterWriter writerWithStats
|
||||
messageValuesWriter writerWithStats
|
||||
messageBloomFilterWriter writerWithStats
|
||||
}
|
||||
|
||||
func (sw *streamWriters) reset() {
|
||||
sw.metaindexWriter.reset()
|
||||
sw.indexWriter.reset()
|
||||
sw.columnsHeaderWriter.reset()
|
||||
sw.timestampsWriter.reset()
|
||||
sw.fieldValuesWriter.reset()
|
||||
sw.fieldBloomFilterWriter.reset()
|
||||
sw.messageValuesWriter.reset()
|
||||
sw.messageBloomFilterWriter.reset()
|
||||
}
|
||||
|
||||
func (sw *streamWriters) init(metaindexWriter, indexWriter, columnsHeaderWriter, timestampsWriter, fieldValuesWriter, fieldBloomFilterWriter,
|
||||
messageValuesWriter, messageBloomFilterWriter filestream.WriteCloser,
|
||||
) {
|
||||
sw.metaindexWriter.init(metaindexWriter)
|
||||
sw.indexWriter.init(indexWriter)
|
||||
sw.columnsHeaderWriter.init(columnsHeaderWriter)
|
||||
sw.timestampsWriter.init(timestampsWriter)
|
||||
sw.fieldValuesWriter.init(fieldValuesWriter)
|
||||
sw.fieldBloomFilterWriter.init(fieldBloomFilterWriter)
|
||||
sw.messageValuesWriter.init(messageValuesWriter)
|
||||
sw.messageBloomFilterWriter.init(messageBloomFilterWriter)
|
||||
}
|
||||
|
||||
func (sw *streamWriters) totalBytesWritten() uint64 {
|
||||
n := uint64(0)
|
||||
n += sw.metaindexWriter.bytesWritten
|
||||
n += sw.indexWriter.bytesWritten
|
||||
n += sw.columnsHeaderWriter.bytesWritten
|
||||
n += sw.timestampsWriter.bytesWritten
|
||||
n += sw.fieldValuesWriter.bytesWritten
|
||||
n += sw.fieldBloomFilterWriter.bytesWritten
|
||||
n += sw.messageValuesWriter.bytesWritten
|
||||
n += sw.messageBloomFilterWriter.bytesWritten
|
||||
return n
|
||||
}
|
||||
|
||||
func (sw *streamWriters) MustClose() {
|
||||
sw.metaindexWriter.MustClose()
|
||||
sw.indexWriter.MustClose()
|
||||
sw.columnsHeaderWriter.MustClose()
|
||||
sw.timestampsWriter.MustClose()
|
||||
sw.fieldValuesWriter.MustClose()
|
||||
sw.fieldBloomFilterWriter.MustClose()
|
||||
sw.messageValuesWriter.MustClose()
|
||||
sw.messageBloomFilterWriter.MustClose()
|
||||
}
|
||||
|
||||
// blockStreamWriter is used for writing blocks into the underlying storage in streaming manner.
|
||||
type blockStreamWriter struct {
|
||||
// streamWriters contains the writers for block data
|
||||
streamWriters streamWriters
|
||||
|
||||
// sidLast is the streamID for the last written block
|
||||
sidLast streamID
|
||||
|
||||
// sidFirst is the streamID for the first block in the current indexBlock
|
||||
sidFirst streamID
|
||||
|
||||
// minTimestampLast is the minimum timestamp seen for the last written block
|
||||
minTimestampLast int64
|
||||
|
||||
// minTimestamp is the minimum timestamp seen across written blocks for the current indexBlock
|
||||
minTimestamp int64
|
||||
|
||||
// maxTimestamp is the maximum timestamp seen across written blocks for the current indexBlock
|
||||
maxTimestamp int64
|
||||
|
||||
// hasWrittenBlocks is set to true if at least a single block is written to the current indexBlock
|
||||
hasWrittenBlocks bool
|
||||
|
||||
// globalUncompressedSizeBytes is the total size of all the log entries written via bsw
|
||||
globalUncompressedSizeBytes uint64
|
||||
|
||||
// globalRowsCount is the total number of log entries written via bsw
|
||||
globalRowsCount uint64
|
||||
|
||||
// globalBlocksCount is the total number of blocks written to bsw
|
||||
globalBlocksCount uint64
|
||||
|
||||
// globalMinTimestamp is the minimum timestamp seen across all the blocks written to bsw
|
||||
globalMinTimestamp int64
|
||||
|
||||
// globalMaxTimestamp is the maximum timestamp seen across all the blocks written to bsw
|
||||
globalMaxTimestamp int64
|
||||
|
||||
// indexBlockData contains marshaled blockHeader data, which isn't written yet to indexFilename
|
||||
indexBlockData []byte
|
||||
|
||||
// metaindexData contains marshaled indexBlockHeader data, which isn't written yet to metaindexFilename
|
||||
metaindexData []byte
|
||||
|
||||
// indexBlockHeader is used for marshaling the data to metaindexData
|
||||
indexBlockHeader indexBlockHeader
|
||||
}
|
||||
|
||||
// reset resets bsw for subsequent re-use.
|
||||
func (bsw *blockStreamWriter) reset() {
|
||||
bsw.streamWriters.reset()
|
||||
bsw.sidLast.reset()
|
||||
bsw.sidFirst.reset()
|
||||
bsw.minTimestampLast = 0
|
||||
bsw.minTimestamp = 0
|
||||
bsw.maxTimestamp = 0
|
||||
bsw.hasWrittenBlocks = false
|
||||
bsw.globalUncompressedSizeBytes = 0
|
||||
bsw.globalRowsCount = 0
|
||||
bsw.globalBlocksCount = 0
|
||||
bsw.globalMinTimestamp = 0
|
||||
bsw.globalMaxTimestamp = 0
|
||||
bsw.indexBlockData = bsw.indexBlockData[:0]
|
||||
|
||||
if len(bsw.metaindexData) > 1024*1024 {
|
||||
// The length of bsw.metaindexData is unbounded, so drop too long a buffer
// in order to conserve memory.
|
||||
bsw.metaindexData = nil
|
||||
} else {
|
||||
bsw.metaindexData = bsw.metaindexData[:0]
|
||||
}
|
||||
|
||||
bsw.indexBlockHeader.reset()
|
||||
}
|
||||
|
||||
// MustInitForInmemoryPart initializes bsw for writing data to mp
|
||||
func (bsw *blockStreamWriter) MustInitForInmemoryPart(mp *inmemoryPart) {
|
||||
bsw.reset()
|
||||
bsw.streamWriters.init(&mp.metaindex, &mp.index, &mp.columnsHeader, &mp.timestamps, &mp.fieldValues, &mp.fieldBloomFilter, &mp.messageValues, &mp.messageBloomFilter)
|
||||
}
|
||||
|
||||
// MustInitForFilePart initializes bsw for writing data to file part located at path.
|
||||
//
|
||||
// If nocache is true, then the written data doesn't go to the OS page cache.
|
||||
func (bsw *blockStreamWriter) MustInitForFilePart(path string, nocache bool) {
|
||||
bsw.reset()
|
||||
|
||||
fs.MustMkdirFailIfExist(path)
|
||||
|
||||
metaindexPath := filepath.Join(path, metaindexFilename)
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
|
||||
timestampsPath := filepath.Join(path, timestampsFilename)
|
||||
fieldValuesPath := filepath.Join(path, fieldValuesFilename)
|
||||
fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename)
|
||||
messageValuesPath := filepath.Join(path, messageValuesFilename)
|
||||
messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
|
||||
|
||||
// Always cache the metaindex file, since it is re-read immediately after part creation
|
||||
metaindexWriter := filestream.MustCreate(metaindexPath, false)
|
||||
|
||||
indexWriter := filestream.MustCreate(indexPath, nocache)
|
||||
columnsHeaderWriter := filestream.MustCreate(columnsHeaderPath, nocache)
|
||||
timestampsWriter := filestream.MustCreate(timestampsPath, nocache)
|
||||
fieldValuesWriter := filestream.MustCreate(fieldValuesPath, nocache)
|
||||
fieldBloomFilterWriter := filestream.MustCreate(fieldBloomFilterPath, nocache)
|
||||
messageValuesWriter := filestream.MustCreate(messageValuesPath, nocache)
|
||||
messageBloomFilterWriter := filestream.MustCreate(messageBloomFilterPath, nocache)
|
||||
|
||||
bsw.streamWriters.init(metaindexWriter, indexWriter, columnsHeaderWriter, timestampsWriter,
|
||||
fieldValuesWriter, fieldBloomFilterWriter, messageValuesWriter, messageBloomFilterWriter)
|
||||
}
|
||||
|
||||
// MustWriteRows writes timestamps with rows under the given sid to bsw.
|
||||
//
|
||||
// timestamps must be sorted.
// sid must be bigger than or equal to the sid for the previously written rows.
|
||||
func (bsw *blockStreamWriter) MustWriteRows(sid *streamID, timestamps []int64, rows [][]Field) {
|
||||
if len(timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
b := getBlock()
|
||||
b.MustInitFromRows(timestamps, rows)
|
||||
bsw.MustWriteBlock(sid, b)
|
||||
putBlock(b)
|
||||
}
|
||||
|
||||
// MustWriteBlockData writes bd to bsw.
|
||||
//
|
||||
// The bd.streamID must be bigger than or equal to the streamID for the previously written blocks.
|
||||
func (bsw *blockStreamWriter) MustWriteBlockData(bd *blockData) {
|
||||
if bd.rowsCount == 0 {
|
||||
return
|
||||
}
|
||||
bsw.mustWriteBlockInternal(&bd.streamID, nil, bd)
|
||||
}
|
||||
|
||||
// MustWriteBlock writes b under the given sid to bsw.
|
||||
//
|
||||
// The sid must be bigger than or equal to the sid for the previously written blocks.
// The minimum timestamp in b must be bigger than or equal to the minimum timestamp written to the same sid.
|
||||
func (bsw *blockStreamWriter) MustWriteBlock(sid *streamID, b *block) {
|
||||
rowsCount := b.Len()
|
||||
if rowsCount == 0 {
|
||||
return
|
||||
}
|
||||
bsw.mustWriteBlockInternal(sid, b, nil)
|
||||
}
|
||||
|
||||
func (bsw *blockStreamWriter) mustWriteBlockInternal(sid *streamID, b *block, bd *blockData) {
|
||||
if sid.less(&bsw.sidLast) {
|
||||
logger.Panicf("BUG: the sid=%s cannot be smaller than the previously written sid=%s", sid, &bsw.sidLast)
|
||||
}
|
||||
hasWrittenBlocks := bsw.hasWrittenBlocks
|
||||
if !hasWrittenBlocks {
|
||||
bsw.sidFirst = *sid
|
||||
bsw.hasWrittenBlocks = true
|
||||
}
|
||||
isSeenSid := sid.equal(&bsw.sidLast)
|
||||
bsw.sidLast = *sid
|
||||
|
||||
bh := getBlockHeader()
|
||||
if b != nil {
|
||||
b.mustWriteTo(sid, bh, &bsw.streamWriters)
|
||||
} else {
|
||||
bd.mustWriteTo(bh, &bsw.streamWriters)
|
||||
}
|
||||
th := &bh.timestampsHeader
|
||||
if bsw.globalRowsCount == 0 || th.minTimestamp < bsw.globalMinTimestamp {
|
||||
bsw.globalMinTimestamp = th.minTimestamp
|
||||
}
|
||||
if bsw.globalRowsCount == 0 || th.maxTimestamp > bsw.globalMaxTimestamp {
|
||||
bsw.globalMaxTimestamp = th.maxTimestamp
|
||||
}
|
||||
if !hasWrittenBlocks || th.minTimestamp < bsw.minTimestamp {
|
||||
bsw.minTimestamp = th.minTimestamp
|
||||
}
|
||||
if !hasWrittenBlocks || th.maxTimestamp > bsw.maxTimestamp {
|
||||
bsw.maxTimestamp = th.maxTimestamp
|
||||
}
|
||||
if isSeenSid && th.minTimestamp < bsw.minTimestampLast {
|
||||
logger.Panicf("BUG: the block for sid=%s cannot contain timestamp smaller than %d, but it contains timestamp %d", sid, bsw.minTimestampLast, th.minTimestamp)
|
||||
}
|
||||
bsw.minTimestampLast = th.minTimestamp
|
||||
|
||||
bsw.globalUncompressedSizeBytes += bh.uncompressedSizeBytes
|
||||
bsw.globalRowsCount += bh.rowsCount
|
||||
bsw.globalBlocksCount++
|
||||
|
||||
// Marshal bh
|
||||
bsw.indexBlockData = bh.marshal(bsw.indexBlockData)
|
||||
putBlockHeader(bh)
|
||||
if len(bsw.indexBlockData) > maxUncompressedIndexBlockSize {
|
||||
bsw.mustFlushIndexBlock(bsw.indexBlockData)
|
||||
bsw.indexBlockData = bsw.indexBlockData[:0]
|
||||
}
|
||||
}
|
||||
|
||||
func (bsw *blockStreamWriter) mustFlushIndexBlock(data []byte) {
|
||||
if len(data) > 0 {
|
||||
bsw.indexBlockHeader.mustWriteIndexBlock(data, bsw.sidFirst, bsw.minTimestamp, bsw.maxTimestamp, &bsw.streamWriters)
|
||||
bsw.metaindexData = bsw.indexBlockHeader.marshal(bsw.metaindexData)
|
||||
}
|
||||
bsw.hasWrittenBlocks = false
|
||||
bsw.minTimestamp = 0
|
||||
bsw.maxTimestamp = 0
|
||||
bsw.sidFirst.reset()
|
||||
}
|
||||
|
||||
// Finalize() finalizes the data write process and updates ph with the finalized stats
|
||||
//
|
||||
// It closes the writers passed to MustInitForInmemoryPart() or MustInitForFilePart().
|
||||
//
|
||||
// bsw can be re-used after calling Finalize().
|
||||
func (bsw *blockStreamWriter) Finalize(ph *partHeader) {
|
||||
ph.UncompressedSizeBytes = bsw.globalUncompressedSizeBytes
|
||||
ph.RowsCount = bsw.globalRowsCount
|
||||
ph.BlocksCount = bsw.globalBlocksCount
|
||||
ph.MinTimestamp = bsw.globalMinTimestamp
|
||||
ph.MaxTimestamp = bsw.globalMaxTimestamp
|
||||
|
||||
bsw.mustFlushIndexBlock(bsw.indexBlockData)
|
||||
|
||||
// Write metaindex data
|
||||
bb := longTermBufPool.Get()
|
||||
bb.B = encoding.CompressZSTDLevel(bb.B[:0], bsw.metaindexData, 1)
|
||||
bsw.streamWriters.metaindexWriter.MustWrite(bb.B)
|
||||
if len(bb.B) < 1024*1024 {
|
||||
longTermBufPool.Put(bb)
|
||||
}
|
||||
|
||||
ph.CompressedSizeBytes = bsw.streamWriters.totalBytesWritten()
|
||||
|
||||
bsw.streamWriters.MustClose()
|
||||
bsw.reset()
|
||||
}
|
||||
|
||||
var longTermBufPool bytesutil.ByteBufferPool
|
||||
|
||||
// getBlockStreamWriter returns new blockStreamWriter from the pool.
|
||||
//
|
||||
// Return the blockStreamWriter to the pool by calling putBlockStreamWriter when it is no longer needed.
|
||||
func getBlockStreamWriter() *blockStreamWriter {
|
||||
v := blockStreamWriterPool.Get()
|
||||
if v == nil {
|
||||
return &blockStreamWriter{}
|
||||
}
|
||||
return v.(*blockStreamWriter)
|
||||
}
|
||||
|
||||
// putBlockStreamWriter returns bsw to the pool.
|
||||
func putBlockStreamWriter(bsw *blockStreamWriter) {
|
||||
bsw.reset()
|
||||
blockStreamWriterPool.Put(bsw)
|
||||
}
|
||||
|
||||
var blockStreamWriterPool sync.Pool
|
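A minimal usage sketch of the blockStreamWriter API above, assuming it runs inside the logstorage package; the part path, stream and field values are hypothetical:

func writeExamplePart() {
	bsw := getBlockStreamWriter()
	defer putBlockStreamWriter(bsw)

	// Write the data into a new file part directory, bypassing the OS page cache.
	bsw.MustInitForFilePart("/tmp/example-part", true)

	var sid streamID // the zero streamID is enough for a single-stream sketch
	timestamps := []int64{1000, 2000, 3000} // must be sorted
	rows := [][]Field{
		{{Name: "level", Value: "info"}, {Name: "msg", Value: "foo"}},
		{{Name: "level", Value: "info"}, {Name: "msg", Value: "bar"}},
		{{Name: "level", Value: "error"}, {Name: "msg", Value: "baz"}},
	}
	bsw.MustWriteRows(&sid, timestamps, rows)

	// Finalize() flushes the pending index data, fills ph with the part stats
	// and closes the underlying writers.
	var ph partHeader
	bsw.Finalize(&ph)
}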
179
lib/logstorage/block_test.go
Normal file
|
@ -0,0 +1,179 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBlockMustInitFromRows(t *testing.T) {
|
||||
f := func(timestamps []int64, rows [][]Field, bExpected *block) {
|
||||
t.Helper()
|
||||
b := getBlock()
|
||||
defer putBlock(b)
|
||||
|
||||
b.MustInitFromRows(timestamps, rows)
|
||||
if b.uncompressedSizeBytes() >= maxUncompressedBlockSize {
|
||||
t.Fatalf("expecting non-full block")
|
||||
}
|
||||
if !reflect.DeepEqual(b, bExpected) {
|
||||
t.Fatalf("unexpected block;\ngot\n%v\nwant\n%v", b, bExpected)
|
||||
}
|
||||
if n := b.Len(); n != len(timestamps) {
|
||||
t.Fatalf("unexpected block len; got %d; want %d", n, len(timestamps))
|
||||
}
|
||||
b.assertValid()
|
||||
}
|
||||
|
||||
// Empty log entries
|
||||
f(nil, nil, &block{})
|
||||
f([]int64{}, [][]Field{}, &block{})
|
||||
|
||||
// A single row
|
||||
timestamps := []int64{1234}
|
||||
rows := [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "msg",
|
||||
Value: "foo",
|
||||
},
|
||||
{
|
||||
Name: "level",
|
||||
Value: "error",
|
||||
},
|
||||
},
|
||||
}
|
||||
bExpected := &block{
|
||||
timestamps: []int64{1234},
|
||||
constColumns: []Field{
|
||||
{
|
||||
Name: "level",
|
||||
Value: "error",
|
||||
},
|
||||
{
|
||||
Name: "msg",
|
||||
Value: "foo",
|
||||
},
|
||||
},
|
||||
}
|
||||
f(timestamps, rows, bExpected)
|
||||
|
||||
// Multiple log entries with the same set of fields
|
||||
timestamps = []int64{3, 5}
|
||||
rows = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "job",
|
||||
Value: "foo",
|
||||
},
|
||||
{
|
||||
Name: "instance",
|
||||
Value: "host1",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "job",
|
||||
Value: "foo",
|
||||
},
|
||||
{
|
||||
Name: "instance",
|
||||
Value: "host2",
|
||||
},
|
||||
},
|
||||
}
|
||||
bExpected = &block{
|
||||
timestamps: []int64{3, 5},
|
||||
columns: []column{
|
||||
{
|
||||
name: "instance",
|
||||
values: []string{"host1", "host2"},
|
||||
},
|
||||
},
|
||||
constColumns: []Field{
|
||||
{
|
||||
Name: "job",
|
||||
Value: "foo",
|
||||
},
|
||||
},
|
||||
}
|
||||
f(timestamps, rows, bExpected)
|
||||
|
||||
// Multiple log entries with distinct set of fields
|
||||
timestamps = []int64{3, 5, 10}
|
||||
rows = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "msg",
|
||||
Value: "foo",
|
||||
},
|
||||
{
|
||||
Name: "b",
|
||||
Value: "xyz",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "b",
|
||||
Value: "xyz",
|
||||
},
|
||||
{
|
||||
Name: "a",
|
||||
Value: "aaa",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "b",
|
||||
Value: "xyz",
|
||||
},
|
||||
},
|
||||
}
|
||||
bExpected = &block{
|
||||
timestamps: []int64{3, 5, 10},
|
||||
columns: []column{
|
||||
{
|
||||
name: "a",
|
||||
values: []string{"", "aaa", ""},
|
||||
},
|
||||
{
|
||||
name: "msg",
|
||||
values: []string{"foo", "", ""},
|
||||
},
|
||||
},
|
||||
constColumns: []Field{
|
||||
{
|
||||
Name: "b",
|
||||
Value: "xyz",
|
||||
},
|
||||
},
|
||||
}
|
||||
f(timestamps, rows, bExpected)
|
||||
}
|
||||
|
||||
func TestBlockMustInitFromRowsFullBlock(t *testing.T) {
|
||||
const rowsCount = 2000
|
||||
timestamps := make([]int64, rowsCount)
|
||||
rows := make([][]Field, rowsCount)
|
||||
for i := range timestamps {
|
||||
fields := make([]Field, 10)
|
||||
for j := range fields {
|
||||
fields[j] = Field{
|
||||
Name: fmt.Sprintf("field_%d", j),
|
||||
Value: "very very looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong value",
|
||||
}
|
||||
}
|
||||
rows[i] = fields
|
||||
}
|
||||
|
||||
b := getBlock()
|
||||
defer putBlock(b)
|
||||
b.MustInitFromRows(timestamps, rows)
|
||||
if n := b.Len(); n != len(rows) {
|
||||
t.Fatalf("unexpected total log entries; got %d; want %d", n, len(rows))
|
||||
}
|
||||
if b.uncompressedSizeBytes() < maxUncompressedBlockSize {
|
||||
t.Fatalf("expecting full block")
|
||||
}
|
||||
b.assertValid()
|
||||
}
|
46
lib/logstorage/block_timing_test.go
Normal file
|
@ -0,0 +1,46 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkBlock_MustInitFromRows(b *testing.B) {
|
||||
for _, rowsPerBlock := range []int{1, 10, 100, 1000, 10000} {
|
||||
b.Run(fmt.Sprintf("rowsPerBlock_%d", rowsPerBlock), func(b *testing.B) {
|
||||
benchmarkBlockMustInitFromRows(b, rowsPerBlock)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkBlockMustInitFromRows(b *testing.B, rowsPerBlock int) {
|
||||
timestamps, rows := newTestRows(rowsPerBlock, 10)
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(len(timestamps)))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
block := getBlock()
|
||||
defer putBlock(block)
|
||||
for pb.Next() {
|
||||
block.MustInitFromRows(timestamps, rows)
|
||||
if n := block.Len(); n != len(timestamps) {
|
||||
panic(fmt.Errorf("unexpected block length; got %d; want %d", n, len(timestamps)))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func newTestRows(rowsCount, fieldsPerRow int) ([]int64, [][]Field) {
|
||||
timestamps := make([]int64, rowsCount)
|
||||
rows := make([][]Field, rowsCount)
|
||||
for i := range timestamps {
|
||||
timestamps[i] = int64(i) * 1e9
|
||||
fields := make([]Field, fieldsPerRow)
|
||||
for j := range fields {
|
||||
f := &fields[j]
|
||||
f.Name = fmt.Sprintf("field_%d", j)
|
||||
f.Value = fmt.Sprintf("value_%d_%d", i, j)
|
||||
}
|
||||
rows[i] = fields
|
||||
}
|
||||
return timestamps, rows
|
||||
}
|
176
lib/logstorage/bloomfilter.go
Normal file
|
@ -0,0 +1,176 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
// bloomFilterHashesCount is the number of different hashes to use for bloom filter.
|
||||
const bloomFilterHashesCount = 6
|
||||
|
||||
// bloomFilterBitsPerItem is the number of bits to use per token.
|
||||
const bloomFilterBitsPerItem = 16
|
||||
|
||||
// bloomFilterMarshal appends marshaled bloom filter for tokens to dst and returns the result.
|
||||
func bloomFilterMarshal(dst []byte, tokens []string) []byte {
|
||||
bf := getBloomFilter()
|
||||
bf.mustInit(tokens)
|
||||
dst = bf.marshal(dst)
|
||||
putBloomFilter(bf)
|
||||
return dst
|
||||
}
|
||||
|
||||
type bloomFilter struct {
|
||||
bits []uint64
|
||||
}
|
||||
|
||||
func (bf *bloomFilter) reset() {
|
||||
bits := bf.bits
|
||||
for i := range bits {
|
||||
bits[i] = 0
|
||||
}
|
||||
bf.bits = bits[:0]
|
||||
}
|
||||
|
||||
// marshal appends marshaled bf to dst and returns the result.
|
||||
func (bf *bloomFilter) marshal(dst []byte) []byte {
|
||||
bits := bf.bits
|
||||
for _, word := range bits {
|
||||
dst = encoding.MarshalUint64(dst, word)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// unmarshal unmarshals bf from src.
|
||||
func (bf *bloomFilter) unmarshal(src []byte) error {
|
||||
if len(src)%8 != 0 {
|
||||
return fmt.Errorf("cannot unmarshal bloomFilter from src with size not multiple by 8; len(src)=%d", len(src))
|
||||
}
|
||||
bf.reset()
|
||||
wordsCount := len(src) / 8
|
||||
bits := bf.bits
|
||||
if n := wordsCount - cap(bits); n > 0 {
|
||||
bits = append(bits[:cap(bits)], make([]uint64, n)...)
|
||||
}
|
||||
bits = bits[:wordsCount]
|
||||
for i := range bits {
|
||||
bits[i] = encoding.UnmarshalUint64(src)
|
||||
src = src[8:]
|
||||
}
|
||||
bf.bits = bits
|
||||
return nil
|
||||
}
|
||||
|
||||
// mustInit initializes bf with the given tokens
|
||||
func (bf *bloomFilter) mustInit(tokens []string) {
|
||||
bitsCount := len(tokens) * bloomFilterBitsPerItem
|
||||
wordsCount := (bitsCount + 63) / 64
|
||||
bits := bf.bits
|
||||
if n := wordsCount - cap(bits); n > 0 {
|
||||
bits = append(bits[:cap(bits)], make([]uint64, n)...)
|
||||
}
|
||||
bits = bits[:wordsCount]
|
||||
bloomFilterAdd(bits, tokens)
|
||||
bf.bits = bits
|
||||
}
|
||||
|
||||
// bloomFilterAdd adds the given tokens to the bloom filter bits
|
||||
func bloomFilterAdd(bits []uint64, tokens []string) {
|
||||
maxBits := uint64(len(bits)) * 64
|
||||
var buf [8]byte
|
||||
hp := (*uint64)(unsafe.Pointer(&buf[0]))
|
||||
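// Derive bloomFilterHashesCount bit positions per token without computing independent
// hash functions: buf initially holds xxhash(token), and each iteration hashes buf
// again to obtain the next bit index and then increments the value stored in buf.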
for _, token := range tokens {
|
||||
*hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
|
||||
for i := 0; i < bloomFilterHashesCount; i++ {
|
||||
hi := xxhash.Sum64(buf[:])
|
||||
(*hp)++
|
||||
idx := hi % maxBits
|
||||
i := idx / 64
|
||||
j := idx % 64
|
||||
mask := uint64(1) << j
|
||||
w := bits[i]
|
||||
if (w & mask) == 0 {
|
||||
bits[i] = w | mask
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// containsAll returns true if bf contains all the given tokens.
|
||||
func (bf *bloomFilter) containsAll(tokens []string) bool {
|
||||
bits := bf.bits
|
||||
if len(bits) == 0 {
|
||||
return true
|
||||
}
|
||||
maxBits := uint64(len(bits)) * 64
|
||||
var buf [8]byte
|
||||
hp := (*uint64)(unsafe.Pointer(&buf[0]))
|
||||
for _, token := range tokens {
|
||||
*hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
|
||||
for i := 0; i < bloomFilterHashesCount; i++ {
|
||||
hi := xxhash.Sum64(buf[:])
|
||||
(*hp)++
|
||||
idx := hi % maxBits
|
||||
i := idx / 64
|
||||
j := idx % 64
|
||||
mask := uint64(1) << j
|
||||
w := bits[i]
|
||||
if (w & mask) == 0 {
|
||||
// The token is missing
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// containsAny returns true if bf contains at least a single token from the given tokens.
|
||||
func (bf *bloomFilter) containsAny(tokens []string) bool {
|
||||
bits := bf.bits
|
||||
if len(bits) == 0 {
|
||||
return true
|
||||
}
|
||||
maxBits := uint64(len(bits)) * 64
|
||||
var buf [8]byte
|
||||
hp := (*uint64)(unsafe.Pointer(&buf[0]))
|
||||
nextToken:
|
||||
for _, token := range tokens {
|
||||
*hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
|
||||
for i := 0; i < bloomFilterHashesCount; i++ {
|
||||
hi := xxhash.Sum64(buf[:])
|
||||
(*hp)++
|
||||
idx := hi % maxBits
|
||||
i := idx / 64
|
||||
j := idx % 64
|
||||
mask := uint64(1) << j
|
||||
w := bits[i]
|
||||
if (w & mask) == 0 {
|
||||
// The token is missing. Check the next token
|
||||
continue nextToken
|
||||
}
|
||||
}
|
||||
// It is likely the token exists in the bloom filter
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func getBloomFilter() *bloomFilter {
|
||||
v := bloomFilterPool.Get()
|
||||
if v == nil {
|
||||
return &bloomFilter{}
|
||||
}
|
||||
return v.(*bloomFilter)
|
||||
}
|
||||
|
||||
func putBloomFilter(bf *bloomFilter) {
|
||||
bf.reset()
|
||||
bloomFilterPool.Put(bf)
|
||||
}
|
||||
|
||||
var bloomFilterPool sync.Pool
|
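As a rough sanity check of the parameters above (an editorial sketch based on the standard Bloom filter approximation, not taken from this package): with bloomFilterBitsPerItem = 16 and bloomFilterHashesCount = 6 the expected false positive rate is roughly 0.09%, which is consistent with the 0.0011 threshold used in TestBloomFilterFalsePositive below.

package main

import (
	"fmt"
	"math"
)

func main() {
	const k = 6.0    // bloomFilterHashesCount
	const bpi = 16.0 // bloomFilterBitsPerItem, i.e. m/n bits per token
	// Classic approximation of the Bloom filter false positive rate: p = (1 - e^(-k*n/m))^k
	p := math.Pow(1-math.Exp(-k/bpi), k)
	fmt.Printf("expected false positive rate: %.5f\n", p) // ~0.00094
}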
84
lib/logstorage/bloomfilter_test.go
Normal file
|
@ -0,0 +1,84 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBloomFilter(t *testing.T) {
|
||||
f := func(tokens []string) {
|
||||
t.Helper()
|
||||
data := bloomFilterMarshal(nil, tokens)
|
||||
bf := getBloomFilter()
|
||||
defer putBloomFilter(bf)
|
||||
if err := bf.unmarshal(data); err != nil {
|
||||
t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err)
|
||||
}
|
||||
for _, token := range tokens {
|
||||
if !bf.containsAny([]string{token}) {
|
||||
t.Fatalf("bloomFilterContains must return true for the added token %q", token)
|
||||
}
|
||||
}
|
||||
if !bf.containsAll(tokens) {
|
||||
t.Fatalf("bloomFilterContains must return true for the added tokens")
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
f([]string{"foo"})
|
||||
f([]string{"foo", "bar", "baz"})
|
||||
|
||||
// 10k tokens
|
||||
tokens := make([]string, 10000)
|
||||
for i := range tokens {
|
||||
tokens[i] = fmt.Sprintf("token_%d", i)
|
||||
}
|
||||
f(tokens)
|
||||
}
|
||||
|
||||
func TestBloomFilterUnmarshalFailure(t *testing.T) {
|
||||
f := func(data []byte) {
|
||||
t.Helper()
|
||||
bf := getBloomFilter()
|
||||
defer putBloomFilter(bf)
|
||||
if err := bf.unmarshal(data); err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
}
|
||||
f([]byte("a"))
|
||||
f([]byte("foo"))
|
||||
}
|
||||
|
||||
func TestBloomFilterUnmarshalGarbage(t *testing.T) {
|
||||
data := []byte("01234567")
|
||||
var bf bloomFilter
|
||||
if err := bf.unmarshal(data); err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBloomFilterFalsePositive(t *testing.T) {
|
||||
tokens := make([]string, 20000)
|
||||
for i := range tokens {
|
||||
tokens[i] = fmt.Sprintf("token_%d", i)
|
||||
}
|
||||
data := bloomFilterMarshal(nil, tokens)
|
||||
bf := getBloomFilter()
|
||||
defer putBloomFilter(bf)
|
||||
if err := bf.unmarshal(data); err != nil {
|
||||
t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err)
|
||||
}
|
||||
|
||||
// count the number of false positives on 20K non-existing tokens
|
||||
falsePositives := 0
|
||||
for i := range tokens {
|
||||
token := fmt.Sprintf("non-existing-token_%d", i)
|
||||
if bf.containsAny([]string{token}) {
|
||||
falsePositives++
|
||||
}
|
||||
}
|
||||
p := float64(falsePositives) / float64(len(tokens))
|
||||
maxFalsePositive := 0.0011
|
||||
if p > maxFalsePositive {
|
||||
t.Fatalf("too high false positive rate; got %.4f; want %.4f max", p, maxFalsePositive)
|
||||
}
|
||||
}
|
32
lib/logstorage/consts.go
Normal file
|
@ -0,0 +1,32 @@
|
|||
package logstorage
|
||||
|
||||
// maxUncompressedIndexBlockSize is the maximum length of an uncompressed block with blockHeader entries aka index block.
|
||||
//
|
||||
// The real block length can exceed this value by a small percentage because of the block write details.
|
||||
const maxUncompressedIndexBlockSize = 128 * 1024
|
||||
|
||||
// maxUncompressedBlockSize is the maximum size of uncompressed block in bytes.
|
||||
//
|
||||
// The real uncompressed block size can exceed this value by up to 2 times because of block merge details.
|
||||
const maxUncompressedBlockSize = 2 * 1024 * 1024
|
||||
|
||||
// maxRowsPerBlock is the maximum number of log entries a single block can contain.
|
||||
const maxRowsPerBlock = 8 * 1024 * 1024
|
||||
|
||||
// maxColumnsPerBlock is the maximum number of columns per block.
|
||||
const maxColumnsPerBlock = 10000
|
||||
|
||||
// maxIndexBlockSize is the maximum size of the block with blockHeader entries (aka indexBlock)
|
||||
const maxIndexBlockSize = 8 * 1024 * 1024
|
||||
|
||||
// maxTimestampsBlockSize is the maximum size of timestamps block
|
||||
const maxTimestampsBlockSize = 8 * 1024 * 1024
|
||||
|
||||
// maxValuesBlockSize is the maximum size of values block
|
||||
const maxValuesBlockSize = 8 * 1024 * 1024
|
||||
|
||||
// maxBloomFilterBlockSize is the maximum size of bloom filter block
|
||||
const maxBloomFilterBlockSize = 8 * 1024 * 1024
|
||||
|
||||
// maxColumnsHeaderSize is the maximum size of columnsHeader block
|
||||
const maxColumnsHeaderSize = 8 * 1024 * 1024
|
990
lib/logstorage/datadb.go
Normal file
|
@ -0,0 +1,990 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
)
|
||||
|
||||
// Default number of parts to merge at once.
|
||||
//
|
||||
// This number has been obtained empirically - it gives the lowest possible overhead.
|
||||
// See appendPartsToMerge tests for details.
|
||||
const defaultPartsToMerge = 15
|
||||
|
||||
// minMergeMultiplier is the minimum multiplier for the size of the output part
|
||||
// compared to the size of the maximum input part for the merge.
|
||||
//
|
||||
// A higher value reduces write amplification (disk write IO induced by the merge),
// while increasing the number of unmerged parts.
// The value 1.7 is good enough for production workloads.
|
||||
const minMergeMultiplier = 1.7
|
||||
|
||||
// The maximum number of inmemory parts in the partition.
|
||||
//
|
||||
// If the number of inmemory parts reaches this value, then data ingestion is paced until background merges reduce the number of inmemory parts.
|
||||
const maxInmemoryPartsPerPartition = 20
|
||||
|
||||
// datadb represents a database with log data
|
||||
type datadb struct {
|
||||
// pt is the partition the datadb belongs to
|
||||
pt *partition
|
||||
|
||||
// mergeIdx is used for generating unique directory names for parts
|
||||
mergeIdx uint64
|
||||
|
||||
// path is the path to the directory with log data
|
||||
path string
|
||||
|
||||
// flushInterval is interval for flushing the inmemory parts to disk
|
||||
flushInterval time.Duration
|
||||
|
||||
// inmemoryParts contains a list of inmemory parts
|
||||
inmemoryParts []*partWrapper
|
||||
|
||||
// fileParts contains a list of file-based parts
|
||||
fileParts []*partWrapper
|
||||
|
||||
// partsLock protects parts from concurrent access
|
||||
partsLock sync.Mutex
|
||||
|
||||
// wg is used for determining when background workers stop
|
||||
wg sync.WaitGroup
|
||||
|
||||
// stopCh is used for notifying background workers to stop
|
||||
stopCh chan struct{}
|
||||
|
||||
// mergeDoneCond is used for pace-limiting the data ingestion rate
|
||||
mergeDoneCond *sync.Cond
|
||||
|
||||
// inmemoryPartsFlushersCount is the number of currently running in-memory parts flushers
|
||||
//
|
||||
// This variable must be accessed under partsLock.
|
||||
inmemoryPartsFlushersCount int
|
||||
|
||||
// mergeWorkersCount is the number of currently running merge workers
|
||||
//
|
||||
// This variable must be accessed under partsLock.
|
||||
mergeWorkersCount int
|
||||
}
|
||||
|
||||
// partWrapper is a wrapper for opened part.
|
||||
type partWrapper struct {
|
||||
// refCount is the number of references to p.
|
||||
//
|
||||
// When the number of references reaches zero, then p is closed.
|
||||
refCount int32
|
||||
|
||||
// mustBeDeleted is a flag, which is set when the part must be deleted after refCount reaches zero.
|
||||
mustBeDeleted uint32
|
||||
|
||||
// p is an opened part
|
||||
p *part
|
||||
|
||||
// mp references inmemory part used for initializing p.
|
||||
mp *inmemoryPart
|
||||
|
||||
// isInMerge is set to true if the part takes part in merge.
|
||||
isInMerge bool
|
||||
|
||||
// The deadline when in-memory part must be flushed to disk.
|
||||
flushDeadline time.Time
|
||||
}
|
||||
|
||||
func (pw *partWrapper) incRef() {
|
||||
atomic.AddInt32(&pw.refCount, 1)
|
||||
}
|
||||
|
||||
func (pw *partWrapper) decRef() {
|
||||
n := atomic.AddInt32(&pw.refCount, -1)
|
||||
if n > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
deletePath := ""
|
||||
if pw.mp == nil {
|
||||
if atomic.LoadUint32(&pw.mustBeDeleted) != 0 {
|
||||
deletePath = pw.p.path
|
||||
}
|
||||
} else {
|
||||
putInmemoryPart(pw.mp)
|
||||
pw.mp = nil
|
||||
}
|
||||
|
||||
mustClosePart(pw.p)
|
||||
pw.p = nil
|
||||
|
||||
if deletePath != "" {
|
||||
fs.MustRemoveAll(deletePath)
|
||||
}
|
||||
}
|
||||
|
||||
func mustCreateDatadb(path string) {
|
||||
fs.MustMkdirFailIfExist(path)
|
||||
mustWritePartNames(path, []string{})
|
||||
}
|
||||
|
||||
// mustOpenDatadb opens datadb at the given path with the given flushInterval for in-memory data.
|
||||
func mustOpenDatadb(pt *partition, path string, flushInterval time.Duration) *datadb {
|
||||
// Remove temporary directories, which may be left after unclean shutdown.
|
||||
fs.MustRemoveTemporaryDirs(path)
|
||||
|
||||
partNames := mustReadPartNames(path)
|
||||
mustRemoveUnusedDirs(path, partNames)
|
||||
|
||||
pws := make([]*partWrapper, len(partNames))
|
||||
for i, partName := range partNames {
|
||||
partPath := filepath.Join(path, partName)
|
||||
p := mustOpenFilePart(pt, partPath)
|
||||
pws[i] = newPartWrapper(p, nil, time.Time{})
|
||||
}
|
||||
|
||||
ddb := &datadb{
|
||||
pt: pt,
|
||||
mergeIdx: uint64(time.Now().UnixNano()),
|
||||
flushInterval: flushInterval,
|
||||
path: path,
|
||||
fileParts: pws,
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
ddb.mergeDoneCond = sync.NewCond(&ddb.partsLock)
|
||||
|
||||
// Start merge workers in the hope they'll merge the remaining parts
|
||||
ddb.partsLock.Lock()
|
||||
n := getMergeWorkersCount()
|
||||
for i := 0; i < n; i++ {
|
||||
ddb.startMergeWorkerLocked()
|
||||
}
|
||||
ddb.partsLock.Unlock()
|
||||
|
||||
return ddb
|
||||
}
|
||||
|
||||
// startInmemoryPartsFlusherLocked starts flusher for in-memory parts to disk.
|
||||
//
|
||||
// This function must be called under partsLock.
|
||||
func (ddb *datadb) startInmemoryPartsFlusherLocked() {
|
||||
if ddb.inmemoryPartsFlushersCount >= 1 {
|
||||
return
|
||||
}
|
||||
ddb.inmemoryPartsFlushersCount++
|
||||
ddb.wg.Add(1)
|
||||
go func() {
|
||||
ddb.flushInmemoryParts()
|
||||
ddb.wg.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
func (ddb *datadb) flushInmemoryParts() {
|
||||
ticker := time.NewTicker(time.Second)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
ddb.partsLock.Lock()
|
||||
pws := make([]*partWrapper, 0, len(ddb.inmemoryParts))
|
||||
pws = appendNotInMergePartsLocked(pws, ddb.inmemoryParts)
|
||||
currentTime := time.Now()
|
||||
partsToFlush := pws[:0]
|
||||
for _, pw := range pws {
|
||||
if pw.flushDeadline.Before(currentTime) {
|
||||
partsToFlush = append(partsToFlush, pw)
|
||||
}
|
||||
}
|
||||
setInMergeLocked(partsToFlush)
|
||||
if len(pws) == 0 {
|
||||
ddb.inmemoryPartsFlushersCount--
|
||||
}
|
||||
ddb.partsLock.Unlock()
|
||||
|
||||
if len(pws) == 0 {
|
||||
// There are no in-memory parts, so stop the flusher.
|
||||
return
|
||||
}
|
||||
ddb.mustMergePartsFinal(partsToFlush)
|
||||
|
||||
select {
|
||||
case <-ddb.stopCh:
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// startMergeWorkerLocked starts a merge worker.
|
||||
//
|
||||
// This function must be called under locked partsLock.
|
||||
func (ddb *datadb) startMergeWorkerLocked() {
|
||||
if ddb.mergeWorkersCount >= getMergeWorkersCount() {
|
||||
return
|
||||
}
|
||||
ddb.mergeWorkersCount++
|
||||
ddb.wg.Add(1)
|
||||
go func() {
|
||||
globalMergeLimitCh <- struct{}{}
|
||||
ddb.mustMergeExistingParts()
|
||||
<-globalMergeLimitCh
|
||||
ddb.wg.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
// globalMergeLimitCh limits the number of concurrent merges across all the partitions
|
||||
var globalMergeLimitCh = make(chan struct{}, getMergeWorkersCount())
|
||||
|
||||
func getMergeWorkersCount() int {
|
||||
n := cgroup.AvailableCPUs()
|
||||
if n < 4 {
|
||||
// Use a bigger number of workers on systems with a small number of CPU cores,
// since a single worker may become busy for a long time when merging big parts.
// The remaining workers can then continue performing merges for newly added small parts.
|
||||
return 4
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (ddb *datadb) mustMergeExistingParts() {
|
||||
for !needStop(ddb.stopCh) {
|
||||
maxOutBytes := ddb.availableDiskSpace()
|
||||
|
||||
ddb.partsLock.Lock()
|
||||
parts := make([]*partWrapper, 0, len(ddb.inmemoryParts)+len(ddb.fileParts))
|
||||
parts = appendNotInMergePartsLocked(parts, ddb.inmemoryParts)
|
||||
parts = appendNotInMergePartsLocked(parts, ddb.fileParts)
|
||||
pws := appendPartsToMerge(nil, parts, maxOutBytes)
|
||||
setInMergeLocked(pws)
|
||||
if len(pws) == 0 {
|
||||
ddb.mergeWorkersCount--
|
||||
}
|
||||
ddb.partsLock.Unlock()
|
||||
|
||||
if len(pws) == 0 {
|
||||
// Nothing to merge at the moment.
|
||||
return
|
||||
}
|
||||
|
||||
partsSize := getCompressedSize(pws)
|
||||
if !ddb.reserveDiskSpace(partsSize) {
|
||||
// There is no free disk space for the merge,
|
||||
// because concurrent merge workers already reserved the disk space.
|
||||
// Try again with smaller maxOutBytes.
|
||||
ddb.releasePartsToMerge(pws)
|
||||
continue
|
||||
}
|
||||
ddb.mustMergeParts(pws, false)
|
||||
ddb.releaseDiskSpace(partsSize)
|
||||
}
|
||||
}
|
||||
|
||||
// appendNotInMergePartsLocked appends src parts with isInMerge=false to dst and returns the result.
|
||||
//
|
||||
// This function must be called under partsLock.
|
||||
func appendNotInMergePartsLocked(dst, src []*partWrapper) []*partWrapper {
|
||||
for _, pw := range src {
|
||||
if !pw.isInMerge {
|
||||
dst = append(dst, pw)
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// setInMergeLocked sets isInMerge flag for pws.
|
||||
//
|
||||
// This function must be called under partsLock.
|
||||
func setInMergeLocked(pws []*partWrapper) {
|
||||
for _, pw := range pws {
|
||||
if pw.isInMerge {
|
||||
logger.Panicf("BUG: partWrapper.isInMerge unexpectedly set to true")
|
||||
}
|
||||
pw.isInMerge = true
|
||||
}
|
||||
}
|
||||
|
||||
func assertIsInMerge(pws []*partWrapper) {
|
||||
for _, pw := range pws {
|
||||
if !pw.isInMerge {
|
||||
logger.Panicf("BUG: partWrapper.isInMerge unexpectedly set to false")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// mustMergeParts merges pws to a single resulting part.
|
||||
//
|
||||
// if isFinal is set, then the resulting part will be saved to disk.
|
||||
//
|
||||
// All the parts inside pws must have isInMerge field set to true.
|
||||
func (ddb *datadb) mustMergeParts(pws []*partWrapper, isFinal bool) {
|
||||
if len(pws) == 0 {
|
||||
// Nothing to merge.
|
||||
return
|
||||
}
|
||||
assertIsInMerge(pws)
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
// Initialize destination paths.
|
||||
dstPartType := ddb.getDstPartType(pws, isFinal)
|
||||
mergeIdx := ddb.nextMergeIdx()
|
||||
dstPartPath := ddb.getDstPartPath(dstPartType, mergeIdx)
|
||||
|
||||
if isFinal && len(pws) == 1 && pws[0].mp != nil {
|
||||
// Fast path: flush a single in-memory part to disk.
|
||||
mp := pws[0].mp
|
||||
mp.MustStoreToDisk(dstPartPath)
|
||||
pwNew := ddb.openCreatedPart(&mp.ph, pws, nil, dstPartPath)
|
||||
ddb.swapSrcWithDstParts(pws, pwNew, dstPartType)
|
||||
return
|
||||
}
|
||||
|
||||
// Prepare blockStreamReaders for source parts.
|
||||
bsrs := mustOpenBlockStreamReaders(pws)
|
||||
|
||||
// Prepare BlockStreamWriter for destination part.
|
||||
srcSize := uint64(0)
|
||||
srcRowsCount := uint64(0)
|
||||
srcBlocksCount := uint64(0)
|
||||
for _, pw := range pws {
|
||||
srcSize += pw.p.ph.CompressedSizeBytes
|
||||
srcRowsCount += pw.p.ph.RowsCount
|
||||
srcBlocksCount += pw.p.ph.BlocksCount
|
||||
}
|
||||
bsw := getBlockStreamWriter()
|
||||
var mpNew *inmemoryPart
|
||||
if dstPartType == partInmemory {
|
||||
mpNew = getInmemoryPart()
|
||||
bsw.MustInitForInmemoryPart(mpNew)
|
||||
} else {
|
||||
nocache := !shouldUsePageCacheForPartSize(srcSize)
|
||||
bsw.MustInitForFilePart(dstPartPath, nocache)
|
||||
}
|
||||
|
||||
// Merge source parts to destination part.
|
||||
var ph partHeader
|
||||
stopCh := ddb.stopCh
|
||||
if isFinal {
|
||||
// The final merge shouldn't be stopped even if ddb.stopCh is closed.
|
||||
stopCh = nil
|
||||
}
|
||||
mustMergeBlockStreams(&ph, bsw, bsrs, stopCh)
|
||||
putBlockStreamWriter(bsw)
|
||||
for _, bsr := range bsrs {
|
||||
putBlockStreamReader(bsr)
|
||||
}
|
||||
|
||||
// Persist partHeader for destination part after the merge.
|
||||
if mpNew != nil {
|
||||
mpNew.ph = ph
|
||||
} else {
|
||||
ph.mustWriteMetadata(dstPartPath)
|
||||
// Make sure the created part directory listing is synced.
|
||||
fs.MustSyncPath(dstPartPath)
|
||||
}
|
||||
if needStop(stopCh) {
|
||||
ddb.releasePartsToMerge(pws)
|
||||
ddb.mergeDoneCond.Broadcast()
|
||||
// Remove incomplete destination part
|
||||
if dstPartType == partFile {
|
||||
fs.MustRemoveAll(dstPartPath)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Atomically swap the source parts with the newly created part.
|
||||
pwNew := ddb.openCreatedPart(&ph, pws, mpNew, dstPartPath)
|
||||
|
||||
dstSize := uint64(0)
|
||||
dstRowsCount := uint64(0)
|
||||
dstBlocksCount := uint64(0)
|
||||
if pwNew != nil {
|
||||
pDst := pwNew.p
|
||||
dstSize = pDst.ph.CompressedSizeBytes
|
||||
dstRowsCount = pDst.ph.RowsCount
|
||||
dstBlocksCount = pDst.ph.BlocksCount
|
||||
}
|
||||
|
||||
ddb.swapSrcWithDstParts(pws, pwNew, dstPartType)
|
||||
|
||||
d := time.Since(startTime)
|
||||
if d <= 30*time.Second {
|
||||
return
|
||||
}
|
||||
|
||||
// Log stats for long merges.
|
||||
durationSecs := d.Seconds()
|
||||
rowsPerSec := int(float64(srcRowsCount) / durationSecs)
|
||||
logger.Infof("merged (%d parts, %d rows, %d blocks, %d bytes) into (1 part, %d rows, %d blocks, %d bytes) in %.3f seconds at %d rows/sec to %q",
|
||||
len(pws), srcRowsCount, srcBlocksCount, srcSize, dstRowsCount, dstBlocksCount, dstSize, durationSecs, rowsPerSec, dstPartPath)
|
||||
}
|
||||
|
||||
func (ddb *datadb) nextMergeIdx() uint64 {
|
||||
return atomic.AddUint64(&ddb.mergeIdx, 1)
|
||||
}
|
||||
|
||||
type partType int
|
||||
|
||||
var (
|
||||
partInmemory = partType(0)
|
||||
partFile = partType(1)
|
||||
)
|
||||
|
||||
func (ddb *datadb) getDstPartType(pws []*partWrapper, isFinal bool) partType {
|
||||
if isFinal {
|
||||
return partFile
|
||||
}
|
||||
dstPartSize := getCompressedSize(pws)
|
||||
if dstPartSize > getMaxInmemoryPartSize() {
|
||||
return partFile
|
||||
}
|
||||
if !areAllInmemoryParts(pws) {
|
||||
// If at least a single source part is located in file,
|
||||
// then the destination part must be in file for durability reasons.
|
||||
return partFile
|
||||
}
|
||||
return partInmemory
|
||||
}
|
||||
|
||||
func (ddb *datadb) getDstPartPath(dstPartType partType, mergeIdx uint64) string {
|
||||
ptPath := ddb.path
|
||||
dstPartPath := ""
|
||||
if dstPartType != partInmemory {
|
||||
dstPartPath = filepath.Join(ptPath, fmt.Sprintf("%016X", mergeIdx))
|
||||
}
|
||||
return dstPartPath
|
||||
}
|
||||
|
||||
func (ddb *datadb) openCreatedPart(ph *partHeader, pws []*partWrapper, mpNew *inmemoryPart, dstPartPath string) *partWrapper {
|
||||
// Open the created part.
|
||||
if ph.RowsCount == 0 {
|
||||
// The created part is empty. Remove it
|
||||
if mpNew == nil {
|
||||
fs.MustRemoveAll(dstPartPath)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
var p *part
|
||||
var flushDeadline time.Time
|
||||
if mpNew != nil {
|
||||
// Open the created part from memory.
|
||||
p = mustOpenInmemoryPart(ddb.pt, mpNew)
|
||||
flushDeadline = ddb.getFlushToDiskDeadline(pws)
|
||||
} else {
|
||||
// Open the created part from disk.
|
||||
p = mustOpenFilePart(ddb.pt, dstPartPath)
|
||||
}
|
||||
return newPartWrapper(p, mpNew, flushDeadline)
|
||||
}
|
||||
|
||||
func (ddb *datadb) mustAddRows(lr *LogRows) {
|
||||
if len(lr.streamIDs) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
mp := getInmemoryPart()
|
||||
mp.mustInitFromRows(lr)
|
||||
p := mustOpenInmemoryPart(ddb.pt, mp)
|
||||
|
||||
flushDeadline := time.Now().Add(ddb.flushInterval)
|
||||
pw := newPartWrapper(p, mp, flushDeadline)
|
||||
|
||||
ddb.partsLock.Lock()
|
||||
ddb.inmemoryParts = append(ddb.inmemoryParts, pw)
|
||||
ddb.startInmemoryPartsFlusherLocked()
|
||||
if len(ddb.inmemoryParts) > defaultPartsToMerge {
|
||||
ddb.startMergeWorkerLocked()
|
||||
}
|
||||
for len(ddb.inmemoryParts) > maxInmemoryPartsPerPartition {
|
||||
// Limit the data ingestion pace if too many inmemory parts are created.
|
||||
ddb.mergeDoneCond.Wait()
|
||||
}
|
||||
ddb.partsLock.Unlock()
|
||||
}
|
||||
|
||||
// DatadbStats contains various stats for datadb.
|
||||
type DatadbStats struct {
|
||||
// InmemoryRowsCount is the number of rows, which weren't flushed to disk yet.
|
||||
InmemoryRowsCount uint64
|
||||
|
||||
// FileRowsCount is the number of rows stored on disk.
|
||||
FileRowsCount uint64
|
||||
|
||||
// InmemoryParts is the number of in-memory parts, which weren't flushed to disk yet.
|
||||
InmemoryParts uint64
|
||||
|
||||
// FileParts is the number of file-based parts stored on disk.
|
||||
FileParts uint64
|
||||
|
||||
// InmemoryBlocks is the number of in-memory blocks, which weren't flushed to disk yet.
|
||||
InmemoryBlocks uint64
|
||||
|
||||
// FileBlocks is the number of file-based blocks stored on disk.
|
||||
FileBlocks uint64
|
||||
|
||||
// CompressedInmemorySize is the size of compressed data stored in memory.
|
||||
CompressedInmemorySize uint64
|
||||
|
||||
// CompressedFileSize is the size of compressed data stored on disk.
|
||||
CompressedFileSize uint64
|
||||
|
||||
// UncompressedInmemorySize is the size of uncompressed data stored in memory.
|
||||
UncompressedInmemorySize uint64
|
||||
|
||||
// UncompressedFileSize is the size of uncompressed data stored on disk.
|
||||
UncompressedFileSize uint64
|
||||
}
|
||||
|
||||
func (s *DatadbStats) reset() {
|
||||
*s = DatadbStats{}
|
||||
}
|
||||
|
||||
// RowsCount returns the number of rows stored in datadb.
|
||||
func (s *DatadbStats) RowsCount() uint64 {
|
||||
return s.InmemoryRowsCount + s.FileRowsCount
|
||||
}
|
||||
|
||||
// updateStats updates s with ddb stats
|
||||
func (ddb *datadb) updateStats(s *DatadbStats) {
|
||||
ddb.partsLock.Lock()
|
||||
|
||||
s.InmemoryRowsCount += getRowsCount(ddb.inmemoryParts)
|
||||
s.FileRowsCount += getRowsCount(ddb.fileParts)
|
||||
|
||||
s.InmemoryParts += uint64(len(ddb.inmemoryParts))
|
||||
s.FileParts += uint64(len(ddb.fileParts))
|
||||
|
||||
s.InmemoryBlocks += getBlocksCount(ddb.inmemoryParts)
|
||||
s.FileBlocks += getBlocksCount(ddb.fileParts)
|
||||
|
||||
s.CompressedInmemorySize += getCompressedSize(ddb.inmemoryParts)
|
||||
s.CompressedFileSize += getCompressedSize(ddb.fileParts)
|
||||
|
||||
s.UncompressedInmemorySize += getUncompressedSize(ddb.inmemoryParts)
|
||||
s.UncompressedFileSize += getUncompressedSize(ddb.fileParts)
|
||||
|
||||
ddb.partsLock.Unlock()
|
||||
}
|
||||
|
||||
// debugFlush() makes sure that the recently ingested data is available for search.
|
||||
func (ddb *datadb) debugFlush() {
|
||||
// Nothing to do, since all the ingested data is available for search via ddb.inmemoryParts.
|
||||
}
|
||||
|
||||
func (ddb *datadb) mustMergePartsFinal(pws []*partWrapper) {
|
||||
assertIsInMerge(pws)
|
||||
|
||||
var pwsChunk []*partWrapper
|
||||
for len(pws) > 0 {
|
||||
pwsChunk = appendPartsToMerge(pwsChunk[:0], pws, (1<<64)-1)
|
||||
if len(pwsChunk) == 0 {
|
||||
pwsChunk = append(pwsChunk[:0], pws...)
|
||||
}
|
||||
ddb.mustMergeParts(pwsChunk, true)
|
||||
|
||||
partsToRemove := partsToMap(pwsChunk)
|
||||
removedParts := 0
|
||||
pws, removedParts = removeParts(pws, partsToRemove)
|
||||
if removedParts != len(pwsChunk) {
|
||||
logger.Panicf("BUG: unexpected number of parts removed; got %d; want %d", removedParts, len(pwsChunk))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func partsToMap(pws []*partWrapper) map[*partWrapper]struct{} {
|
||||
m := make(map[*partWrapper]struct{}, len(pws))
|
||||
for _, pw := range pws {
|
||||
m[pw] = struct{}{}
|
||||
}
|
||||
if len(m) != len(pws) {
|
||||
logger.Panicf("BUG: %d duplicate parts found out of %d parts", len(pws)-len(m), len(pws))
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func (ddb *datadb) swapSrcWithDstParts(pws []*partWrapper, pwNew *partWrapper, dstPartType partType) {
|
||||
// Atomically unregister old parts and add new part to pt.
|
||||
partsToRemove := partsToMap(pws)
|
||||
removedInmemoryParts := 0
|
||||
removedFileParts := 0
|
||||
|
||||
ddb.partsLock.Lock()
|
||||
|
||||
ddb.inmemoryParts, removedInmemoryParts = removeParts(ddb.inmemoryParts, partsToRemove)
|
||||
ddb.fileParts, removedFileParts = removeParts(ddb.fileParts, partsToRemove)
|
||||
if pwNew != nil {
|
||||
switch dstPartType {
|
||||
case partInmemory:
|
||||
ddb.inmemoryParts = append(ddb.inmemoryParts, pwNew)
|
||||
ddb.startInmemoryPartsFlusherLocked()
|
||||
case partFile:
|
||||
ddb.fileParts = append(ddb.fileParts, pwNew)
|
||||
default:
|
||||
logger.Panicf("BUG: unknown partType=%d", dstPartType)
|
||||
}
|
||||
if len(ddb.inmemoryParts)+len(ddb.fileParts) > defaultPartsToMerge {
|
||||
ddb.startMergeWorkerLocked()
|
||||
}
|
||||
}
|
||||
|
||||
// Atomically store the updated list of file-based parts on disk.
|
||||
// This must be performed under partsLock in order to prevent races
// when multiple concurrently running goroutines update the list.
|
||||
if removedFileParts > 0 || pwNew != nil && dstPartType == partFile {
|
||||
partNames := getPartNames(ddb.fileParts)
|
||||
mustWritePartNames(ddb.path, partNames)
|
||||
}
|
||||
|
||||
ddb.partsLock.Unlock()
|
||||
|
||||
removedParts := removedInmemoryParts + removedFileParts
|
||||
if removedParts != len(partsToRemove) {
|
||||
logger.Panicf("BUG: unexpected number of parts removed; got %d, want %d", removedParts, len(partsToRemove))
|
||||
}
|
||||
|
||||
// Mark old parts as must be deleted and decrement reference count,
|
||||
// so they are eventually closed and deleted.
|
||||
for _, pw := range pws {
|
||||
atomic.StoreUint32(&pw.mustBeDeleted, 1)
|
||||
pw.decRef()
|
||||
}
|
||||
|
||||
ddb.mergeDoneCond.Broadcast()
|
||||
}
|
||||
|
||||
func removeParts(pws []*partWrapper, partsToRemove map[*partWrapper]struct{}) ([]*partWrapper, int) {
|
||||
dst := pws[:0]
|
||||
for _, pw := range pws {
|
||||
if _, ok := partsToRemove[pw]; !ok {
|
||||
dst = append(dst, pw)
|
||||
}
|
||||
}
|
||||
for i := len(dst); i < len(pws); i++ {
|
||||
pws[i] = nil
|
||||
}
|
||||
return dst, len(pws) - len(dst)
|
||||
}
|
||||
|
||||
func mustOpenBlockStreamReaders(pws []*partWrapper) []*blockStreamReader {
|
||||
bsrs := make([]*blockStreamReader, 0, len(pws))
|
||||
for _, pw := range pws {
|
||||
bsr := getBlockStreamReader()
|
||||
if pw.mp != nil {
|
||||
bsr.MustInitFromInmemoryPart(pw.mp)
|
||||
} else {
|
||||
bsr.MustInitFromFilePart(pw.p.path)
|
||||
}
|
||||
bsrs = append(bsrs, bsr)
|
||||
}
|
||||
return bsrs
|
||||
}
|
||||
|
||||
func newPartWrapper(p *part, mp *inmemoryPart, flushDeadline time.Time) *partWrapper {
|
||||
pw := &partWrapper{
|
||||
p: p,
|
||||
mp: mp,
|
||||
|
||||
flushDeadline: flushDeadline,
|
||||
}
|
||||
|
||||
// Increase reference counter for newly created part - it is decreased when the part
|
||||
// is removed from the list of open parts.
|
||||
pw.incRef()
|
||||
|
||||
return pw
|
||||
}
|
||||
|
||||
func (ddb *datadb) getFlushToDiskDeadline(pws []*partWrapper) time.Time {
|
||||
d := time.Now().Add(ddb.flushInterval)
|
||||
for _, pw := range pws {
|
||||
if pw.mp != nil && pw.flushDeadline.Before(d) {
|
||||
d = pw.flushDeadline
|
||||
}
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func getMaxInmemoryPartSize() uint64 {
|
||||
// Allocate 10% of allowed memory for in-memory parts.
|
||||
n := uint64(0.1 * float64(memory.Allowed()) / maxInmemoryPartsPerPartition)
|
||||
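// For instance, with a hypothetical memory.Allowed() of 8 GiB this yields
// 0.1*8GiB/20 = ~43 MB per in-memory part, well above the 1e6 floor below.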
if n < 1e6 {
|
||||
n = 1e6
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func areAllInmemoryParts(pws []*partWrapper) bool {
|
||||
for _, pw := range pws {
|
||||
if pw.mp == nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (ddb *datadb) releasePartsToMerge(pws []*partWrapper) {
|
||||
ddb.partsLock.Lock()
|
||||
for _, pw := range pws {
|
||||
if !pw.isInMerge {
|
||||
logger.Panicf("BUG: missing isInMerge flag on the part %q", pw.p.path)
|
||||
}
|
||||
pw.isInMerge = false
|
||||
}
|
||||
ddb.partsLock.Unlock()
|
||||
}
|
||||
|
||||
func (ddb *datadb) availableDiskSpace() uint64 {
|
||||
available := fs.MustGetFreeSpace(ddb.path)
|
||||
reserved := atomic.LoadUint64(&reservedDiskSpace)
|
||||
if available < reserved {
|
||||
return 0
|
||||
}
|
||||
return available - reserved
|
||||
}
|
||||
|
||||
func (ddb *datadb) reserveDiskSpace(n uint64) bool {
|
||||
available := fs.MustGetFreeSpace(ddb.path)
|
||||
reserved := atomic.AddUint64(&reservedDiskSpace, n)
|
||||
if available > reserved {
|
||||
return true
|
||||
}
|
||||
ddb.releaseDiskSpace(n)
|
||||
return false
|
||||
}
|
||||
|
||||
func (ddb *datadb) releaseDiskSpace(n uint64) {
|
||||
atomic.AddUint64(&reservedDiskSpace, -n)
|
||||
}
|
||||
|
||||
// reservedDiskSpace tracks global reserved disk space for currently executed
|
||||
// background merges across all the partitions.
|
||||
//
|
||||
// It helps avoid starting background merges when there is not enough free disk space.
|
||||
var reservedDiskSpace uint64
|
||||
|
||||
func needStop(stopCh <-chan struct{}) bool {
|
||||
select {
|
||||
case <-stopCh:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// mustCloseDatadb can be called only when nobody accesses ddb.
|
||||
func mustCloseDatadb(ddb *datadb) {
|
||||
// Stop background workers
|
||||
close(ddb.stopCh)
|
||||
ddb.wg.Wait()
|
||||
|
||||
// flush in-memory data to disk
|
||||
pws := append([]*partWrapper{}, ddb.inmemoryParts...)
|
||||
setInMergeLocked(pws)
|
||||
ddb.mustMergePartsFinal(pws)
|
||||
|
||||
// There is no need to use ddb.partsLock here, since nobody should access ddb now.
|
||||
for _, pw := range ddb.inmemoryParts {
|
||||
pw.decRef()
|
||||
if pw.refCount != 0 {
|
||||
logger.Panicf("BUG: there are %d references to inmemoryPart", pw.refCount)
|
||||
}
|
||||
}
|
||||
ddb.inmemoryParts = nil
|
||||
|
||||
for _, pw := range ddb.fileParts {
|
||||
pw.decRef()
|
||||
if pw.refCount != 0 {
|
||||
logger.Panicf("BUG: ther are %d references to filePart", pw.refCount)
|
||||
}
|
||||
}
|
||||
ddb.fileParts = nil
|
||||
|
||||
ddb.path = ""
|
||||
ddb.pt = nil
|
||||
}
|
||||
|
||||
func getPartNames(pws []*partWrapper) []string {
|
||||
partNames := make([]string, 0, len(pws))
|
||||
for _, pw := range pws {
|
||||
if pw.mp != nil {
|
||||
// Skip in-memory parts
|
||||
continue
|
||||
}
|
||||
partName := filepath.Base(pw.p.path)
|
||||
partNames = append(partNames, partName)
|
||||
}
|
||||
sort.Strings(partNames)
|
||||
return partNames
|
||||
}
|
||||
|
||||
func mustWritePartNames(path string, partNames []string) {
|
||||
data, err := json.Marshal(partNames)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: cannot marshal partNames to JSON: %s", err)
|
||||
}
|
||||
partNamesPath := filepath.Join(path, partsFilename)
|
||||
fs.MustWriteAtomic(partNamesPath, data, true)
|
||||
}
|
||||
|
||||
func mustReadPartNames(path string) []string {
|
||||
partNamesPath := filepath.Join(path, partsFilename)
|
||||
data, err := os.ReadFile(partNamesPath)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot read %s: %s", partNamesPath, err)
|
||||
}
|
||||
var partNames []string
|
||||
if err := json.Unmarshal(data, &partNames); err != nil {
|
||||
logger.Panicf("FATAL: cannot parse %s: %s", partNamesPath, err)
|
||||
}
|
||||
return partNames
|
||||
}
|
||||
|
||||
// mustRemoveUnusedDirs removes dirs at path, which are missing in partNames.
|
||||
//
|
||||
// These dirs may be left after unclean shutdown.
|
||||
func mustRemoveUnusedDirs(path string, partNames []string) {
|
||||
des := fs.MustReadDir(path)
|
||||
m := make(map[string]struct{}, len(partNames))
|
||||
for _, partName := range partNames {
|
||||
m[partName] = struct{}{}
|
||||
}
|
||||
removedDirs := 0
|
||||
for _, de := range des {
|
||||
if !fs.IsDirOrSymlink(de) {
|
||||
// Skip non-directories.
|
||||
continue
|
||||
}
|
||||
fn := de.Name()
|
||||
if _, ok := m[fn]; !ok {
|
||||
deletePath := filepath.Join(path, fn)
|
||||
fs.MustRemoveAll(deletePath)
|
||||
removedDirs++
|
||||
}
|
||||
}
|
||||
if removedDirs > 0 {
|
||||
fs.MustSyncPath(path)
|
||||
}
|
||||
}
|
||||
|
||||
// appendPartsToMerge finds optimal parts to merge from src,
|
||||
// appends them to dst and returns the result.
|
||||
func appendPartsToMerge(dst, src []*partWrapper, maxOutBytes uint64) []*partWrapper {
|
||||
if len(src) < 2 {
|
||||
// There is no need to merge zero or one part :)
|
||||
return dst
|
||||
}
|
||||
|
||||
// Filter out too big parts.
|
||||
// This should reduce N for O(N^2) algorithm below.
|
||||
maxInPartBytes := uint64(float64(maxOutBytes) / minMergeMultiplier)
|
||||
tmp := make([]*partWrapper, 0, len(src))
|
||||
for _, pw := range src {
|
||||
if pw.p.ph.CompressedSizeBytes > maxInPartBytes {
|
||||
continue
|
||||
}
|
||||
tmp = append(tmp, pw)
|
||||
}
|
||||
src = tmp
|
||||
|
||||
sortPartsForOptimalMerge(src)
|
||||
|
||||
maxSrcParts := defaultPartsToMerge
|
||||
if maxSrcParts > len(src) {
|
||||
maxSrcParts = len(src)
|
||||
}
|
||||
minSrcParts := (maxSrcParts + 1) / 2
|
||||
if minSrcParts < 2 {
|
||||
minSrcParts = 2
|
||||
}
|
||||
|
||||
// Exhaustive search for parts giving the lowest write amplification when merged.
|
||||
var pws []*partWrapper
|
||||
maxM := float64(0)
|
||||
for i := minSrcParts; i <= maxSrcParts; i++ {
|
||||
for j := 0; j <= len(src)-i; j++ {
|
||||
a := src[j : j+i]
|
||||
if a[0].p.ph.CompressedSizeBytes*uint64(len(a)) < a[len(a)-1].p.ph.CompressedSizeBytes {
|
||||
// Do not merge parts with too big difference in size,
|
||||
// since this results in unbalanced merges.
|
||||
continue
|
||||
}
|
||||
outSize := getCompressedSize(a)
|
||||
if outSize > maxOutBytes {
|
||||
// There is no need to verify the remaining parts with bigger sizes.
|
||||
break
|
||||
}
|
||||
m := float64(outSize) / float64(a[len(a)-1].p.ph.CompressedSizeBytes)
|
||||
if m < maxM {
|
||||
continue
|
||||
}
|
||||
maxM = m
|
||||
pws = a
|
||||
}
|
||||
}
|
||||
|
||||
minM := float64(defaultPartsToMerge) / 2
|
||||
if minM < minMergeMultiplier {
|
||||
minM = minMergeMultiplier
|
||||
}
|
||||
if maxM < minM {
|
||||
// It makes no sense to merge parts with too small a multiplier m,
// since this leads to high disk write IO.
|
||||
return dst
|
||||
}
|
||||
return append(dst, pws...)
|
||||
}
|
||||
|
||||
func sortPartsForOptimalMerge(pws []*partWrapper) {
|
||||
// Sort src parts by size and backwards timestamp.
|
||||
// This should improve adjacent points' locality in the merged parts.
|
||||
sort.Slice(pws, func(i, j int) bool {
|
||||
a := &pws[i].p.ph
|
||||
b := &pws[j].p.ph
|
||||
if a.CompressedSizeBytes == b.CompressedSizeBytes {
|
||||
return a.MinTimestamp > b.MinTimestamp
|
||||
}
|
||||
return a.CompressedSizeBytes < b.CompressedSizeBytes
|
||||
})
|
||||
}
|
||||
|
||||
func getCompressedSize(pws []*partWrapper) uint64 {
|
||||
n := uint64(0)
|
||||
for _, pw := range pws {
|
||||
n += pw.p.ph.CompressedSizeBytes
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func getUncompressedSize(pws []*partWrapper) uint64 {
|
||||
n := uint64(0)
|
||||
for _, pw := range pws {
|
||||
n += pw.p.ph.UncompressedSizeBytes
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func getRowsCount(pws []*partWrapper) uint64 {
|
||||
n := uint64(0)
|
||||
for _, pw := range pws {
|
||||
n += pw.p.ph.RowsCount
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func getBlocksCount(pws []*partWrapper) uint64 {
|
||||
n := uint64(0)
|
||||
for _, pw := range pws {
|
||||
n += pw.p.ph.BlocksCount
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func shouldUsePageCacheForPartSize(size uint64) bool {
|
||||
mem := memory.Remaining() / defaultPartsToMerge
|
||||
return size <= uint64(mem)
|
||||
}
|
91
lib/logstorage/datadb_test.go
Normal file
@@ -0,0 +1,91 @@
package logstorage
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAppendPartsToMergeManyParts(t *testing.T) {
|
||||
// Verify that a big number of parts is merged into a minimal number of parts
|
||||
// using minimum merges.
|
||||
var sizes []uint64
|
||||
maxOutSize := uint64(0)
|
||||
r := rand.New(rand.NewSource(1))
|
||||
for i := 0; i < 1024; i++ {
|
||||
n := uint64(uint32(r.NormFloat64() * 1e9))
|
||||
n++
|
||||
maxOutSize += n
|
||||
sizes = append(sizes, n)
|
||||
}
|
||||
pws := newTestPartWrappersForSizes(sizes)
|
||||
|
||||
iterationsCount := 0
|
||||
sizeMergedTotal := uint64(0)
|
||||
for {
|
||||
pms := appendPartsToMerge(nil, pws, maxOutSize)
|
||||
if len(pms) == 0 {
|
||||
break
|
||||
}
|
||||
m := make(map[*partWrapper]bool)
|
||||
for _, pw := range pms {
|
||||
m[pw] = true
|
||||
}
|
||||
var pwsNew []*partWrapper
|
||||
size := uint64(0)
|
||||
for _, pw := range pws {
|
||||
if m[pw] {
|
||||
size += pw.p.ph.CompressedSizeBytes
|
||||
} else {
|
||||
pwsNew = append(pwsNew, pw)
|
||||
}
|
||||
}
|
||||
pw := &partWrapper{
|
||||
p: &part{
|
||||
ph: partHeader{
|
||||
CompressedSizeBytes: size,
|
||||
},
|
||||
},
|
||||
}
|
||||
sizeMergedTotal += size
|
||||
pwsNew = append(pwsNew, pw)
|
||||
pws = pwsNew
|
||||
iterationsCount++
|
||||
}
|
||||
sizes = newTestSizesFromPartWrappers(pws)
|
||||
sizeTotal := uint64(0)
|
||||
for _, size := range sizes {
|
||||
sizeTotal += uint64(size)
|
||||
}
|
||||
overhead := float64(sizeMergedTotal) / float64(sizeTotal)
|
||||
if overhead > 2.1 {
|
||||
t.Fatalf("too big overhead; sizes=%d, iterationsCount=%d, sizeTotal=%d, sizeMergedTotal=%d, overhead=%f",
|
||||
sizes, iterationsCount, sizeTotal, sizeMergedTotal, overhead)
|
||||
}
|
||||
if len(sizes) > 18 {
|
||||
t.Fatalf("too many sizes %d; sizes=%d, iterationsCount=%d, sizeTotal=%d, sizeMergedTotal=%d, overhead=%f",
|
||||
len(sizes), sizes, iterationsCount, sizeTotal, sizeMergedTotal, overhead)
|
||||
}
|
||||
}
|
||||
|
||||
func newTestSizesFromPartWrappers(pws []*partWrapper) []uint64 {
|
||||
var sizes []uint64
|
||||
for _, pw := range pws {
|
||||
sizes = append(sizes, pw.p.ph.CompressedSizeBytes)
|
||||
}
|
||||
return sizes
|
||||
}
|
||||
|
||||
func newTestPartWrappersForSizes(sizes []uint64) []*partWrapper {
|
||||
var pws []*partWrapper
|
||||
for _, size := range sizes {
|
||||
pw := &partWrapper{
|
||||
p: &part{
|
||||
ph: partHeader{
|
||||
CompressedSizeBytes: size,
|
||||
},
|
||||
},
|
||||
}
|
||||
pws = append(pws, pw)
|
||||
}
|
||||
return pws
|
||||
}
|
314
lib/logstorage/encoding.go
Normal file
@@ -0,0 +1,314 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
)
|
||||
|
||||
// marshalStringsBlock marshals a and appends the result to dst.
|
||||
//
|
||||
// The marshaled strings block can be unmarshaled with stringsBlockUnmarshaler.
|
||||
func marshalStringsBlock(dst []byte, a []string) []byte {
|
||||
// Encode string lengths
|
||||
u64s := encoding.GetUint64s(len(a))
|
||||
aLens := u64s.A[:0]
|
||||
for _, s := range a {
|
||||
aLens = append(aLens, uint64(len(s)))
|
||||
}
|
||||
u64s.A = aLens
|
||||
dst = marshalUint64Block(dst, u64s.A)
|
||||
encoding.PutUint64s(u64s)
|
||||
|
||||
// Encode strings
|
||||
bb := bbPool.Get()
|
||||
b := bb.B
|
||||
for _, s := range a {
|
||||
b = append(b, s...)
|
||||
}
|
||||
bb.B = b
|
||||
dst = marshalBytesBlock(dst, bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
// stringsBlockUnmarshaler is used for unmarshaling the block returned from marshalStringsBlock()
|
||||
//
|
||||
// use getStringsBlockUnmarshaler() for obtaining the unmarshaler from the pool in order to save memory allocations.
|
||||
type stringsBlockUnmarshaler struct {
|
||||
// data contains the data for the unmarshaled values
|
||||
data []byte
|
||||
}
|
||||
|
||||
func (sbu *stringsBlockUnmarshaler) reset() {
|
||||
sbu.data = sbu.data[:0]
|
||||
}
|
||||
|
||||
// unmarshal unmarshals itemsCount strings from src, appends them to dst and returns the result.
|
||||
//
|
||||
// The returned strings are valid until sbu.reset() call.
|
||||
func (sbu *stringsBlockUnmarshaler) unmarshal(dst []string, src []byte, itemsCount uint64) ([]string, error) {
|
||||
u64s := encoding.GetUint64s(0)
|
||||
defer encoding.PutUint64s(u64s)
|
||||
|
||||
// Decode string lengths
|
||||
var tail []byte
|
||||
var err error
|
||||
u64s.A, tail, err = unmarshalUint64Block(u64s.A[:0], src, itemsCount)
|
||||
if err != nil {
|
||||
return dst, fmt.Errorf("cannot unmarshal string lengths: %w", err)
|
||||
}
|
||||
aLens := u64s.A
|
||||
src = tail
|
||||
|
||||
// Read bytes block into sbu.data
|
||||
dataLen := len(sbu.data)
|
||||
sbu.data, tail, err = unmarshalBytesBlock(sbu.data, src)
|
||||
if err != nil {
|
||||
return dst, fmt.Errorf("cannot unmarshal bytes block with strings: %w", err)
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
return dst, fmt.Errorf("unexpected non-empty tail after reading bytes block with strings; len(tail)=%d", len(tail))
|
||||
}
|
||||
|
||||
// Decode strings from sbu.data into dst
|
||||
data := sbu.data[dataLen:]
|
||||
for _, sLen := range aLens {
|
||||
if uint64(len(data)) < sLen {
|
||||
return dst, fmt.Errorf("cannot unmarshal a string with the length %d bytes from %d bytes", sLen, len(data))
|
||||
}
|
||||
s := bytesutil.ToUnsafeString(data[:sLen])
|
||||
data = data[sLen:]
|
||||
dst = append(dst, s)
|
||||
}
|
||||
|
||||
return dst, nil
|
||||
}
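// NOTE (editor's sketch, not part of the original diff): a round trip through
// marshalStringsBlock and stringsBlockUnmarshaler. The number of strings is not
// stored inside the block, so the caller must pass itemsCount to unmarshal():
//
//	a := []string{"foo", "bar", "baz"}
//	data := marshalStringsBlock(nil, a)
//	sbu := getStringsBlockUnmarshaler()
//	values, err := sbu.unmarshal(nil, data, uint64(len(a)))
//	if err != nil {
//		// handle the error
//	}
//	// values now equals a; the returned strings remain valid only until
//	// sbu.reset() is called via putStringsBlockUnmarshaler(sbu).
//	putStringsBlockUnmarshaler(sbu)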
|
||||
|
||||
// marshalUint64Block appends marshaled a to dst and returns the result.
|
||||
func marshalUint64Block(dst []byte, a []uint64) []byte {
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalUint64Items(bb.B[:0], a)
|
||||
dst = marshalBytesBlock(dst, bb.B)
|
||||
bbPool.Put(bb)
|
||||
return dst
|
||||
}
|
||||
|
||||
// unmarshalUint64Block appends unmarshaled from src itemsCount uint64 items to dst and returns the result.
|
||||
func unmarshalUint64Block(dst []uint64, src []byte, itemsCount uint64) ([]uint64, []byte, error) {
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
// Unmarshal the underlying bytes block
|
||||
var err error
|
||||
bb.B, src, err = unmarshalBytesBlock(bb.B[:0], src)
|
||||
if err != nil {
|
||||
return dst, src, fmt.Errorf("cannot unmarshal bytes block: %w", err)
|
||||
}
|
||||
|
||||
// Unmarshal the items from bb.
|
||||
dst, err = unmarshalUint64Items(dst, bb.B, itemsCount)
|
||||
if err != nil {
|
||||
return dst, src, fmt.Errorf("cannot unmarshal %d uint64 items from bytes block of length %d bytes: %w", itemsCount, len(bb.B), err)
|
||||
}
|
||||
return dst, src, nil
|
||||
}
|
||||
|
||||
const (
|
||||
uintBlockType8 = 0
|
||||
uintBlockType16 = 1
|
||||
uintBlockType32 = 2
|
||||
uintBlockType64 = 3
|
||||
)
|
||||
|
||||
// marshalUint64Items appends the marshaled a items to dst and returns the result.
|
||||
func marshalUint64Items(dst []byte, a []uint64) []byte {
|
||||
// Do not marshal len(a), since it is expected that unmarshaler knows it.
|
||||
nMax := uint64(0)
|
||||
for _, n := range a {
|
||||
if n > nMax {
|
||||
nMax = n
|
||||
}
|
||||
}
|
||||
switch {
|
||||
case nMax < (1 << 8):
|
||||
dst = append(dst, uintBlockType8)
|
||||
for _, n := range a {
|
||||
dst = append(dst, byte(n))
|
||||
}
|
||||
case nMax < (1 << 16):
|
||||
dst = append(dst, uintBlockType16)
|
||||
for _, n := range a {
|
||||
dst = encoding.MarshalUint16(dst, uint16(n))
|
||||
}
|
||||
case nMax < (1 << 32):
|
||||
dst = append(dst, uintBlockType32)
|
||||
for _, n := range a {
|
||||
dst = encoding.MarshalUint32(dst, uint32(n))
|
||||
}
|
||||
default:
|
||||
dst = append(dst, uintBlockType64)
|
||||
for _, n := range a {
|
||||
dst = encoding.MarshalUint64(dst, uint64(n))
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// unmarshalUint64Items appends unmarshaled from src itemsCount uint64 items to dst and returns the result.
|
||||
func unmarshalUint64Items(dst []uint64, src []byte, itemsCount uint64) ([]uint64, error) {
|
||||
// Unmarshal block type
|
||||
if len(src) < 1 {
|
||||
return dst, fmt.Errorf("cannot unmarshal uint64 block type from empty src")
|
||||
}
|
||||
blockType := src[0]
|
||||
src = src[1:]
|
||||
|
||||
switch blockType {
|
||||
case uintBlockType8:
|
||||
// A block with items smaller than 1<<8; each item occupies a single byte
|
||||
if uint64(len(src)) != itemsCount {
|
||||
return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), itemsCount)
|
||||
}
|
||||
for _, v := range src {
|
||||
dst = append(dst, uint64(v))
|
||||
}
|
||||
case uintBlockType16:
|
||||
// A block with items smaller than 1<<16; each item occupies 2 bytes
|
||||
if uint64(len(src)) != 2*itemsCount {
|
||||
return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 2*itemsCount)
|
||||
}
|
||||
for len(src) > 0 {
|
||||
v := encoding.UnmarshalUint16(src)
|
||||
src = src[2:]
|
||||
dst = append(dst, uint64(v))
|
||||
}
|
||||
case uintBlockType32:
|
||||
// A block with items smaller than 1<<32; each item occupies 4 bytes
|
||||
if uint64(len(src)) != 4*itemsCount {
|
||||
return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 4*itemsCount)
|
||||
}
|
||||
for len(src) > 0 {
|
||||
v := encoding.UnmarshalUint32(src)
|
||||
src = src[4:]
|
||||
dst = append(dst, uint64(v))
|
||||
}
|
||||
case uintBlockType64:
|
||||
// A block with arbitrary uint64 items; each item occupies 8 bytes
|
||||
if uint64(len(src)) != 8*itemsCount {
|
||||
return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 8*itemsCount)
|
||||
}
|
||||
for len(src) > 0 {
|
||||
v := encoding.UnmarshalUint64(src)
|
||||
src = src[8:]
|
||||
dst = append(dst, v)
|
||||
}
|
||||
default:
|
||||
return dst, fmt.Errorf("unexpected uint64 block type: %d; want 0, 1, 2 or 3", blockType)
|
||||
}
|
||||
return dst, nil
|
||||
}
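// NOTE (editor's sketch, not part of the original diff): the uint64 block consists of
// a single type byte (uintBlockType8/16/32/64) followed by fixed-width encodings of
// every item; the width is chosen from the maximum value in the block. For example:
//
//	a := []uint64{3, 70000, 12}
//	data := marshalUint64Items(nil, a) // type byte uintBlockType32, then 4 bytes per item
//	items, err := unmarshalUint64Items(nil, data, uint64(len(a)))
//	// err == nil and items equals a; the item count is supplied by the caller,
//	// since it is not part of the encoded block.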
|
||||
|
||||
const (
|
||||
marshalBytesTypePlain = 0
|
||||
marshalBytesTypeZSTD = 1
|
||||
)
|
||||
|
||||
func marshalBytesBlock(dst, src []byte) []byte {
|
||||
if len(src) < 128 {
|
||||
// Marshal the block in plain without compression
|
||||
dst = append(dst, marshalBytesTypePlain)
|
||||
dst = append(dst, byte(len(src)))
|
||||
return append(dst, src...)
|
||||
}
|
||||
|
||||
// Compress the block
|
||||
dst = append(dst, marshalBytesTypeZSTD)
|
||||
bb := bbPool.Get()
|
||||
bb.B = encoding.CompressZSTDLevel(bb.B[:0], src, 1)
|
||||
dst = encoding.MarshalVarUint64(dst, uint64(len(bb.B)))
|
||||
dst = append(dst, bb.B...)
|
||||
bbPool.Put(bb)
|
||||
return dst
|
||||
}
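// NOTE (editor's sketch, not part of the original diff): the bytes block is framed as
// a type byte, a length and the payload:
//
//	marshalBytesTypePlain (0): 1-byte length (len(src) < 128), then the raw bytes
//	marshalBytesTypeZSTD  (1): varuint64 compressed length, then zstd-compressed bytes
//
// Blocks shorter than 128 bytes are stored as-is, since compressing them is unlikely
// to pay off; longer blocks are compressed with zstd at level 1. Round trip:
//
//	data := marshalBytesBlock(nil, payload)
//	plain, tail, err := unmarshalBytesBlock(nil, data) // plain == payload, len(tail) == 0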
|
||||
|
||||
func unmarshalBytesBlock(dst, src []byte) ([]byte, []byte, error) {
|
||||
if len(src) < 1 {
|
||||
return dst, src, fmt.Errorf("cannot unmarshal block type from empty src")
|
||||
}
|
||||
blockType := src[0]
|
||||
src = src[1:]
|
||||
switch blockType {
|
||||
case marshalBytesTypePlain:
|
||||
// Plain block
|
||||
|
||||
// Read block length
|
||||
if len(src) < 1 {
|
||||
return dst, src, fmt.Errorf("cannot unmarshal plain block size from empty src")
|
||||
}
|
||||
blockLen := int(src[0])
|
||||
src = src[1:]
|
||||
if len(src) < blockLen {
|
||||
return dst, src, fmt.Errorf("cannot read plain block with the size %d bytes from %b bytes", blockLen, len(src))
|
||||
}
|
||||
|
||||
// Copy the block to dst
|
||||
dst = append(dst, src[:blockLen]...)
|
||||
src = src[blockLen:]
|
||||
return dst, src, nil
|
||||
case marshalBytesTypeZSTD:
|
||||
// Compressed block
|
||||
|
||||
// Read block length
|
||||
tail, blockLen, err := encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return dst, src, fmt.Errorf("cannot unmarshal compressed block size: %w", err)
|
||||
}
|
||||
src = tail
|
||||
if uint64(len(src)) < blockLen {
|
||||
return dst, src, fmt.Errorf("cannot read compressed block with the size %d bytes from %d bytes", blockLen, len(src))
|
||||
}
|
||||
compressedBlock := src[:blockLen]
|
||||
src = src[blockLen:]
|
||||
|
||||
// Decompress the block
|
||||
bb := bbPool.Get()
|
||||
bb.B, err = encoding.DecompressZSTD(bb.B[:0], compressedBlock)
|
||||
if err != nil {
|
||||
return dst, src, fmt.Errorf("cannot decompress block: %w", err)
|
||||
}
|
||||
|
||||
// Copy the decompressed block to dst.
|
||||
dst = append(dst, bb.B...)
|
||||
bbPool.Put(bb)
|
||||
return dst, src, nil
|
||||
default:
|
||||
return dst, src, fmt.Errorf("unexpected block type: %d; supported types: 0, 1", blockType)
|
||||
}
|
||||
}
|
||||
|
||||
var bbPool bytesutil.ByteBufferPool
|
||||
|
||||
// getStringsBlockUnmarshaler returns stringsBlockUnmarshaler from the pool.
|
||||
//
|
||||
// Return back the stringsBlockUnmarshaler to the pool by calling putStringsBlockUnmarshaler().
|
||||
func getStringsBlockUnmarshaler() *stringsBlockUnmarshaler {
|
||||
v := sbuPool.Get()
|
||||
if v == nil {
|
||||
return &stringsBlockUnmarshaler{}
|
||||
}
|
||||
return v.(*stringsBlockUnmarshaler)
|
||||
}
|
||||
|
||||
// putStringsBlockUnmarshaler returns back sbu to the pool.
|
||||
//
|
||||
// sbu mustn't be used after returning to the pool.
|
||||
func putStringsBlockUnmarshaler(sbu *stringsBlockUnmarshaler) {
|
||||
sbu.reset()
|
||||
sbuPool.Put(sbu)
|
||||
}
|
||||
|
||||
var sbuPool sync.Pool
|
86
lib/logstorage/encoding_test.go
Normal file
@@ -0,0 +1,86 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMarshalUnmarshalStringsBlock(t *testing.T) {
|
||||
f := func(logs string, blockLenExpected int) {
|
||||
t.Helper()
|
||||
var a []string
|
||||
if logs != "" {
|
||||
a = strings.Split(logs, "\n")
|
||||
}
|
||||
data := marshalStringsBlock(nil, a)
|
||||
if len(data) != blockLenExpected {
|
||||
t.Fatalf("unexpected block length; got %d; want %d; block=%q", len(data), blockLenExpected, data)
|
||||
}
|
||||
sbu := getStringsBlockUnmarshaler()
|
||||
values, err := sbu.unmarshal(nil, data, uint64(len(a)))
|
||||
if err != nil {
|
||||
t.Fatalf("cannot unmarshal strings block: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(values, a) {
|
||||
t.Fatalf("unexpected strings after unmarshaling;\ngot\n%q\nwant\n%q", values, a)
|
||||
}
|
||||
putStringsBlockUnmarshaler(sbu)
|
||||
}
|
||||
f("", 5)
|
||||
f("foo", 9)
|
||||
f(`foo
|
||||
bar
|
||||
baz
|
||||
`, 18)
|
||||
f(`
|
||||
Apr 28 13:39:06 localhost systemd[1]: Started Network Manager Script Dispatcher Service.
|
||||
Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts)
|
||||
Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts...
|
||||
Apr 28 13:40:05 localhost kernel: [35544.823503] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 1 (2437/0 MHz)
|
||||
Apr 28 13:40:15 localhost kernel: [35554.295612] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 2 (2447/0 MHz)
|
||||
Apr 28 13:43:37 localhost NetworkManager[1516]: <info> [1651142617.3668] manager: NetworkManager state is now CONNECTED_GLOBAL
|
||||
Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Activating via systemd: service name='org.freedesktop.nm_dispatcher' unit='dbus-org.freedesktop.nm-dispatcher.service' requested by ':1.13' (uid=0 pid=1516 comm="/usr/sbin/NetworkManager --no-daemon " label="unconfined")
|
||||
Apr 28 13:43:37 localhost systemd[1]: Starting Network Manager Script Dispatcher Service...
|
||||
Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] The default IPv4 route is: /org/freedesktop/NetworkManager/ActiveConnection/10
|
||||
Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Not a paid data plan: /org/freedesktop/NetworkManager/ActiveConnection/10
|
||||
Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Found usable connection: /org/freedesktop/NetworkManager/ActiveConnection/10
|
||||
Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Successfully activated service 'org.freedesktop.nm_dispatcher'
|
||||
Apr 28 13:43:37 localhost systemd[1]: Started Network Manager Script Dispatcher Service.
|
||||
Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts)
|
||||
Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts...
|
||||
Apr 28 13:43:38 localhost whoopsie[2812]: [13:43:38] online
|
||||
Apr 28 13:45:01 localhost CRON[12181]: (root) CMD (command -v debian-sa1 > /dev/null && debian-sa1 1 1)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497806] CPU0: Core temperature above threshold, cpu clock throttled (total events = 22034)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497807] CPU2: Core temperature above threshold, cpu clock throttled (total events = 22034)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497809] CPU1: Package temperature above threshold, cpu clock throttled (total events = 27400)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497810] CPU3: Package temperature above threshold, cpu clock throttled (total events = 27400)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497810] CPU2: Package temperature above threshold, cpu clock throttled (total events = 27400)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497812] CPU0: Package temperature above threshold, cpu clock throttled (total events = 27400)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499855] CPU2: Core temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499855] CPU0: Core temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499856] CPU1: Package temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499857] CPU3: Package temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499858] CPU0: Package temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499859] CPU2: Package temperature/speed normal
|
||||
`, 951)
|
||||
|
||||
// Generate a string longer than 1<<16 bytes
|
||||
s := "foo"
|
||||
for len(s) < (1 << 16) {
|
||||
s += s
|
||||
}
|
||||
s += "\n"
|
||||
lines := s
|
||||
f(lines, 36)
|
||||
lines += s
|
||||
f(lines, 52)
|
||||
|
||||
// Generate more than 256 strings
|
||||
lines = ""
|
||||
for i := 0; i < 1000; i++ {
|
||||
lines += fmt.Sprintf("line %d\n", i)
|
||||
}
|
||||
f(lines, 766)
|
||||
}
|
73
lib/logstorage/encoding_timing_test.go
Normal file
@@ -0,0 +1,73 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkMarshalStringsBlock(b *testing.B) {
|
||||
block := strings.Split(benchLogs, "\n")
|
||||
|
||||
b.SetBytes(int64(len(benchLogs)))
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var buf []byte
|
||||
for pb.Next() {
|
||||
buf = marshalStringsBlock(buf[:0], block)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkStringsBlockUnmarshaler_Unmarshal(b *testing.B) {
|
||||
block := strings.Split(benchLogs, "\n")
|
||||
data := marshalStringsBlock(nil, block)
|
||||
|
||||
b.SetBytes(int64(len(benchLogs)))
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
sbu := getStringsBlockUnmarshaler()
|
||||
var values []string
|
||||
for pb.Next() {
|
||||
var err error
|
||||
values, err = sbu.unmarshal(values[:0], data, uint64(len(block)))
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %w", err))
|
||||
}
|
||||
sbu.reset()
|
||||
}
|
||||
putStringsBlockUnmarshaler(sbu)
|
||||
})
|
||||
}
|
||||
|
||||
const benchLogs = `
|
||||
Apr 28 13:39:06 localhost systemd[1]: Started Network Manager Script Dispatcher Service.
|
||||
Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts)
|
||||
Apr 28 13:39:06 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts...
|
||||
Apr 28 13:40:05 localhost kernel: [35544.823503] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 1 (2437/0 MHz)
|
||||
Apr 28 13:40:15 localhost kernel: [35554.295612] wlp4s0: AP c8:ea:f8:00:6a:31 changed bandwidth, new config is 2437 MHz, width 2 (2447/0 MHz)
|
||||
Apr 28 13:43:37 localhost NetworkManager[1516]: <info> [1651142617.3668] manager: NetworkManager state is now CONNECTED_GLOBAL
|
||||
Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Activating via systemd: service name='org.freedesktop.nm_dispatcher' unit='dbus-org.freedesktop.nm-dispatcher.service' requested by ':1.13' (uid=0 pid=1516 comm="/usr/sbin/NetworkManager --no-daemon " label="unconfined")
|
||||
Apr 28 13:43:37 localhost systemd[1]: Starting Network Manager Script Dispatcher Service...
|
||||
Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] The default IPv4 route is: /org/freedesktop/NetworkManager/ActiveConnection/10
|
||||
Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Not a paid data plan: /org/freedesktop/NetworkManager/ActiveConnection/10
|
||||
Apr 28 13:43:37 localhost whoopsie[2812]: [13:43:37] Found usable connection: /org/freedesktop/NetworkManager/ActiveConnection/10
|
||||
Apr 28 13:43:37 localhost dbus-daemon[1475]: [system] Successfully activated service 'org.freedesktop.nm_dispatcher'
|
||||
Apr 28 13:43:37 localhost systemd[1]: Started Network Manager Script Dispatcher Service.
|
||||
Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': new request (2 scripts)
|
||||
Apr 28 13:43:37 localhost nm-dispatcher: req:1 'connectivity-change': start running ordered scripts...
|
||||
Apr 28 13:43:38 localhost whoopsie[2812]: [13:43:38] online
|
||||
Apr 28 13:45:01 localhost CRON[12181]: (root) CMD (command -v debian-sa1 > /dev/null && debian-sa1 1 1)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497806] CPU0: Core temperature above threshold, cpu clock throttled (total events = 22034)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497807] CPU2: Core temperature above threshold, cpu clock throttled (total events = 22034)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497809] CPU1: Package temperature above threshold, cpu clock throttled (total events = 27400)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497810] CPU3: Package temperature above threshold, cpu clock throttled (total events = 27400)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497810] CPU2: Package temperature above threshold, cpu clock throttled (total events = 27400)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.497812] CPU0: Package temperature above threshold, cpu clock throttled (total events = 27400)
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499855] CPU2: Core temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499855] CPU0: Core temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499856] CPU1: Package temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499857] CPU3: Package temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499858] CPU0: Package temperature/speed normal
|
||||
Apr 28 13:48:01 localhost kernel: [36020.499859] CPU2: Package temperature/speed normal
|
||||
`
|
22
lib/logstorage/filenames.go
Normal file
@@ -0,0 +1,22 @@
package logstorage

const (
	metaindexFilename     = "metaindex.bin"
	indexFilename         = "index.bin"
	columnsHeaderFilename = "columns_header.bin"
	timestampsFilename    = "timestamps.bin"
	fieldValuesFilename   = "field_values.bin"
	fieldBloomFilename    = "field_bloom.bin"
	messageValuesFilename = "message_values.bin"
	messageBloomFilename  = "message_bloom.bin"

	metadataFilename = "metadata.json"
	partsFilename    = "parts.json"

	streamIDCacheFilename = "stream_id.bin"

	indexdbDirname    = "indexdb"
	datadbDirname     = "datadb"
	cacheDirname      = "cache"
	partitionsDirname = "partitions"
)
3053
lib/logstorage/filters.go
Normal file
File diff suppressed because it is too large
9296
lib/logstorage/filters_test.go
Normal file
File diff suppressed because it is too large
38
lib/logstorage/hash128.go
Normal file
@@ -0,0 +1,38 @@
package logstorage

import (
	"sync"

	"github.com/cespare/xxhash/v2"
)

func hash128(data []byte) u128 {
	h := getHasher()
	_, _ = h.Write(data)
	hi := h.Sum64()
	_, _ = h.Write(magicSuffixForHash)
	lo := h.Sum64()
	putHasher(h)

	return u128{
		hi: hi,
		lo: lo,
	}
}

var magicSuffixForHash = []byte("magic!")

func getHasher() *xxhash.Digest {
	v := hasherPool.Get()
	if v == nil {
		return xxhash.New()
	}
	return v.(*xxhash.Digest)
}

func putHasher(h *xxhash.Digest) {
	h.Reset()
	hasherPool.Put(h)
}

var hasherPool sync.Pool
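// NOTE (editor's sketch, not part of the original diff): hash128 derives the two
// 64-bit halves from a single streaming xxhash digest - hi is the hash of data,
// while lo is the hash of data followed by magicSuffixForHash, so the halves differ
// even though only one hasher is used:
//
//	h := hash128([]byte("abc"))
//	// h.hi and h.lo together form the 128-bit hash (see hash128_test.go for
//	// the exact expected values).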
24
lib/logstorage/hash128_test.go
Normal file
@@ -0,0 +1,24 @@
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestHash128(t *testing.T) {
|
||||
f := func(data string, hashExpected u128) {
|
||||
t.Helper()
|
||||
h := hash128([]byte(data))
|
||||
if !h.equal(&hashExpected) {
|
||||
t.Fatalf("unexpected hash; got %s; want %s", &h, &hashExpected)
|
||||
}
|
||||
}
|
||||
f("", u128{
|
||||
hi: 17241709254077376921,
|
||||
lo: 13138662262368978769,
|
||||
})
|
||||
|
||||
f("abc", u128{
|
||||
hi: 4952883123889572249,
|
||||
lo: 3255951525518405514,
|
||||
})
|
||||
}
|
29
lib/logstorage/hash128_timing_test.go
Normal file
@@ -0,0 +1,29 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkHash128(b *testing.B) {
|
||||
a := make([][]byte, 100)
|
||||
for i := range a {
|
||||
a[i] = []byte(fmt.Sprintf("some string %d", i))
|
||||
}
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(len(a)))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var n uint64
|
||||
for pb.Next() {
|
||||
for _, b := range a {
|
||||
h := hash128(b)
|
||||
n += h.hi
|
||||
n += h.lo
|
||||
}
|
||||
}
|
||||
atomic.AddUint64(&GlobalSinkU64, n)
|
||||
})
|
||||
}
|
||||
|
||||
var GlobalSinkU64 uint64
|
164
lib/logstorage/index_block_header.go
Normal file
@@ -0,0 +1,164 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// indexBlockHeader contains index information about multiple blocks.
|
||||
//
|
||||
// It allows locating the block by streamID and/or by time range.
|
||||
type indexBlockHeader struct {
|
||||
// streamID is the minimum streamID covered by the indexBlockHeader
|
||||
streamID streamID
|
||||
|
||||
// minTimestamp is the minimum timestamp seen across blocks covered by the indexBlockHeader
|
||||
minTimestamp int64
|
||||
|
||||
// maxTimestamp is the maximum timestamp seen across blocks covered by the indexBlockHeader
|
||||
maxTimestamp int64
|
||||
|
||||
// indexBlockOffset is an offset of the linked index block at indexFilename
|
||||
indexBlockOffset uint64
|
||||
|
||||
// indexBlockSize is the size of the linked index block at indexFilename
|
||||
indexBlockSize uint64
|
||||
}
|
||||
|
||||
// reset resets ih for subsequent re-use.
|
||||
func (ih *indexBlockHeader) reset() {
|
||||
ih.streamID.reset()
|
||||
ih.minTimestamp = 0
|
||||
ih.maxTimestamp = 0
|
||||
ih.indexBlockOffset = 0
|
||||
ih.indexBlockSize = 0
|
||||
}
|
||||
|
||||
// mustWriteIndexBlock writes data with the given additional args to sw and updates ih accordingly.
|
||||
func (ih *indexBlockHeader) mustWriteIndexBlock(data []byte, sidFirst streamID, minTimestamp, maxTimestamp int64, sw *streamWriters) {
|
||||
ih.streamID = sidFirst
|
||||
ih.minTimestamp = minTimestamp
|
||||
ih.maxTimestamp = maxTimestamp
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
bb.B = encoding.CompressZSTDLevel(bb.B[:0], data, 1)
|
||||
ih.indexBlockOffset = sw.indexWriter.bytesWritten
|
||||
ih.indexBlockSize = uint64(len(bb.B))
|
||||
sw.indexWriter.MustWrite(bb.B)
|
||||
longTermBufPool.Put(bb)
|
||||
}
|
||||
|
||||
// mustReadNextIndexBlock reads the next index block associated with ih from sr, appends it to dst and returns the result.
|
||||
func (ih *indexBlockHeader) mustReadNextIndexBlock(dst []byte, sr *streamReaders) []byte {
|
||||
indexReader := &sr.indexReader
|
||||
|
||||
indexBlockSize := ih.indexBlockSize
|
||||
if indexBlockSize > maxIndexBlockSize {
|
||||
logger.Panicf("FATAL: %s: indexBlockHeader.indexBlockSize=%d cannot exceed %d bytes", indexReader.Path(), indexBlockSize, maxIndexBlockSize)
|
||||
}
|
||||
if ih.indexBlockOffset != indexReader.bytesRead {
|
||||
logger.Panicf("FATAL: %s: indexBlockHeader.indexBlockOffset=%d must equal to %d", indexReader.Path(), ih.indexBlockOffset, indexReader.bytesRead)
|
||||
}
|
||||
bbCompressed := longTermBufPool.Get()
|
||||
bbCompressed.B = bytesutil.ResizeNoCopyMayOverallocate(bbCompressed.B, int(indexBlockSize))
|
||||
indexReader.MustReadFull(bbCompressed.B)
|
||||
|
||||
// Decompress bbCompressed to dst
|
||||
var err error
|
||||
dst, err = encoding.DecompressZSTD(dst, bbCompressed.B)
|
||||
longTermBufPool.Put(bbCompressed)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot decompress indexBlock read at offset %d with size %d: %s", indexReader.Path(), ih.indexBlockOffset, indexBlockSize, err)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// marshal appends marshaled ih to dst and returns the result.
|
||||
func (ih *indexBlockHeader) marshal(dst []byte) []byte {
|
||||
dst = ih.streamID.marshal(dst)
|
||||
dst = encoding.MarshalUint64(dst, uint64(ih.minTimestamp))
|
||||
dst = encoding.MarshalUint64(dst, uint64(ih.maxTimestamp))
|
||||
dst = encoding.MarshalUint64(dst, ih.indexBlockOffset)
|
||||
dst = encoding.MarshalUint64(dst, ih.indexBlockSize)
|
||||
return dst
|
||||
}
|
||||
|
||||
// unmarshal unmarshals ih from src and returns the tail left.
|
||||
func (ih *indexBlockHeader) unmarshal(src []byte) ([]byte, error) {
|
||||
srcOrig := src
|
||||
|
||||
// unmarshal ih.streamID
|
||||
tail, err := ih.streamID.unmarshal(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal streamID: %w", err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
// unmarshal the rest of indexBlockHeader fields
|
||||
if len(src) < 32 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal indexBlockHeader from %d bytes; need at least 32 bytes", len(src))
|
||||
}
|
||||
ih.minTimestamp = int64(encoding.UnmarshalUint64(src))
|
||||
ih.maxTimestamp = int64(encoding.UnmarshalUint64(src[8:]))
|
||||
ih.indexBlockOffset = encoding.UnmarshalUint64(src[16:])
|
||||
ih.indexBlockSize = encoding.UnmarshalUint64(src[24:])
|
||||
|
||||
return src[32:], nil
|
||||
}
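// NOTE (editor's sketch, not part of the original diff): the marshaled indexBlockHeader
// has a fixed size of 56 bytes - 24 bytes for the streamID (TenantID plus the u128 id)
// followed by four 8-byte fields: minTimestamp, maxTimestamp, indexBlockOffset and
// indexBlockSize. This matches the sizes asserted in index_block_header_test.go:
//
//	var ih indexBlockHeader
//	data := ih.marshal(nil) // len(data) == 56
//	var ih2 indexBlockHeader
//	tail, err := ih2.unmarshal(data)
//	// err == nil, len(tail) == 0 and ih2 is a copy of ih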
|
||||
|
||||
// mustReadIndexBlockHeaders reads indexBlockHeader entries from r, appends them to dst and returns the result.
|
||||
func mustReadIndexBlockHeaders(dst []indexBlockHeader, r *readerWithStats) []indexBlockHeader {
|
||||
data, err := io.ReadAll(r)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot read indexBlockHeader entries from %s: %s", r.Path(), err)
|
||||
}
|
||||
|
||||
bb := longTermBufPool.Get()
|
||||
bb.B, err = encoding.DecompressZSTD(bb.B[:0], data)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot decompress indexBlockHeader entries from %s: %s", r.Path(), err)
|
||||
}
|
||||
dst, err = unmarshalIndexBlockHeaders(dst, bb.B)
|
||||
if len(bb.B) < 1024*1024 {
|
||||
longTermBufPool.Put(bb)
|
||||
}
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot parse indexBlockHeader entries from %s: %s", r.Path(), err)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// unmarshalIndexBlockHeaders appends unmarshaled from src indexBlockHeader entries to dst and returns the result.
|
||||
func unmarshalIndexBlockHeaders(dst []indexBlockHeader, src []byte) ([]indexBlockHeader, error) {
|
||||
dstOrig := dst
|
||||
for len(src) > 0 {
|
||||
if len(dst) < cap(dst) {
|
||||
dst = dst[:len(dst)+1]
|
||||
} else {
|
||||
dst = append(dst, indexBlockHeader{})
|
||||
}
|
||||
ih := &dst[len(dst)-1]
|
||||
tail, err := ih.unmarshal(src)
|
||||
if err != nil {
|
||||
return dstOrig, fmt.Errorf("cannot unmarshal indexBlockHeader %d: %w", len(dst)-len(dstOrig), err)
|
||||
}
|
||||
src = tail
|
||||
}
|
||||
if err := validateIndexBlockHeaders(dst[len(dstOrig):]); err != nil {
|
||||
return dstOrig, err
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
func validateIndexBlockHeaders(ihs []indexBlockHeader) error {
|
||||
for i := 1; i < len(ihs); i++ {
|
||||
if ihs[i].streamID.less(&ihs[i-1].streamID) {
|
||||
return fmt.Errorf("unexpected indexBlockHeader with smaller streamID=%s after bigger streamID=%s", &ihs[i].streamID, &ihs[i-1].streamID)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
138
lib/logstorage/index_block_header_test.go
Normal file
@@ -0,0 +1,138 @@
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIndexBlockHeaderMarshalUnmarshal(t *testing.T) {
|
||||
f := func(ih *indexBlockHeader, marshaledLen int) {
|
||||
t.Helper()
|
||||
data := ih.marshal(nil)
|
||||
if len(data) != marshaledLen {
|
||||
t.Fatalf("unexpected marshaled length of indexBlockHeader; got %d; want %d", len(data), marshaledLen)
|
||||
}
|
||||
var ih2 indexBlockHeader
|
||||
tail, err := ih2.unmarshal(data)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot unmarshal indexBlockHeader: %s", err)
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
t.Fatalf("unexpected non-empty tail left after unmarshaling indexBlockHeader: %X", tail)
|
||||
}
|
||||
if !reflect.DeepEqual(ih, &ih2) {
|
||||
t.Fatalf("unexpected unmarshaled indexBlockHeader\ngot\n%v\nwant\n%v", &ih2, ih)
|
||||
}
|
||||
}
|
||||
f(&indexBlockHeader{}, 56)
|
||||
f(&indexBlockHeader{
|
||||
streamID: streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
},
|
||||
id: u128{
|
||||
hi: 214,
|
||||
lo: 2111,
|
||||
},
|
||||
},
|
||||
minTimestamp: 1234,
|
||||
maxTimestamp: 898943,
|
||||
indexBlockOffset: 234,
|
||||
indexBlockSize: 898,
|
||||
}, 56)
|
||||
}
|
||||
|
||||
func TestIndexBlockHeaderUnmarshalFailure(t *testing.T) {
|
||||
f := func(data []byte) {
|
||||
t.Helper()
|
||||
dataOrig := append([]byte{}, data...)
|
||||
var ih indexBlockHeader
|
||||
tail, err := ih.unmarshal(data)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if string(tail) != string(dataOrig) {
|
||||
t.Fatalf("unexpected tail; got %q; want %q", tail, dataOrig)
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
f([]byte("foo"))
|
||||
|
||||
ih := &indexBlockHeader{
|
||||
streamID: streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
},
|
||||
id: u128{
|
||||
hi: 214,
|
||||
lo: 2111,
|
||||
},
|
||||
},
|
||||
minTimestamp: 1234,
|
||||
maxTimestamp: 898943,
|
||||
indexBlockOffset: 234,
|
||||
indexBlockSize: 898,
|
||||
}
|
||||
data := ih.marshal(nil)
|
||||
for len(data) > 0 {
|
||||
data = data[:len(data)-1]
|
||||
f(data)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexBlockHeaderReset(t *testing.T) {
|
||||
ih := &indexBlockHeader{
|
||||
streamID: streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
},
|
||||
id: u128{
|
||||
hi: 214,
|
||||
lo: 2111,
|
||||
},
|
||||
},
|
||||
minTimestamp: 1234,
|
||||
maxTimestamp: 898943,
|
||||
indexBlockOffset: 234,
|
||||
indexBlockSize: 898,
|
||||
}
|
||||
ih.reset()
|
||||
ihZero := &indexBlockHeader{}
|
||||
if !reflect.DeepEqual(ih, ihZero) {
|
||||
t.Fatalf("unexpected non-zero indexBlockHeader after reset: %v", ih)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarshalUnmarshalIndexBlockHeaders(t *testing.T) {
|
||||
f := func(ihs []indexBlockHeader, marshaledLen int) {
|
||||
t.Helper()
|
||||
var data []byte
|
||||
for i := range ihs {
|
||||
data = ihs[i].marshal(data)
|
||||
}
|
||||
if len(data) != marshaledLen {
|
||||
t.Fatalf("unexpected marshaled length for indexBlockHeader entries; got %d; want %d", len(data), marshaledLen)
|
||||
}
|
||||
ihs2, err := unmarshalIndexBlockHeaders(nil, data)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot unmarshal indexBlockHeader entries: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(ihs, ihs2) {
|
||||
t.Fatalf("unexpected indexBlockHeader entries after unmarshaling\ngot\n%v\nwant\n%v", ihs2, ihs)
|
||||
}
|
||||
}
|
||||
f(nil, 0)
|
||||
f([]indexBlockHeader{{}}, 56)
|
||||
f([]indexBlockHeader{
|
||||
{
|
||||
indexBlockOffset: 234,
|
||||
indexBlockSize: 5432,
|
||||
},
|
||||
{
|
||||
minTimestamp: -123,
|
||||
},
|
||||
}, 112)
|
||||
}
|
900
lib/logstorage/indexdb.go
Normal file
@@ -0,0 +1,900 @@
package logstorage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
|
||||
)
|
||||
|
||||
const (
|
||||
// (tenantID:streamID) entries have this prefix
|
||||
//
|
||||
// These entries are used for detecting whether the given stream is already registered
|
||||
nsPrefixStreamID = 0
|
||||
|
||||
// (tenantID:streamID -> streamTagsCanonical) entries have this prefix
|
||||
nsPrefixStreamIDToStreamTags = 1
|
||||
|
||||
// (tenantID:name:value => streamIDs) entries have this prefix
|
||||
nsPrefixTagToStreamIDs = 2
|
||||
)
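// NOTE (editor's sketch, not part of the original diff): every indexdb row starts with
// a common prefix built from one of the namespace prefixes above plus the tenantID
// (see the marshalCommonPrefix calls below). Roughly:
//
//	nsPrefixStreamID:             <prefix(0, tenantID)> <streamID.id>
//	nsPrefixStreamIDToStreamTags: <prefix(1, tenantID)> <streamID.id> <streamTagsCanonical>
//	nsPrefixTagToStreamIDs:       <prefix(2, tenantID)> <tagName> <tagValue> <streamID.id>...
//
// This is the layout written by mustRegisterStream() and scanned by the
// getStreamIDsFor*() helpers below.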
|
||||
|
||||
// IndexdbStats contains indexdb stats
|
||||
type IndexdbStats struct {
|
||||
// StreamsCreatedTotal is the number of log streams created since the indexdb initialization.
|
||||
StreamsCreatedTotal uint64
|
||||
}
|
||||
|
||||
type indexdb struct {
|
||||
// streamsCreatedTotal is the number of log streams created since the indexdb initialization.
|
||||
streamsCreatedTotal uint64
|
||||
|
||||
// path is the path to indexdb
|
||||
path string
|
||||
|
||||
// partitionName is the name of the partition for the indexdb.
|
||||
partitionName string
|
||||
|
||||
// tb is the storage for indexdb
|
||||
tb *mergeset.Table
|
||||
|
||||
// indexSearchPool is a pool of indexSearch struct for the given indexdb
|
||||
indexSearchPool sync.Pool
|
||||
|
||||
// the generation of the streamFilterCache.
|
||||
// It is updated each time new item is added to tb.
|
||||
streamFilterCacheGeneration uint32
|
||||
|
||||
// s is the storage where indexdb belongs to.
|
||||
s *Storage
|
||||
}
|
||||
|
||||
func mustCreateIndexdb(path string) {
|
||||
fs.MustMkdirFailIfExist(path)
|
||||
}
|
||||
|
||||
func mustOpenIndexdb(path, partitionName string, s *Storage) *indexdb {
|
||||
idb := &indexdb{
|
||||
path: path,
|
||||
partitionName: partitionName,
|
||||
s: s,
|
||||
}
|
||||
isReadOnly := uint32(0)
|
||||
idb.tb = mergeset.MustOpenTable(path, idb.invalidateStreamFilterCache, mergeTagToStreamIDsRows, &isReadOnly)
|
||||
return idb
|
||||
}
|
||||
|
||||
func mustCloseIndexdb(idb *indexdb) {
|
||||
idb.tb.MustClose()
|
||||
idb.tb = nil
|
||||
idb.s = nil
|
||||
idb.partitionName = ""
|
||||
idb.path = ""
|
||||
}
|
||||
|
||||
func (idb *indexdb) debugFlush() {
|
||||
idb.tb.DebugFlush()
|
||||
}
|
||||
|
||||
func (idb *indexdb) updateStats(d *IndexdbStats) {
|
||||
d.StreamsCreatedTotal += atomic.LoadUint64(&idb.streamsCreatedTotal)
|
||||
}
|
||||
|
||||
func (idb *indexdb) appendStreamTagsByStreamID(dst []byte, sid *streamID) []byte {
|
||||
is := idb.getIndexSearch()
|
||||
defer idb.putIndexSearch(is)
|
||||
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
|
||||
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixStreamIDToStreamTags, sid.tenantID)
|
||||
kb.B = sid.id.marshal(kb.B)
|
||||
|
||||
if err := ts.FirstItemWithPrefix(kb.B); err != nil {
|
||||
if err == io.EOF {
|
||||
return dst
|
||||
}
|
||||
logger.Panicf("FATAL: unexpected error when searching for StreamTags by streamID=%s in indexdb: %s", sid, err)
|
||||
}
|
||||
data := ts.Item[len(kb.B):]
|
||||
dst = append(dst, data...)
|
||||
return dst
|
||||
}
|
||||
|
||||
// hasStreamID returns true if streamID exists in idb
|
||||
func (idb *indexdb) hasStreamID(sid *streamID) bool {
|
||||
is := idb.getIndexSearch()
|
||||
defer idb.putIndexSearch(is)
|
||||
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
|
||||
kb.B = marshalCommonPrefix(kb.B, nsPrefixStreamID, sid.tenantID)
|
||||
kb.B = sid.id.marshal(kb.B)
|
||||
|
||||
if err := ts.FirstItemWithPrefix(kb.B); err != nil {
|
||||
if err == io.EOF {
|
||||
return false
|
||||
}
|
||||
logger.Panicf("FATAL: unexpected error when searching for streamID=%s in indexdb: %s", sid, err)
|
||||
}
|
||||
return len(kb.B) == len(ts.Item)
|
||||
}
|
||||
|
||||
type indexSearch struct {
|
||||
idb *indexdb
|
||||
ts mergeset.TableSearch
|
||||
kb bytesutil.ByteBuffer
|
||||
}
|
||||
|
||||
func (idb *indexdb) getIndexSearch() *indexSearch {
|
||||
v := idb.indexSearchPool.Get()
|
||||
if v == nil {
|
||||
v = &indexSearch{
|
||||
idb: idb,
|
||||
}
|
||||
}
|
||||
is := v.(*indexSearch)
|
||||
is.ts.Init(idb.tb)
|
||||
return is
|
||||
}
|
||||
|
||||
func (idb *indexdb) putIndexSearch(is *indexSearch) {
|
||||
is.idb = nil
|
||||
is.ts.MustClose()
|
||||
is.kb.Reset()
|
||||
|
||||
idb.indexSearchPool.Put(is)
|
||||
}
|
||||
|
||||
// searchStreamIDs returns streamIDs for the given tenantIDs and the given stream filters
|
||||
func (idb *indexdb) searchStreamIDs(tenantIDs []TenantID, sf *StreamFilter) []streamID {
|
||||
// Try obtaining streamIDs from cache
|
||||
streamIDs, ok := idb.loadStreamIDsFromCache(tenantIDs, sf)
|
||||
if ok {
|
||||
// Fast path - streamIDs found in the cache.
|
||||
return streamIDs
|
||||
}
|
||||
|
||||
// Slow path - collect streamIDs from indexdb.
|
||||
|
||||
// Collect streamIDs for all the specified tenantIDs.
|
||||
is := idb.getIndexSearch()
|
||||
m := make(map[streamID]struct{})
|
||||
for _, tenantID := range tenantIDs {
|
||||
for _, asf := range sf.orFilters {
|
||||
is.updateStreamIDs(m, tenantID, asf)
|
||||
}
|
||||
}
|
||||
idb.putIndexSearch(is)
|
||||
|
||||
// Convert the collected streamIDs from m to sorted slice.
|
||||
streamIDs = make([]streamID, 0, len(m))
|
||||
for streamID := range m {
|
||||
streamIDs = append(streamIDs, streamID)
|
||||
}
|
||||
sortStreamIDs(streamIDs)
|
||||
|
||||
// Store the collected streamIDs to cache.
|
||||
idb.storeStreamIDsToCache(tenantIDs, sf, streamIDs)
|
||||
|
||||
return streamIDs
|
||||
}
|
||||
|
||||
func sortStreamIDs(streamIDs []streamID) {
|
||||
sort.Slice(streamIDs, func(i, j int) bool {
|
||||
return streamIDs[i].less(&streamIDs[j])
|
||||
})
|
||||
}
|
||||
|
||||
func (is *indexSearch) updateStreamIDs(dst map[streamID]struct{}, tenantID TenantID, asf *andStreamFilter) {
|
||||
var m map[u128]struct{}
|
||||
for _, tf := range asf.tagFilters {
|
||||
ids := is.getStreamIDsForTagFilter(tenantID, tf)
|
||||
if len(ids) == 0 {
|
||||
// There is no need in checking the remaining filters,
|
||||
// since the result will be empty in any case.
|
||||
return
|
||||
}
|
||||
if m == nil {
|
||||
m = ids
|
||||
} else {
|
||||
for id := range m {
|
||||
if _, ok := ids[id]; !ok {
|
||||
delete(m, id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var sid streamID
|
||||
for id := range m {
|
||||
sid.tenantID = tenantID
|
||||
sid.id = id
|
||||
dst[sid] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
func (is *indexSearch) getStreamIDsForTagFilter(tenantID TenantID, tf *streamTagFilter) map[u128]struct{} {
|
||||
switch tf.op {
|
||||
case "=":
|
||||
if tf.value == "" {
|
||||
// (field="")
|
||||
return is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName)
|
||||
}
|
||||
// (field="value")
|
||||
return is.getStreamIDsForNonEmptyTagValue(tenantID, tf.tagName, tf.value)
|
||||
case "!=":
|
||||
if tf.value == "" {
|
||||
// (field!="")
|
||||
return is.getStreamIDsForTagName(tenantID, tf.tagName)
|
||||
}
|
||||
// (field!="value") => (all and not field="value")
|
||||
ids := is.getStreamIDsForTenant(tenantID)
|
||||
idsForTag := is.getStreamIDsForNonEmptyTagValue(tenantID, tf.tagName, tf.value)
|
||||
for id := range idsForTag {
|
||||
delete(ids, id)
|
||||
}
|
||||
return ids
|
||||
case "=~":
|
||||
re := tf.getRegexp()
|
||||
if re.MatchString("") {
|
||||
// (field=~"|re") => (field="" or field=~"re")
|
||||
ids := is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName)
|
||||
idsForRe := is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
|
||||
for id := range idsForRe {
|
||||
ids[id] = struct{}{}
|
||||
}
|
||||
return ids
|
||||
}
|
||||
return is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
|
||||
case "!~":
|
||||
re := tf.getRegexp()
|
||||
if re.MatchString("") {
|
||||
// (field!~"|re") => (field!="" and not field=~"re")
|
||||
ids := is.getStreamIDsForTagName(tenantID, tf.tagName)
|
||||
if len(ids) == 0 {
|
||||
return ids
|
||||
}
|
||||
idsForRe := is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
|
||||
for id := range idsForRe {
|
||||
delete(ids, id)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
// (field!~"re") => (all and not field=~"re")
|
||||
ids := is.getStreamIDsForTenant(tenantID)
|
||||
idsForRe := is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
|
||||
for id := range idsForRe {
|
||||
delete(ids, id)
|
||||
}
|
||||
return ids
|
||||
default:
|
||||
logger.Panicf("BUG: unexpected operation in stream tag filter: %q", tf.op)
|
||||
return nil
|
||||
}
|
||||
}
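// NOTE (editor's sketch, not part of the original diff): the switch above maps every
// stream tag filter to plain set operations over streamID sets:
//
//	field="value"  -> ids(field=value)
//	field=""       -> all(tenant) - ids(field)
//	field!="value" -> all(tenant) - ids(field=value)
//	field!=""      -> ids(field)
//	field=~"re"    -> ids(field matching re), plus ids(field="") when re matches ""
//	field!~"re"    -> all(tenant) - ids(field matching re); when re matches "",
//	                  ids(field) - ids(field matching re)
//
// where all(tenant) is getStreamIDsForTenant() and ids(...) come from the other helpers below.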
|
||||
|
||||
func (is *indexSearch) getStreamIDsForNonEmptyTagValue(tenantID TenantID, tagName, tagValue string) map[u128]struct{} {
|
||||
ids := make(map[u128]struct{})
|
||||
var sp tagToStreamIDsRowParser
|
||||
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToStreamIDs, tenantID)
|
||||
kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagName))
|
||||
kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagValue))
|
||||
prefix := kb.B
|
||||
ts.Seek(prefix)
|
||||
for ts.NextItem() {
|
||||
item := ts.Item
|
||||
if !bytes.HasPrefix(item, prefix) {
|
||||
break
|
||||
}
|
||||
tail := item[len(prefix):]
|
||||
sp.UpdateStreamIDs(ids, tail)
|
||||
}
|
||||
if err := ts.Error(); err != nil {
|
||||
logger.Panicf("FATAL: unexpected error: %s", err)
|
||||
}
|
||||
|
||||
return ids
|
||||
}
|
||||
|
||||
func (is *indexSearch) getStreamIDsForEmptyTagValue(tenantID TenantID, tagName string) map[u128]struct{} {
|
||||
ids := is.getStreamIDsForTenant(tenantID)
|
||||
idsForTag := is.getStreamIDsForTagName(tenantID, tagName)
|
||||
for id := range idsForTag {
|
||||
delete(ids, id)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
func (is *indexSearch) getStreamIDsForTenant(tenantID TenantID) map[u128]struct{} {
|
||||
ids := make(map[u128]struct{})
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixStreamID, tenantID)
|
||||
prefix := kb.B
|
||||
ts.Seek(prefix)
|
||||
var id u128
|
||||
for ts.NextItem() {
|
||||
item := ts.Item
|
||||
if !bytes.HasPrefix(item, prefix) {
|
||||
break
|
||||
}
|
||||
tail, err := id.unmarshal(item[len(prefix):])
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot unmarshal streamID from (tenantID:streamID) entry: %s", err)
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
logger.Panicf("FATAL: unexpected non-empty tail left after unmarshaling streamID from (tenantID:streamID); tail len=%d", len(tail))
|
||||
}
|
||||
ids[id] = struct{}{}
|
||||
}
|
||||
if err := ts.Error(); err != nil {
|
||||
logger.Panicf("FATAL: unexpected error: %s", err)
|
||||
}
|
||||
|
||||
return ids
|
||||
}
|
||||
|
||||
func (is *indexSearch) getStreamIDsForTagName(tenantID TenantID, tagName string) map[u128]struct{} {
|
||||
ids := make(map[u128]struct{})
|
||||
var sp tagToStreamIDsRowParser
|
||||
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToStreamIDs, tenantID)
|
||||
kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagName))
|
||||
prefix := kb.B
|
||||
ts.Seek(prefix)
|
||||
for ts.NextItem() {
|
||||
item := ts.Item
|
||||
if !bytes.HasPrefix(item, prefix) {
|
||||
break
|
||||
}
|
||||
tail := item[len(prefix):]
|
||||
n := bytes.IndexByte(tail, tagSeparatorChar)
|
||||
if n < 0 {
|
||||
logger.Panicf("FATAL: cannot find the end of tag value")
|
||||
}
|
||||
tail = tail[n+1:]
|
||||
sp.UpdateStreamIDs(ids, tail)
|
||||
}
|
||||
if err := ts.Error(); err != nil {
|
||||
logger.Panicf("FATAL: unexpected error: %s", err)
|
||||
}
|
||||
|
||||
return ids
|
||||
}
|
||||
|
||||
func (is *indexSearch) getStreamIDsForTagRegexp(tenantID TenantID, tagName string, re *regexutil.PromRegex) map[u128]struct{} {
|
||||
ids := make(map[u128]struct{})
|
||||
var sp tagToStreamIDsRowParser
|
||||
var tagValue, prevMatchingTagValue []byte
|
||||
var err error
|
||||
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToStreamIDs, tenantID)
|
||||
kb.B = marshalTagValue(kb.B, bytesutil.ToUnsafeBytes(tagName))
|
||||
prefix := kb.B
|
||||
ts.Seek(prefix)
|
||||
for ts.NextItem() {
|
||||
item := ts.Item
|
||||
if !bytes.HasPrefix(item, prefix) {
|
||||
break
|
||||
}
|
||||
tail := item[len(prefix):]
|
||||
tail, tagValue, err = unmarshalTagValue(tagValue[:0], tail)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot unmarshal tag value: %s", err)
|
||||
}
|
||||
if !bytes.Equal(tagValue, prevMatchingTagValue) {
|
||||
if !re.MatchString(bytesutil.ToUnsafeString(tagValue)) {
|
||||
continue
|
||||
}
|
||||
prevMatchingTagValue = append(prevMatchingTagValue[:0], tagValue...)
|
||||
}
|
||||
sp.UpdateStreamIDs(ids, tail)
|
||||
}
|
||||
if err := ts.Error(); err != nil {
|
||||
logger.Panicf("FATAL: unexpected error: %s", err)
|
||||
}
|
||||
|
||||
return ids
|
||||
}
|
||||
|
||||
func (idb *indexdb) mustRegisterStream(streamID *streamID, streamTagsCanonical []byte) {
|
||||
st := GetStreamTags()
|
||||
mustUnmarshalStreamTags(st, streamTagsCanonical)
|
||||
tenantID := streamID.tenantID
|
||||
|
||||
bi := getBatchItems()
|
||||
buf := bi.buf[:0]
|
||||
items := bi.items[:0]
|
||||
|
||||
// Register tenantID:streamID entry.
|
||||
bufLen := len(buf)
|
||||
buf = marshalCommonPrefix(buf, nsPrefixStreamID, tenantID)
|
||||
buf = streamID.id.marshal(buf)
|
||||
items = append(items, buf[bufLen:])
|
||||
|
||||
// Register tenantID:streamID -> streamTagsCanonical entry.
|
||||
bufLen = len(buf)
|
||||
buf = marshalCommonPrefix(buf, nsPrefixStreamIDToStreamTags, tenantID)
|
||||
buf = streamID.id.marshal(buf)
|
||||
buf = append(buf, streamTagsCanonical...)
|
||||
items = append(items, buf[bufLen:])
|
||||
|
||||
// Register tenantID:name:value -> streamIDs entries.
|
||||
tags := st.tags
|
||||
for i := range tags {
|
||||
bufLen = len(buf)
|
||||
buf = marshalCommonPrefix(buf, nsPrefixTagToStreamIDs, tenantID)
|
||||
buf = tags[i].indexdbMarshal(buf)
|
||||
buf = streamID.id.marshal(buf)
|
||||
items = append(items, buf[bufLen:])
|
||||
}
|
||||
PutStreamTags(st)
|
||||
|
||||
// Add items to the storage
|
||||
idb.tb.AddItems(items)
|
||||
|
||||
bi.buf = buf
|
||||
bi.items = items
|
||||
putBatchItems(bi)
|
||||
|
||||
atomic.AddUint64(&idb.streamsCreatedTotal, 1)
|
||||
}
|
||||
|
||||
func (idb *indexdb) invalidateStreamFilterCache() {
|
||||
// This function must be fast, since it is called each
|
||||
// time a new indexdb entry is added.
|
||||
atomic.AddUint32(&idb.streamFilterCacheGeneration, 1)
|
||||
}
|
||||
|
||||
func (idb *indexdb) marshalStreamFilterCacheKey(dst []byte, tenantIDs []TenantID, sf *StreamFilter) []byte {
|
||||
dst = encoding.MarshalUint32(dst, idb.streamFilterCacheGeneration)
|
||||
dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(idb.partitionName))
|
||||
dst = encoding.MarshalVarUint64(dst, uint64(len(tenantIDs)))
|
||||
for i := range tenantIDs {
|
||||
dst = tenantIDs[i].marshal(dst)
|
||||
}
|
||||
dst = sf.marshalForCacheKey(dst)
|
||||
return dst
|
||||
}
|
||||
|
||||
func (idb *indexdb) loadStreamIDsFromCache(tenantIDs []TenantID, sf *StreamFilter) ([]streamID, bool) {
|
||||
bb := bbPool.Get()
|
||||
bb.B = idb.marshalStreamFilterCacheKey(bb.B[:0], tenantIDs, sf)
|
||||
data := idb.s.streamFilterCache.GetBig(nil, bb.B)
|
||||
bbPool.Put(bb)
|
||||
if len(data) == 0 {
|
||||
// Cache miss
|
||||
return nil, false
|
||||
}
|
||||
// Cache hit - unpack streamIDs from data.
|
||||
tail, n, err := encoding.UnmarshalVarUint64(data)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when unmarshaling the number of streamIDs from cache: %s", err)
|
||||
}
|
||||
src := tail
|
||||
streamIDs := make([]streamID, n)
|
||||
for i := uint64(0); i < n; i++ {
|
||||
tail, err = streamIDs[i].unmarshal(src)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when unmarshaling streamID #%d: %s", i, err)
|
||||
}
|
||||
src = tail
|
||||
}
|
||||
if len(src) > 0 {
|
||||
logger.Panicf("BUG: unexpected non-empty tail left with len=%d", len(src))
|
||||
}
|
||||
return streamIDs, true
|
||||
}
|
||||
|
||||
func (idb *indexdb) storeStreamIDsToCache(tenantIDs []TenantID, sf *StreamFilter, streamIDs []streamID) {
|
||||
// marshal streamIDs
|
||||
var b []byte
|
||||
b = encoding.MarshalVarUint64(b, uint64(len(streamIDs)))
|
||||
for i := 0; i < len(streamIDs); i++ {
|
||||
b = streamIDs[i].marshal(b)
|
||||
}
|
||||
|
||||
// Store marshaled streamIDs to cache.
|
||||
bb := bbPool.Get()
|
||||
bb.B = idb.marshalStreamFilterCacheKey(bb.B[:0], tenantIDs, sf)
|
||||
idb.s.streamFilterCache.SetBig(bb.B, b)
|
||||
bbPool.Put(bb)
|
||||
}
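// NOTE (editor's sketch, not part of the original diff): the cached value is simply
//
//	varuint64(len(streamIDs)) followed by the marshaled streamID entries
//
// which is exactly what loadStreamIDsFromCache() above decodes. The cache key embeds
// streamFilterCacheGeneration, so bumping the generation in invalidateStreamFilterCache()
// implicitly invalidates every previously stored entry.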
|
||||
|
||||
type batchItems struct {
|
||||
buf []byte
|
||||
|
||||
items [][]byte
|
||||
}
|
||||
|
||||
func (bi *batchItems) reset() {
|
||||
bi.buf = bi.buf[:0]
|
||||
|
||||
items := bi.items
|
||||
for i := range items {
|
||||
items[i] = nil
|
||||
}
|
||||
bi.items = items[:0]
|
||||
}
|
||||
|
||||
func getBatchItems() *batchItems {
|
||||
v := batchItemsPool.Get()
|
||||
if v == nil {
|
||||
return &batchItems{}
|
||||
}
|
||||
return v.(*batchItems)
|
||||
}
|
||||
|
||||
func putBatchItems(bi *batchItems) {
|
||||
bi.reset()
|
||||
batchItemsPool.Put(bi)
|
||||
}
|
||||
|
||||
var batchItemsPool sync.Pool
|
||||
|
||||
func mergeTagToStreamIDsRows(data []byte, items []mergeset.Item) ([]byte, []mergeset.Item) {
|
||||
// Perform quick checks whether items contain rows starting from nsPrefixTagToStreamIDs
|
||||
// based on the fact that items are sorted.
|
||||
if len(items) <= 2 {
|
||||
// The first and the last row must remain unchanged.
|
||||
return data, items
|
||||
}
|
||||
firstItem := items[0].Bytes(data)
|
||||
if len(firstItem) > 0 && firstItem[0] > nsPrefixTagToStreamIDs {
|
||||
return data, items
|
||||
}
|
||||
lastItem := items[len(items)-1].Bytes(data)
|
||||
if len(lastItem) > 0 && lastItem[0] < nsPrefixTagToStreamIDs {
|
||||
return data, items
|
||||
}
|
||||
|
||||
// items contain at least one row starting from nsPrefixTagToStreamIDs. Merge rows with common tag.
|
||||
tsm := getTagToStreamIDsRowsMerger()
|
||||
tsm.dataCopy = append(tsm.dataCopy[:0], data...)
|
||||
tsm.itemsCopy = append(tsm.itemsCopy[:0], items...)
|
||||
sp := &tsm.sp
|
||||
spPrev := &tsm.spPrev
|
||||
dstData := data[:0]
|
||||
dstItems := items[:0]
|
||||
for i, it := range items {
|
||||
item := it.Bytes(data)
|
||||
if len(item) == 0 || item[0] != nsPrefixTagToStreamIDs || i == 0 || i == len(items)-1 {
|
||||
// Write rows not starting with nsPrefixTagToStreamIDs as-is.
|
||||
// Additionally write the first and the last row as-is in order to preserve
|
||||
// sort order for adjacent blocks.
|
||||
dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev)
|
||||
dstData = append(dstData, item...)
|
||||
dstItems = append(dstItems, mergeset.Item{
|
||||
Start: uint32(len(dstData) - len(item)),
|
||||
End: uint32(len(dstData)),
|
||||
})
|
||||
continue
|
||||
}
|
||||
if err := sp.Init(item); err != nil {
|
||||
logger.Panicf("FATAL: cannot parse row during merge: %s", err)
|
||||
}
|
||||
if sp.StreamIDsLen() >= maxStreamIDsPerRow {
|
||||
dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev)
|
||||
dstData = append(dstData, item...)
|
||||
dstItems = append(dstItems, mergeset.Item{
|
||||
Start: uint32(len(dstData) - len(item)),
|
||||
End: uint32(len(dstData)),
|
||||
})
|
||||
continue
|
||||
}
|
||||
if !sp.EqualPrefix(spPrev) {
|
||||
dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev)
|
||||
}
|
||||
sp.ParseStreamIDs()
|
||||
tsm.pendingStreamIDs = append(tsm.pendingStreamIDs, sp.StreamIDs...)
|
||||
spPrev, sp = sp, spPrev
|
||||
if len(tsm.pendingStreamIDs) >= maxStreamIDsPerRow {
|
||||
dstData, dstItems = tsm.flushPendingStreamIDs(dstData, dstItems, spPrev)
|
||||
}
|
||||
}
|
||||
if len(tsm.pendingStreamIDs) > 0 {
|
||||
logger.Panicf("BUG: tsm.pendingStreamIDs must be empty at this point; got %d items", len(tsm.pendingStreamIDs))
|
||||
}
|
||||
if !checkItemsSorted(dstData, dstItems) {
|
||||
// Items could become unsorted if initial items contain duplicate streamIDs:
|
||||
//
|
||||
// item1: 1, 1, 5
|
||||
// item2: 1, 4
|
||||
//
|
||||
// Items could become the following after the merge:
|
||||
//
|
||||
// item1: 1, 5
|
||||
// item2: 1, 4
|
||||
//
|
||||
// i.e. item1 > item2
|
||||
//
|
||||
// Leave the original items unmerged, so they can be merged next time.
|
||||
// This case should be quite rare - if multiple log rows are simultaneously inserted
// into the same new log stream from multiple concurrent goroutines.
|
||||
dstData = append(dstData[:0], tsm.dataCopy...)
|
||||
dstItems = append(dstItems[:0], tsm.itemsCopy...)
|
||||
if !checkItemsSorted(dstData, dstItems) {
|
||||
logger.Panicf("BUG: the original items weren't sorted; items=%q", dstItems)
|
||||
}
|
||||
}
|
||||
putTagToStreamIDsRowsMerger(tsm)
|
||||
return dstData, dstItems
|
||||
}
|
||||
|
||||
// maxStreamIDsPerRow limits the number of streamIDs in tenantID:name:value -> streamIDs row.
|
||||
//
|
||||
// This reduces overhead on index and metaindex in lib/mergeset.
|
||||
const maxStreamIDsPerRow = 32
|
||||
|
||||
type u128Sorter []u128
|
||||
|
||||
func (s u128Sorter) Len() int { return len(s) }
|
||||
func (s u128Sorter) Less(i, j int) bool {
|
||||
return s[i].less(&s[j])
|
||||
}
|
||||
func (s u128Sorter) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
||||
|
||||
type tagToStreamIDsRowsMerger struct {
|
||||
pendingStreamIDs u128Sorter
|
||||
sp tagToStreamIDsRowParser
|
||||
spPrev tagToStreamIDsRowParser
|
||||
|
||||
itemsCopy []mergeset.Item
|
||||
dataCopy []byte
|
||||
}
|
||||
|
||||
func (tsm *tagToStreamIDsRowsMerger) Reset() {
|
||||
tsm.pendingStreamIDs = tsm.pendingStreamIDs[:0]
|
||||
tsm.sp.Reset()
|
||||
tsm.spPrev.Reset()
|
||||
|
||||
tsm.itemsCopy = tsm.itemsCopy[:0]
|
||||
tsm.dataCopy = tsm.dataCopy[:0]
|
||||
}
|
||||
|
||||
func (tsm *tagToStreamIDsRowsMerger) flushPendingStreamIDs(dstData []byte, dstItems []mergeset.Item, sp *tagToStreamIDsRowParser) ([]byte, []mergeset.Item) {
|
||||
if len(tsm.pendingStreamIDs) == 0 {
|
||||
// Nothing to flush
|
||||
return dstData, dstItems
|
||||
}
|
||||
// Use sort.Sort instead of sort.Slice in order to reduce memory allocations.
|
||||
sort.Sort(&tsm.pendingStreamIDs)
|
||||
tsm.pendingStreamIDs = removeDuplicateStreamIDs(tsm.pendingStreamIDs)
|
||||
|
||||
// Marshal pendingStreamIDs
|
||||
dstDataLen := len(dstData)
|
||||
dstData = sp.MarshalPrefix(dstData)
|
||||
pendingStreamIDs := tsm.pendingStreamIDs
|
||||
for i := range pendingStreamIDs {
|
||||
dstData = pendingStreamIDs[i].marshal(dstData)
|
||||
}
|
||||
dstItems = append(dstItems, mergeset.Item{
|
||||
Start: uint32(dstDataLen),
|
||||
End: uint32(len(dstData)),
|
||||
})
|
||||
tsm.pendingStreamIDs = tsm.pendingStreamIDs[:0]
|
||||
return dstData, dstItems
|
||||
}
|
||||
|
||||
func removeDuplicateStreamIDs(sortedStreamIDs []u128) []u128 {
|
||||
if len(sortedStreamIDs) < 2 {
|
||||
return sortedStreamIDs
|
||||
}
|
||||
hasDuplicates := false
|
||||
for i := 1; i < len(sortedStreamIDs); i++ {
|
||||
if sortedStreamIDs[i-1] == sortedStreamIDs[i] {
|
||||
hasDuplicates = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasDuplicates {
|
||||
return sortedStreamIDs
|
||||
}
|
||||
dstStreamIDs := sortedStreamIDs[:1]
|
||||
for i := 1; i < len(sortedStreamIDs); i++ {
|
||||
if sortedStreamIDs[i-1] == sortedStreamIDs[i] {
|
||||
continue
|
||||
}
|
||||
dstStreamIDs = append(dstStreamIDs, sortedStreamIDs[i])
|
||||
}
|
||||
return dstStreamIDs
|
||||
}
|
||||
|
||||
func getTagToStreamIDsRowsMerger() *tagToStreamIDsRowsMerger {
|
||||
v := tsmPool.Get()
|
||||
if v == nil {
|
||||
return &tagToStreamIDsRowsMerger{}
|
||||
}
|
||||
return v.(*tagToStreamIDsRowsMerger)
|
||||
}
|
||||
|
||||
func putTagToStreamIDsRowsMerger(tsm *tagToStreamIDsRowsMerger) {
|
||||
tsm.Reset()
|
||||
tsmPool.Put(tsm)
|
||||
}
|
||||
|
||||
var tsmPool sync.Pool
|
||||
|
||||
type tagToStreamIDsRowParser struct {
|
||||
// TenantID contains TenantID of the parsed row
|
||||
TenantID TenantID
|
||||
|
||||
// StreamIDs contains parsed StreamIDs after ParseStreamIDs call
|
||||
StreamIDs []u128
|
||||
|
||||
// streamIDsParsed is set to true after ParseStreamIDs call
|
||||
streamIDsParsed bool
|
||||
|
||||
// Tag contains parsed tag after Init call
|
||||
Tag streamTag
|
||||
|
||||
// tail contains the remaining unparsed streamIDs
|
||||
tail []byte
|
||||
}
|
||||
|
||||
func (sp *tagToStreamIDsRowParser) Reset() {
|
||||
sp.TenantID.Reset()
|
||||
sp.StreamIDs = sp.StreamIDs[:0]
|
||||
sp.streamIDsParsed = false
|
||||
sp.Tag.reset()
|
||||
sp.tail = nil
|
||||
}
|
||||
|
||||
// Init initializes sp from b, which should contain encoded tenantID:name:value -> streamIDs row.
|
||||
//
|
||||
// b cannot be re-used until Reset call.
|
||||
//
|
||||
// ParseStreamIDs() must be called later for obtaining sp.StreamIDs from the given tail.
|
||||
func (sp *tagToStreamIDsRowParser) Init(b []byte) error {
|
||||
tail, nsPrefix, err := unmarshalCommonPrefix(&sp.TenantID, b)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid tenantID:name:value -> streamIDs row %q: %w", b, err)
|
||||
}
|
||||
if nsPrefix != nsPrefixTagToStreamIDs {
|
||||
return fmt.Errorf("invalid prefix for tenantID:name:value -> streamIDs row %q; got %d; want %d", b, nsPrefix, nsPrefixTagToStreamIDs)
|
||||
}
|
||||
tail, err = sp.Tag.indexdbUnmarshal(tail)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal tag from tenantID:name:value -> streamIDs row %q: %w", b, err)
|
||||
}
|
||||
if err = sp.InitOnlyTail(tail); err != nil {
|
||||
return fmt.Errorf("cannot initialize tail from tenantID:name:value -> streamIDs row %q: %w", b, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalPrefix marshals row prefix without tail to dst.
|
||||
func (sp *tagToStreamIDsRowParser) MarshalPrefix(dst []byte) []byte {
|
||||
dst = marshalCommonPrefix(dst, nsPrefixTagToStreamIDs, sp.TenantID)
|
||||
dst = sp.Tag.indexdbMarshal(dst)
|
||||
return dst
|
||||
}
|
||||
|
||||
// InitOnlyTail initializes sp.tail from tail, which must contain streamIDs.
|
||||
//
|
||||
// tail cannot be re-used until Reset call.
|
||||
//
|
||||
// ParseStreamIDs() must be called later for obtaining sp.StreamIDs from the given tail.
|
||||
func (sp *tagToStreamIDsRowParser) InitOnlyTail(tail []byte) error {
|
||||
if len(tail) == 0 {
|
||||
return fmt.Errorf("missing streamID in the tenantID:name:value -> streamIDs row")
|
||||
}
|
||||
if len(tail)%16 != 0 {
|
||||
return fmt.Errorf("invalid tail length in the tenantID:name:value -> streamIDs row; got %d bytes; must be multiple of 16 bytes", len(tail))
|
||||
}
|
||||
sp.tail = tail
|
||||
sp.streamIDsParsed = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// EqualPrefix returns true if prefixes for sp and x are equal.
|
||||
//
|
||||
// Prefix contains (tenantID:name:value)
|
||||
func (sp *tagToStreamIDsRowParser) EqualPrefix(x *tagToStreamIDsRowParser) bool {
|
||||
if !sp.TenantID.equal(&x.TenantID) {
|
||||
return false
|
||||
}
|
||||
if !sp.Tag.equal(&x.Tag) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// StreamIDsLen returns the number of StreamIDs in the sp.tail
|
||||
func (sp *tagToStreamIDsRowParser) StreamIDsLen() int {
|
||||
return len(sp.tail) / 16
|
||||
}
|
||||
|
||||
// ParseStreamIDs parses StreamIDs from sp.tail into sp.StreamIDs.
|
||||
func (sp *tagToStreamIDsRowParser) ParseStreamIDs() {
|
||||
if sp.streamIDsParsed {
|
||||
return
|
||||
}
|
||||
tail := sp.tail
|
||||
n := len(tail) / 16
|
||||
streamIDs := sp.StreamIDs[:0]
|
||||
if n <= cap(streamIDs) {
|
||||
streamIDs = streamIDs[:n]
|
||||
} else {
|
||||
streamIDs = append(streamIDs[:cap(streamIDs)], make([]u128, n-cap(streamIDs))...)
|
||||
}
|
||||
sp.StreamIDs = streamIDs
|
||||
for i := 0; i < n; i++ {
|
||||
var err error
|
||||
tail, err = streamIDs[i].unmarshal(tail)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot unmarshal streamID: %s", err)
|
||||
}
|
||||
}
|
||||
sp.streamIDsParsed = true
|
||||
}
|
||||
|
||||
func (sp *tagToStreamIDsRowParser) UpdateStreamIDs(ids map[u128]struct{}, tail []byte) {
|
||||
sp.Reset()
|
||||
if err := sp.InitOnlyTail(tail); err != nil {
|
||||
logger.Panicf("FATAL: cannot parse '(date, tag) -> streamIDs' row: %s", err)
|
||||
}
|
||||
sp.ParseStreamIDs()
|
||||
for _, id := range sp.StreamIDs {
|
||||
ids[id] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// commonPrefixLen is the length of common prefix for indexdb rows
|
||||
// 1 byte for ns* prefix + 8 bytes for tenantID
|
||||
const commonPrefixLen = 1 + 8
|
||||
|
||||
func marshalCommonPrefix(dst []byte, nsPrefix byte, tenantID TenantID) []byte {
|
||||
dst = append(dst, nsPrefix)
|
||||
dst = tenantID.marshal(dst)
|
||||
return dst
|
||||
}
|
||||
|
||||
func unmarshalCommonPrefix(dstTenantID *TenantID, src []byte) ([]byte, byte, error) {
|
||||
if len(src) < commonPrefixLen {
|
||||
return nil, 0, fmt.Errorf("cannot unmarshal common prefix from %d bytes; need at least %d bytes; data=%X", len(src), commonPrefixLen, src)
|
||||
}
|
||||
prefix := src[0]
|
||||
src = src[1:]
|
||||
tail, err := dstTenantID.unmarshal(src)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("cannot unmarshal tenantID: %s", err)
|
||||
}
|
||||
return tail, prefix, nil
|
||||
}
|
||||
|
||||
func checkItemsSorted(data []byte, items []mergeset.Item) bool {
|
||||
if len(items) == 0 {
|
||||
return true
|
||||
}
|
||||
prevItem := items[0].String(data)
|
||||
for _, it := range items[1:] {
|
||||
currItem := it.String(data)
|
||||
if prevItem > currItem {
|
||||
return false
|
||||
}
|
||||
prevItem = currItem
|
||||
}
|
||||
return true
|
||||
}
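
// The sketch below is an editorial illustration and is not part of the original
// commit: it shows how the marshalCommonPrefix / unmarshalCommonPrefix pair above
// round-trips the common row prefix (1 byte ns* prefix followed by an 8-byte
// tenantID). It assumes it is compiled inside the logstorage package next to the
// code above; the function name and the tenantID values are hypothetical.
func commonPrefixRoundTripSketch() {
	tenantID := TenantID{AccountID: 123, ProjectID: 456}

	// Marshal the prefix: the result must be exactly commonPrefixLen bytes long.
	buf := marshalCommonPrefix(nil, nsPrefixTagToStreamIDs, tenantID)
	if len(buf) != commonPrefixLen {
		logger.Panicf("BUG: unexpected prefix length; got %d; want %d", len(buf), commonPrefixLen)
	}

	// Unmarshal it back and verify the ns prefix, the tenantID and the empty tail.
	var gotTenantID TenantID
	tail, nsPrefix, err := unmarshalCommonPrefix(&gotTenantID, buf)
	if err != nil {
		logger.Panicf("BUG: cannot unmarshal common prefix: %s", err)
	}
	if nsPrefix != nsPrefixTagToStreamIDs || !gotTenantID.equal(&tenantID) || len(tail) > 0 {
		logger.Panicf("BUG: unexpected round-trip result; nsPrefix=%d; tail=%q", nsPrefix, tail)
	}
}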
253
lib/logstorage/indexdb_test.go
Normal file
@@ -0,0 +1,253 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
func TestStorageSearchStreamIDs(t *testing.T) {
|
||||
const path = "TestStorageSearchStreamIDs"
|
||||
const partitionName = "foobar"
|
||||
s := newTestStorage()
|
||||
mustCreateIndexdb(path)
|
||||
idb := mustOpenIndexdb(path, partitionName, s)
|
||||
|
||||
tenantID := TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 567,
|
||||
}
|
||||
getStreamIDForTags := func(tags map[string]string) (streamID, []byte) {
|
||||
st := GetStreamTags()
|
||||
for k, v := range tags {
|
||||
st.Add(k, v)
|
||||
}
|
||||
streamTagsCanonical := st.MarshalCanonical(nil)
|
||||
PutStreamTags(st)
|
||||
id := hash128(streamTagsCanonical)
|
||||
sid := streamID{
|
||||
tenantID: tenantID,
|
||||
id: id,
|
||||
}
|
||||
return sid, streamTagsCanonical
|
||||
}
|
||||
|
||||
// Create indexdb entries
|
||||
const jobsCount = 7
|
||||
const instancesCount = 5
|
||||
for i := 0; i < jobsCount; i++ {
|
||||
for j := 0; j < instancesCount; j++ {
|
||||
sid, streamTagsCanonical := getStreamIDForTags(map[string]string{
|
||||
"job": fmt.Sprintf("job-%d", i),
|
||||
"instance": fmt.Sprintf("instance-%d", j),
|
||||
})
|
||||
idb.mustRegisterStream(&sid, streamTagsCanonical)
|
||||
}
|
||||
}
|
||||
idb.debugFlush()
|
||||
|
||||
f := func(streamFilter string, expectedStreamIDs []streamID) {
|
||||
t.Helper()
|
||||
sf := mustNewStreamFilter(streamFilter)
|
||||
if expectedStreamIDs == nil {
|
||||
expectedStreamIDs = []streamID{}
|
||||
}
|
||||
sortStreamIDs(expectedStreamIDs)
|
||||
for i := 0; i < 3; i++ {
|
||||
streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf)
|
||||
if !reflect.DeepEqual(streamIDs, expectedStreamIDs) {
|
||||
t.Fatalf("unexpected streamIDs on iteration %d; got %v; want %v", i, streamIDs, expectedStreamIDs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
t.Run("missing-tenant-id", func(t *testing.T) {
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 2,
|
||||
}
|
||||
sf := mustNewStreamFilter(`{job="job-0",instance="instance-0"}`)
|
||||
for i := 0; i < 3; i++ {
|
||||
streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf)
|
||||
if len(streamIDs) > 0 {
|
||||
t.Fatalf("unexpected non-empty streamIDs on iteration %d: %d", i, len(streamIDs))
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("missing-job", func(t *testing.T) {
|
||||
f(`{job="non-existing-job",instance="instance-0"}`, nil)
|
||||
})
|
||||
t.Run("missing-job-re", func(t *testing.T) {
|
||||
f(`{job=~"non-existing-job|",instance="instance-0"}`, nil)
|
||||
})
|
||||
t.Run("missing-job-negative-re", func(t *testing.T) {
|
||||
f(`{job!~"job.+",instance="instance-0"}`, nil)
|
||||
})
|
||||
t.Run("empty-job", func(t *testing.T) {
|
||||
f(`{job="",instance="instance-0"}`, nil)
|
||||
})
|
||||
t.Run("missing-instance", func(t *testing.T) {
|
||||
f(`{job="job-0",instance="non-existing-instance"}`, nil)
|
||||
})
|
||||
t.Run("missing-instance-re", func(t *testing.T) {
|
||||
f(`{job="job-0",instance=~"non-existing-instance|"}`, nil)
|
||||
})
|
||||
t.Run("missing-instance-negative-re", func(t *testing.T) {
|
||||
f(`{job="job-0",instance!~"instance.+"}`, nil)
|
||||
})
|
||||
t.Run("empty-instance", func(t *testing.T) {
|
||||
f(`{job="job-0",instance=""}`, nil)
|
||||
})
|
||||
t.Run("non-existing-tag", func(t *testing.T) {
|
||||
f(`{job="job-0",instance="instance-0",non_existing_tag="foobar"}`, nil)
|
||||
})
|
||||
t.Run("non-existing-non-empty-tag", func(t *testing.T) {
|
||||
f(`{job="job-0",instance="instance-0",non_existing_tag!=""}`, nil)
|
||||
})
|
||||
t.Run("non-existing-tag-re", func(t *testing.T) {
|
||||
f(`{job="job-0",instance="instance-0",non_existing_tag=~"foo.+"}`, nil)
|
||||
})
|
||||
t.Run("non-existing-non-empty-tag-re", func(t *testing.T) {
|
||||
f(`{job="job-0",instance="instance-0",non_existing_tag!~""}`, nil)
|
||||
})
|
||||
|
||||
t.Run("match-job-instance", func(t *testing.T) {
|
||||
sid, _ := getStreamIDForTags(map[string]string{
|
||||
"instance": "instance-0",
|
||||
"job": "job-0",
|
||||
})
|
||||
f(`{job="job-0",instance="instance-0"}`, []streamID{sid})
|
||||
})
|
||||
t.Run("match-non-existing-tag", func(t *testing.T) {
|
||||
sid, _ := getStreamIDForTags(map[string]string{
|
||||
"instance": "instance-0",
|
||||
"job": "job-0",
|
||||
})
|
||||
f(`{job="job-0",instance="instance-0",non_existing_tag=~"foo|"}`, []streamID{sid})
|
||||
})
|
||||
t.Run("match-job", func(t *testing.T) {
|
||||
var streamIDs []streamID
|
||||
for i := 0; i < instancesCount; i++ {
|
||||
sid, _ := getStreamIDForTags(map[string]string{
|
||||
"instance": fmt.Sprintf("instance-%d", i),
|
||||
"job": "job-0",
|
||||
})
|
||||
streamIDs = append(streamIDs, sid)
|
||||
}
|
||||
f(`{job="job-0"}`, streamIDs)
|
||||
})
|
||||
t.Run("match-instance", func(t *testing.T) {
|
||||
var streamIDs []streamID
|
||||
for i := 0; i < jobsCount; i++ {
|
||||
sid, _ := getStreamIDForTags(map[string]string{
|
||||
"instance": "instance-1",
|
||||
"job": fmt.Sprintf("job-%d", i),
|
||||
})
|
||||
streamIDs = append(streamIDs, sid)
|
||||
}
|
||||
f(`{instance="instance-1"}`, streamIDs)
|
||||
})
|
||||
t.Run("match-re", func(t *testing.T) {
|
||||
var streamIDs []streamID
|
||||
for _, instanceID := range []int{3, 1} {
|
||||
for _, jobID := range []int{0, 2} {
|
||||
sid, _ := getStreamIDForTags(map[string]string{
|
||||
"instance": fmt.Sprintf("instance-%d", instanceID),
|
||||
"job": fmt.Sprintf("job-%d", jobID),
|
||||
})
|
||||
streamIDs = append(streamIDs, sid)
|
||||
}
|
||||
}
|
||||
f(`{job=~"job-(0|2)",instance=~"instance-[13]"}`, streamIDs)
|
||||
})
|
||||
t.Run("match-re-empty-match", func(t *testing.T) {
|
||||
var streamIDs []streamID
|
||||
for _, instanceID := range []int{3, 1} {
|
||||
for _, jobID := range []int{0, 2} {
|
||||
sid, _ := getStreamIDForTags(map[string]string{
|
||||
"instance": fmt.Sprintf("instance-%d", instanceID),
|
||||
"job": fmt.Sprintf("job-%d", jobID),
|
||||
})
|
||||
streamIDs = append(streamIDs, sid)
|
||||
}
|
||||
}
|
||||
f(`{job=~"job-(0|2)|",instance=~"instance-[13]"}`, streamIDs)
|
||||
})
|
||||
t.Run("match-negative-re", func(t *testing.T) {
|
||||
var instanceIDs []int
|
||||
for i := 0; i < instancesCount; i++ {
|
||||
if i != 0 && i != 1 {
|
||||
instanceIDs = append(instanceIDs, i)
|
||||
}
|
||||
}
|
||||
var jobIDs []int
|
||||
for i := 0; i < jobsCount; i++ {
|
||||
if i > 2 {
|
||||
jobIDs = append(jobIDs, i)
|
||||
}
|
||||
}
|
||||
var streamIDs []streamID
|
||||
for _, instanceID := range instanceIDs {
|
||||
for _, jobID := range jobIDs {
|
||||
sid, _ := getStreamIDForTags(map[string]string{
|
||||
"instance": fmt.Sprintf("instance-%d", instanceID),
|
||||
"job": fmt.Sprintf("job-%d", jobID),
|
||||
})
|
||||
streamIDs = append(streamIDs, sid)
|
||||
}
|
||||
}
|
||||
f(`{job!~"job-[0-2]",instance!~"instance-(0|1)"}`, streamIDs)
|
||||
})
|
||||
t.Run("match-negative-re-empty-match", func(t *testing.T) {
|
||||
var instanceIDs []int
|
||||
for i := 0; i < instancesCount; i++ {
|
||||
if i != 0 && i != 1 {
|
||||
instanceIDs = append(instanceIDs, i)
|
||||
}
|
||||
}
|
||||
var jobIDs []int
|
||||
for i := 0; i < jobsCount; i++ {
|
||||
if i > 2 {
|
||||
jobIDs = append(jobIDs, i)
|
||||
}
|
||||
}
|
||||
var streamIDs []streamID
|
||||
for _, instanceID := range instanceIDs {
|
||||
for _, jobID := range jobIDs {
|
||||
sid, _ := getStreamIDForTags(map[string]string{
|
||||
"instance": fmt.Sprintf("instance-%d", instanceID),
|
||||
"job": fmt.Sprintf("job-%d", jobID),
|
||||
})
|
||||
streamIDs = append(streamIDs, sid)
|
||||
}
|
||||
}
|
||||
f(`{job!~"job-[0-2]",instance!~"instance-(0|1)|"}`, streamIDs)
|
||||
})
|
||||
t.Run("match-negative-job", func(t *testing.T) {
|
||||
instanceIDs := []int{2}
|
||||
var jobIDs []int
|
||||
for i := 0; i < jobsCount; i++ {
|
||||
if i != 1 {
|
||||
jobIDs = append(jobIDs, i)
|
||||
}
|
||||
}
|
||||
var streamIDs []streamID
|
||||
for _, instanceID := range instanceIDs {
|
||||
for _, jobID := range jobIDs {
|
||||
sid, _ := getStreamIDForTags(map[string]string{
|
||||
"instance": fmt.Sprintf("instance-%d", instanceID),
|
||||
"job": fmt.Sprintf("job-%d", jobID),
|
||||
})
|
||||
streamIDs = append(streamIDs, sid)
|
||||
}
|
||||
}
|
||||
f(`{instance="instance-2",job!="job-1"}`, streamIDs)
|
||||
})
|
||||
|
||||
mustCloseIndexdb(idb)
|
||||
fs.MustRemoveAll(path)
|
||||
|
||||
closeTestStorage(s)
|
||||
}
155
lib/logstorage/inmemory_part.go
Normal file
@@ -0,0 +1,155 @@
package logstorage
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
// inmemoryPart is an in-memory part.
|
||||
type inmemoryPart struct {
|
||||
// ph contains partHeader information for the given in-memory part.
|
||||
ph partHeader
|
||||
|
||||
metaindex bytesutil.ByteBuffer
|
||||
index bytesutil.ByteBuffer
|
||||
columnsHeader bytesutil.ByteBuffer
|
||||
timestamps bytesutil.ByteBuffer
|
||||
fieldValues bytesutil.ByteBuffer
|
||||
fieldBloomFilter bytesutil.ByteBuffer
|
||||
messageValues bytesutil.ByteBuffer
|
||||
messageBloomFilter bytesutil.ByteBuffer
|
||||
}
|
||||
|
||||
// reset resets mp, so it can be re-used
|
||||
func (mp *inmemoryPart) reset() {
|
||||
mp.ph.reset()
|
||||
|
||||
mp.metaindex.Reset()
|
||||
mp.index.Reset()
|
||||
mp.columnsHeader.Reset()
|
||||
mp.timestamps.Reset()
|
||||
mp.fieldValues.Reset()
|
||||
mp.fieldBloomFilter.Reset()
|
||||
mp.messageValues.Reset()
|
||||
mp.messageBloomFilter.Reset()
|
||||
}
|
||||
|
||||
// mustInitFromRows initializes mp from lr.
|
||||
func (mp *inmemoryPart) mustInitFromRows(lr *LogRows) {
|
||||
mp.reset()
|
||||
|
||||
if len(lr.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
sort.Sort(lr)
|
||||
|
||||
bsw := getBlockStreamWriter()
|
||||
bsw.MustInitForInmemoryPart(mp)
|
||||
trs := getTmpRows()
|
||||
var sidPrev *streamID
|
||||
uncompressedBlockSizeBytes := uint64(0)
|
||||
timestamps := lr.timestamps
|
||||
rows := lr.rows
|
||||
streamIDs := lr.streamIDs
|
||||
for i := range timestamps {
|
||||
streamID := &streamIDs[i]
|
||||
if sidPrev == nil {
|
||||
sidPrev = streamID
|
||||
}
|
||||
|
||||
if uncompressedBlockSizeBytes >= maxUncompressedBlockSize || !streamID.equal(sidPrev) {
|
||||
bsw.MustWriteRows(sidPrev, trs.timestamps, trs.rows)
|
||||
trs.reset()
|
||||
sidPrev = streamID
|
||||
uncompressedBlockSizeBytes = 0
|
||||
}
|
||||
fields := rows[i]
|
||||
trs.timestamps = append(trs.timestamps, timestamps[i])
|
||||
trs.rows = append(trs.rows, fields)
|
||||
uncompressedBlockSizeBytes += uncompressedRowSizeBytes(fields)
|
||||
}
|
||||
bsw.MustWriteRows(sidPrev, trs.timestamps, trs.rows)
|
||||
putTmpRows(trs)
|
||||
bsw.Finalize(&mp.ph)
|
||||
putBlockStreamWriter(bsw)
|
||||
}
|
||||
|
||||
// MustStoreToDisk stores mp to disk at the given path.
|
||||
func (mp *inmemoryPart) MustStoreToDisk(path string) {
|
||||
fs.MustMkdirFailIfExist(path)
|
||||
|
||||
metaindexPath := filepath.Join(path, metaindexFilename)
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
|
||||
timestampsPath := filepath.Join(path, timestampsFilename)
|
||||
fieldValuesPath := filepath.Join(path, fieldValuesFilename)
|
||||
fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename)
|
||||
messageValuesPath := filepath.Join(path, messageValuesFilename)
|
||||
messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
|
||||
|
||||
fs.MustWriteSync(metaindexPath, mp.metaindex.B)
|
||||
fs.MustWriteSync(indexPath, mp.index.B)
|
||||
fs.MustWriteSync(columnsHeaderPath, mp.columnsHeader.B)
|
||||
fs.MustWriteSync(timestampsPath, mp.timestamps.B)
|
||||
fs.MustWriteSync(fieldValuesPath, mp.fieldValues.B)
|
||||
fs.MustWriteSync(fieldBloomFilterPath, mp.fieldBloomFilter.B)
|
||||
fs.MustWriteSync(messageValuesPath, mp.messageValues.B)
|
||||
fs.MustWriteSync(messageBloomFilterPath, mp.messageBloomFilter.B)
|
||||
|
||||
mp.ph.mustWriteMetadata(path)
|
||||
|
||||
fs.MustSyncPath(path)
|
||||
// Do not sync parent directory - it must be synced by the caller.
|
||||
}
|
||||
|
||||
// tmpRows is used as a helper for inmemoryPart.mustInitFromRows()
|
||||
type tmpRows struct {
|
||||
timestamps []int64
|
||||
|
||||
rows [][]Field
|
||||
}
|
||||
|
||||
func (trs *tmpRows) reset() {
|
||||
trs.timestamps = trs.timestamps[:0]
|
||||
|
||||
rows := trs.rows
|
||||
for i := range rows {
|
||||
rows[i] = nil
|
||||
}
|
||||
trs.rows = rows[:0]
|
||||
}
|
||||
|
||||
func getTmpRows() *tmpRows {
|
||||
v := tmpRowsPool.Get()
|
||||
if v == nil {
|
||||
return &tmpRows{}
|
||||
}
|
||||
return v.(*tmpRows)
|
||||
}
|
||||
|
||||
func putTmpRows(trs *tmpRows) {
|
||||
trs.reset()
|
||||
tmpRowsPool.Put(trs)
|
||||
}
|
||||
|
||||
var tmpRowsPool sync.Pool
|
||||
|
||||
func getInmemoryPart() *inmemoryPart {
|
||||
v := inmemoryPartPool.Get()
|
||||
if v == nil {
|
||||
return &inmemoryPart{}
|
||||
}
|
||||
return v.(*inmemoryPart)
|
||||
}
|
||||
|
||||
func putInmemoryPart(mp *inmemoryPart) {
|
||||
mp.reset()
|
||||
inmemoryPartPool.Put(mp)
|
||||
}
|
||||
|
||||
var inmemoryPartPool sync.Pool
343
lib/logstorage/inmemory_part_test.go
Normal file
@@ -0,0 +1,343 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
func TestInmemoryPartMustInitFromRows(t *testing.T) {
|
||||
f := func(lr *LogRows, blocksCountExpected int, compressionRateExpected float64) {
|
||||
t.Helper()
|
||||
|
||||
uncompressedSizeBytesExpected := uncompressedRowsSizeBytes(lr.rows)
|
||||
rowsCountExpected := len(lr.timestamps)
|
||||
minTimestampExpected := int64(math.MaxInt64)
|
||||
maxTimestampExpected := int64(math.MinInt64)
|
||||
|
||||
// make a copy of lr - it is used for comparing the results later,
|
||||
// since lr may be modified by inmemoryPart.mustInitFromRows()
|
||||
lrOrig := GetLogRows(nil, nil)
|
||||
for i, timestamp := range lr.timestamps {
|
||||
if timestamp < minTimestampExpected {
|
||||
minTimestampExpected = timestamp
|
||||
}
|
||||
if timestamp > maxTimestampExpected {
|
||||
maxTimestampExpected = timestamp
|
||||
}
|
||||
lrOrig.mustAddInternal(lr.streamIDs[i], timestamp, lr.rows[i], lr.streamTagsCanonicals[i])
|
||||
}
|
||||
|
||||
// Create inmemory part from lr
|
||||
mp := getInmemoryPart()
|
||||
mp.mustInitFromRows(lr)
|
||||
|
||||
// Check mp.ph
|
||||
ph := &mp.ph
|
||||
checkCompressionRate(t, ph, compressionRateExpected)
|
||||
if ph.UncompressedSizeBytes != uncompressedSizeBytesExpected {
|
||||
t.Fatalf("unexpected UncompressedSizeBytes in partHeader; got %d; want %d", ph.UncompressedSizeBytes, uncompressedSizeBytesExpected)
|
||||
}
|
||||
if ph.RowsCount != uint64(rowsCountExpected) {
|
||||
t.Fatalf("unexpected rowsCount in partHeader; got %d; want %d", ph.RowsCount, rowsCountExpected)
|
||||
}
|
||||
if ph.BlocksCount != uint64(blocksCountExpected) {
|
||||
t.Fatalf("unexpected blocksCount in partHeader; got %d; want %d", ph.BlocksCount, blocksCountExpected)
|
||||
}
|
||||
if ph.RowsCount > 0 {
|
||||
if ph.MinTimestamp != minTimestampExpected {
|
||||
t.Fatalf("unexpected minTimestamp in partHeader; got %d; want %d", ph.MinTimestamp, minTimestampExpected)
|
||||
}
|
||||
if ph.MaxTimestamp != maxTimestampExpected {
|
||||
t.Fatalf("unexpected maxTimestamp in partHeader; got %d; want %d", ph.MaxTimestamp, maxTimestampExpected)
|
||||
}
|
||||
}
|
||||
|
||||
// Read log entries from mp into lrResult
|
||||
sbu := getStringsBlockUnmarshaler()
|
||||
defer putStringsBlockUnmarshaler(sbu)
|
||||
vd := getValuesDecoder()
|
||||
defer putValuesDecoder(vd)
|
||||
lrResult := mp.readLogRows(sbu, vd)
|
||||
putInmemoryPart(mp)
|
||||
|
||||
// compare lrOrig to lrResult
|
||||
if err := checkEqualRows(lrResult, lrOrig); err != nil {
|
||||
t.Fatalf("unequal log entries: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
f(GetLogRows(nil, nil), 0, 0)
|
||||
|
||||
// Check how inmemoryPart works with a single stream
|
||||
f(newTestLogRows(1, 1, 0), 1, 0.8)
|
||||
f(newTestLogRows(1, 2, 0), 1, 0.9)
|
||||
f(newTestLogRows(1, 10, 0), 1, 2.0)
|
||||
f(newTestLogRows(1, 1000, 0), 1, 7.1)
|
||||
f(newTestLogRows(1, 20000, 0), 2, 7.2)
|
||||
|
||||
// Check how inmemoryPart works with multiple streams
|
||||
f(newTestLogRows(2, 1, 0), 2, 0.8)
|
||||
f(newTestLogRows(10, 1, 0), 10, 0.9)
|
||||
f(newTestLogRows(100, 1, 0), 100, 1.0)
|
||||
f(newTestLogRows(10, 5, 0), 10, 1.4)
|
||||
f(newTestLogRows(10, 1000, 0), 10, 7.2)
|
||||
f(newTestLogRows(100, 100, 0), 100, 5.0)
|
||||
}
|
||||
|
||||
func checkCompressionRate(t *testing.T, ph *partHeader, compressionRateExpected float64) {
|
||||
t.Helper()
|
||||
compressionRate := float64(ph.UncompressedSizeBytes) / float64(ph.CompressedSizeBytes)
|
||||
if math.Abs(compressionRate-compressionRateExpected) > 0.1 {
|
||||
t.Fatalf("unexpected compression rate; got %.1f; want %.1f", compressionRate, compressionRateExpected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInmemoryPartInitFromBlockStreamReaders(t *testing.T) {
|
||||
f := func(lrs []*LogRows, blocksCountExpected int, compressionRateExpected float64) {
|
||||
t.Helper()
|
||||
|
||||
uncompressedSizeBytesExpected := uint64(0)
|
||||
rowsCountExpected := 0
|
||||
minTimestampExpected := int64(math.MaxInt64)
|
||||
maxTimestampExpected := int64(math.MinInt64)
|
||||
|
||||
// make a copy of lrs in order to compare the results after merge.
|
||||
lrOrig := GetLogRows(nil, nil)
|
||||
for _, lr := range lrs {
|
||||
uncompressedSizeBytesExpected += uncompressedRowsSizeBytes(lr.rows)
|
||||
rowsCountExpected += len(lr.timestamps)
|
||||
for j, timestamp := range lr.timestamps {
|
||||
if timestamp < minTimestampExpected {
|
||||
minTimestampExpected = timestamp
|
||||
}
|
||||
if timestamp > maxTimestampExpected {
|
||||
maxTimestampExpected = timestamp
|
||||
}
|
||||
lrOrig.mustAddInternal(lr.streamIDs[j], timestamp, lr.rows[j], lr.streamTagsCanonicals[j])
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize readers from lrs
|
||||
var mpsSrc []*inmemoryPart
|
||||
var bsrs []*blockStreamReader
|
||||
for _, lr := range lrs {
|
||||
mp := getInmemoryPart()
|
||||
mp.mustInitFromRows(lr)
|
||||
mpsSrc = append(mpsSrc, mp)
|
||||
|
||||
bsr := getBlockStreamReader()
|
||||
bsr.MustInitFromInmemoryPart(mp)
|
||||
bsrs = append(bsrs, bsr)
|
||||
}
|
||||
defer func() {
|
||||
for _, bsr := range bsrs {
|
||||
putBlockStreamReader(bsr)
|
||||
}
|
||||
for _, mp := range mpsSrc {
|
||||
putInmemoryPart(mp)
|
||||
}
|
||||
}()
|
||||
|
||||
// Merge data from bsrs into mpDst
|
||||
mpDst := getInmemoryPart()
|
||||
bsw := getBlockStreamWriter()
|
||||
bsw.MustInitForInmemoryPart(mpDst)
|
||||
mustMergeBlockStreams(&mpDst.ph, bsw, bsrs, nil)
|
||||
putBlockStreamWriter(bsw)
|
||||
|
||||
// Check mpDst.ph stats
|
||||
ph := &mpDst.ph
|
||||
checkCompressionRate(t, ph, compressionRateExpected)
|
||||
if ph.UncompressedSizeBytes != uncompressedSizeBytesExpected {
|
||||
t.Fatalf("unexpected uncompressedSizeBytes in partHeader; got %d; want %d", ph.UncompressedSizeBytes, uncompressedSizeBytesExpected)
|
||||
}
|
||||
if ph.RowsCount != uint64(rowsCountExpected) {
|
||||
t.Fatalf("unexpected number of entries in partHeader; got %d; want %d", ph.RowsCount, rowsCountExpected)
|
||||
}
|
||||
if ph.BlocksCount != uint64(blocksCountExpected) {
|
||||
t.Fatalf("unexpected blocksCount in partHeader; got %d; want %d", ph.BlocksCount, blocksCountExpected)
|
||||
}
|
||||
if ph.RowsCount > 0 {
|
||||
if ph.MinTimestamp != minTimestampExpected {
|
||||
t.Fatalf("unexpected minTimestamp in partHeader; got %d; want %d", ph.MinTimestamp, minTimestampExpected)
|
||||
}
|
||||
if ph.MaxTimestamp != maxTimestampExpected {
|
||||
t.Fatalf("unexpected maxTimestamp in partHeader; got %d; want %d", ph.MaxTimestamp, maxTimestampExpected)
|
||||
}
|
||||
}
|
||||
|
||||
// Read log entries from mpDst into lrResult
|
||||
sbu := getStringsBlockUnmarshaler()
|
||||
defer putStringsBlockUnmarshaler(sbu)
|
||||
vd := getValuesDecoder()
|
||||
defer putValuesDecoder(vd)
|
||||
lrResult := mpDst.readLogRows(sbu, vd)
|
||||
putInmemoryPart(mpDst)
|
||||
|
||||
// compare lrOrig to lrResult
|
||||
if err := checkEqualRows(lrResult, lrOrig); err != nil {
|
||||
t.Fatalf("unequal log entries: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Check empty readers
|
||||
f(nil, 0, 0)
|
||||
f([]*LogRows{GetLogRows(nil, nil)}, 0, 0)
|
||||
f([]*LogRows{GetLogRows(nil, nil), GetLogRows(nil, nil)}, 0, 0)
|
||||
|
||||
// Check merge with a single reader
|
||||
f([]*LogRows{newTestLogRows(1, 1, 0)}, 1, 0.8)
|
||||
f([]*LogRows{newTestLogRows(1, 10, 0)}, 1, 2.0)
|
||||
f([]*LogRows{newTestLogRows(1, 100, 0)}, 1, 4.9)
|
||||
f([]*LogRows{newTestLogRows(1, 1000, 0)}, 1, 7.1)
|
||||
f([]*LogRows{newTestLogRows(1, 10000, 0)}, 1, 7.4)
|
||||
f([]*LogRows{newTestLogRows(10, 1, 0)}, 10, 0.9)
|
||||
f([]*LogRows{newTestLogRows(100, 1, 0)}, 100, 1.0)
|
||||
f([]*LogRows{newTestLogRows(1000, 1, 0)}, 1000, 1.0)
|
||||
f([]*LogRows{newTestLogRows(10, 10, 0)}, 10, 2.1)
|
||||
f([]*LogRows{newTestLogRows(10, 100, 0)}, 10, 4.9)
|
||||
|
||||
// Check merge with multiple readers
|
||||
f([]*LogRows{
|
||||
newTestLogRows(1, 1, 0),
|
||||
newTestLogRows(1, 1, 1),
|
||||
}, 2, 0.9)
|
||||
f([]*LogRows{
|
||||
newTestLogRows(2, 2, 0),
|
||||
newTestLogRows(2, 2, 0),
|
||||
}, 2, 1.8)
|
||||
f([]*LogRows{
|
||||
newTestLogRows(1, 20, 0),
|
||||
newTestLogRows(1, 10, 1),
|
||||
newTestLogRows(1, 5, 2),
|
||||
}, 3, 2.2)
|
||||
f([]*LogRows{
|
||||
newTestLogRows(10, 20, 0),
|
||||
newTestLogRows(20, 10, 1),
|
||||
newTestLogRows(30, 5, 2),
|
||||
}, 60, 2.0)
|
||||
f([]*LogRows{
|
||||
newTestLogRows(10, 20, 0),
|
||||
newTestLogRows(20, 10, 1),
|
||||
newTestLogRows(30, 5, 2),
|
||||
newTestLogRows(20, 7, 3),
|
||||
newTestLogRows(10, 9, 4),
|
||||
}, 90, 1.9)
|
||||
}
|
||||
|
||||
func newTestLogRows(streams, rowsPerStream int, seed int64) *LogRows {
|
||||
streamTags := []string{
|
||||
"some-stream-tag",
|
||||
}
|
||||
lr := GetLogRows(streamTags, nil)
|
||||
rng := rand.New(rand.NewSource(seed))
|
||||
var fields []Field
|
||||
for i := 0; i < streams; i++ {
|
||||
tenantID := TenantID{
|
||||
AccountID: rng.Uint32(),
|
||||
ProjectID: rng.Uint32(),
|
||||
}
|
||||
for j := 0; j < rowsPerStream; j++ {
|
||||
// Add stream tags
|
||||
fields = append(fields[:0], Field{
|
||||
Name: "some-stream-tag",
|
||||
Value: fmt.Sprintf("some-stream-value-%d", i),
|
||||
})
|
||||
// Add the remaining tags
|
||||
for k := 0; k < 5; k++ {
|
||||
if rng.Float64() < 0.5 {
|
||||
fields = append(fields, Field{
|
||||
Name: fmt.Sprintf("field_%d", k),
|
||||
Value: fmt.Sprintf("value_%d_%d_%d", i, j, k),
|
||||
})
|
||||
}
|
||||
}
|
||||
// add a message field
|
||||
fields = append(fields, Field{
|
||||
Name: "",
|
||||
Value: fmt.Sprintf("some row number %d at stream %d", j, i),
|
||||
})
|
||||
// add a field with constant value
|
||||
fields = append(fields, Field{
|
||||
Name: "job",
|
||||
Value: "foobar",
|
||||
})
|
||||
// add a field with uint value
|
||||
fields = append(fields, Field{
|
||||
Name: "response_size_bytes",
|
||||
Value: fmt.Sprintf("%d", rng.Intn(1234)),
|
||||
})
|
||||
// shuffle fields in order to check de-shuffling algorithm
|
||||
rng.Shuffle(len(fields), func(i, j int) {
|
||||
fields[i], fields[j] = fields[j], fields[i]
|
||||
})
|
||||
timestamp := rng.Int63()
|
||||
lr.MustAdd(tenantID, timestamp, fields)
|
||||
}
|
||||
}
|
||||
return lr
|
||||
}
|
||||
|
||||
func checkEqualRows(lrResult, lrOrig *LogRows) error {
|
||||
if len(lrResult.timestamps) != len(lrOrig.timestamps) {
|
||||
return fmt.Errorf("unexpected length LogRows; got %d; want %d", len(lrResult.timestamps), len(lrOrig.timestamps))
|
||||
}
|
||||
|
||||
sort.Sort(lrResult)
|
||||
sort.Sort(lrOrig)
|
||||
|
||||
sortFieldNames := func(fields []Field) {
|
||||
sort.Slice(fields, func(i, j int) bool {
|
||||
return fields[i].Name < fields[j].Name
|
||||
})
|
||||
}
|
||||
for i := range lrOrig.timestamps {
|
||||
if !lrOrig.streamIDs[i].equal(&lrResult.streamIDs[i]) {
|
||||
return fmt.Errorf("unexpected streamID for log entry %d\ngot\n%s\nwant\n%s", i, &lrResult.streamIDs[i], &lrOrig.streamIDs[i])
|
||||
}
|
||||
if lrOrig.timestamps[i] != lrResult.timestamps[i] {
|
||||
return fmt.Errorf("unexpected timestamp for log entry %d\ngot\n%d\nwant\n%d", i, lrResult.timestamps[i], lrOrig.timestamps[i])
|
||||
}
|
||||
fieldsOrig := lrOrig.rows[i]
|
||||
fieldsResult := lrResult.rows[i]
|
||||
if len(fieldsOrig) != len(fieldsResult) {
|
||||
return fmt.Errorf("unexpected number of fields at log entry %d\ngot\n%s\nwant\n%s", i, fieldsResult, fieldsOrig)
|
||||
}
|
||||
sortFieldNames(fieldsOrig)
|
||||
sortFieldNames(fieldsResult)
|
||||
if !reflect.DeepEqual(fieldsOrig, fieldsResult) {
|
||||
return fmt.Errorf("unexpected fields for log entry %d\ngot\n%s\nwant\n%s", i, fieldsResult, fieldsOrig)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// readLogRows reads log entries from mp.
|
||||
//
|
||||
// This function is for testing and debugging purposes only.
|
||||
func (mp *inmemoryPart) readLogRows(sbu *stringsBlockUnmarshaler, vd *valuesDecoder) *LogRows {
|
||||
lr := GetLogRows(nil, nil)
|
||||
bsr := getBlockStreamReader()
|
||||
defer putBlockStreamReader(bsr)
|
||||
bsr.MustInitFromInmemoryPart(mp)
|
||||
var tmp rows
|
||||
for bsr.NextBlock() {
|
||||
bd := &bsr.blockData
|
||||
streamID := bd.streamID
|
||||
if err := bd.unmarshalRows(&tmp, sbu, vd); err != nil {
|
||||
logger.Panicf("BUG: cannot unmarshal log entries from inmemoryPart: %s", err)
|
||||
}
|
||||
for i, timestamp := range tmp.timestamps {
|
||||
lr.MustAdd(streamID.tenantID, timestamp, tmp.rows[i])
|
||||
lr.streamIDs[len(lr.streamIDs)-1] = streamID
|
||||
}
|
||||
tmp.reset()
|
||||
}
|
||||
return lr
|
||||
}
34
lib/logstorage/inmemory_part_timing_test.go
Normal file
@@ -0,0 +1,34 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkInmemoryPart_MustInitFromRows(b *testing.B) {
|
||||
for _, streams := range []int{1, 10, 100} {
|
||||
b.Run(fmt.Sprintf("streams_%d", streams), func(b *testing.B) {
|
||||
for _, rowsPerStream := range []int{1, 10, 100, 1000} {
|
||||
b.Run(fmt.Sprintf("rowsPerStream_%d", rowsPerStream), func(b *testing.B) {
|
||||
benchmarkInmemoryPartMustInitFromRows(b, streams, rowsPerStream)
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkInmemoryPartMustInitFromRows(b *testing.B, streams, rowsPerStream int) {
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(streams * rowsPerStream))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
lr := newTestLogRows(streams, rowsPerStream, 0)
|
||||
mp := getInmemoryPart()
|
||||
for pb.Next() {
|
||||
mp.mustInitFromRows(lr)
|
||||
if mp.ph.RowsCount != uint64(len(lr.timestamps)) {
|
||||
panic(fmt.Errorf("unexpecte number of entries in the output stream; got %d; want %d", mp.ph.RowsCount, len(lr.timestamps)))
|
||||
}
|
||||
}
|
||||
putInmemoryPart(mp)
|
||||
})
|
||||
}
277
lib/logstorage/log_rows.go
Normal file
@@ -0,0 +1,277 @@
package logstorage
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
)
|
||||
|
||||
// LogRows holds a set of rows needed for Storage.MustAddRows
|
||||
//
|
||||
// LogRows must be obtained via GetLogRows()
|
||||
type LogRows struct {
|
||||
// buf holds all the bytes referred by items in LogRows
|
||||
buf []byte
|
||||
|
||||
// fieldsBuf holds all the fields referred by items in LogRows
|
||||
fieldsBuf []Field
|
||||
|
||||
// streamIDs holds streamIDs for rows added to LogRows
|
||||
streamIDs []streamID
|
||||
|
||||
// streamTagsCanonicals holds streamTagsCanonical entries for rows added to LogRows
|
||||
streamTagsCanonicals [][]byte
|
||||
|
||||
// timestamps holds timestamps for rows added to LogRows
|
||||
timestamps []int64
|
||||
|
||||
// rows holds fields for rows added to LogRows.
|
||||
rows [][]Field
|
||||
|
||||
// sf is a helper for sorting fields in every added row
|
||||
sf sortedFields
|
||||
|
||||
// streamFields contains names for stream fields
|
||||
streamFields map[string]struct{}
|
||||
|
||||
// ignoreFields contains names for log fields, which must be skipped during data ingestion
|
||||
ignoreFields map[string]struct{}
|
||||
}
|
||||
|
||||
type sortedFields []Field
|
||||
|
||||
func (sf *sortedFields) Len() int {
|
||||
return len(*sf)
|
||||
}
|
||||
|
||||
func (sf *sortedFields) Less(i, j int) bool {
|
||||
a := *sf
|
||||
return a[i].Name < a[j].Name
|
||||
}
|
||||
|
||||
func (sf *sortedFields) Swap(i, j int) {
|
||||
a := *sf
|
||||
a[i], a[j] = a[j], a[i]
|
||||
}
|
||||
|
||||
// RowFormatter implements fmt.Stringer for []Field aka a single log row
|
||||
type RowFormatter []Field
|
||||
|
||||
// String returns user-readable representation for rf
|
||||
func (rf *RowFormatter) String() string {
|
||||
b := append([]byte{}, '{')
|
||||
|
||||
fields := *rf
|
||||
if len(fields) > 0 {
|
||||
b = append(b, fields[0].String()...)
|
||||
fields = fields[1:]
|
||||
for _, field := range fields {
|
||||
b = append(b, ',')
|
||||
b = append(b, field.String()...)
|
||||
}
|
||||
}
|
||||
|
||||
b = append(b, '}')
|
||||
return string(b)
|
||||
}
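
// The sketch below is an editorial illustration and is not part of the original
// commit: it shows how RowFormatter may be used to render a single row for logging.
// The exact output depends on Field.String(), which is defined elsewhere in this
// package, so the rendered form is not spelled out here; the field values and the
// function name are hypothetical, and "fmt" is assumed to be imported.
func rowFormatterUsageSketch() {
	fields := []Field{
		{Name: "job", Value: "foobar"},
		{Name: "", Value: "some log message"},
	}
	rf := RowFormatter(fields)
	// String() is defined on *RowFormatter, so pass a pointer when relying on fmt.Stringer.
	fmt.Printf("row: %s\n", &rf)
}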
|
||||
|
||||
// Reset resets lr
|
||||
func (lr *LogRows) Reset() {
|
||||
lr.buf = lr.buf[:0]
|
||||
|
||||
fb := lr.fieldsBuf
|
||||
for i := range fb {
|
||||
fb[i].Reset()
|
||||
}
|
||||
lr.fieldsBuf = fb[:0]
|
||||
|
||||
sids := lr.streamIDs
|
||||
for i := range sids {
|
||||
sids[i].reset()
|
||||
}
|
||||
lr.streamIDs = sids[:0]
|
||||
|
||||
sns := lr.streamTagsCanonicals
|
||||
for i := range sns {
|
||||
sns[i] = nil
|
||||
}
|
||||
lr.streamTagsCanonicals = sns[:0]
|
||||
|
||||
lr.timestamps = lr.timestamps[:0]
|
||||
|
||||
rows := lr.rows
|
||||
for i := range rows {
|
||||
rows[i] = nil
|
||||
}
|
||||
lr.rows = rows[:0]
|
||||
|
||||
lr.sf = nil
|
||||
|
||||
sfs := lr.streamFields
|
||||
for k := range sfs {
|
||||
delete(sfs, k)
|
||||
}
|
||||
|
||||
ifs := lr.ignoreFields
|
||||
for k := range ifs {
|
||||
delete(ifs, k)
|
||||
}
|
||||
}
|
||||
|
||||
// NeedFlush returns true if lr contains too much data, so it must be flushed to the storage.
|
||||
func (lr *LogRows) NeedFlush() bool {
|
||||
return len(lr.buf) > (maxUncompressedBlockSize/8)*7
|
||||
}
|
||||
|
||||
// MustAdd adds a log entry with the given args to lr.
|
||||
//
|
||||
// It is OK to modify the args after returning from the function,
|
||||
// since lr copies all the args to internal data.
|
||||
func (lr *LogRows) MustAdd(tenantID TenantID, timestamp int64, fields []Field) {
|
||||
// Compose StreamTags from fields according to lr.streamFields
|
||||
sfs := lr.streamFields
|
||||
st := GetStreamTags()
|
||||
for i := range fields {
|
||||
f := &fields[i]
|
||||
if _, ok := sfs[f.Name]; ok {
|
||||
st.Add(f.Name, f.Value)
|
||||
}
|
||||
}
|
||||
|
||||
// Marshal StreamTags
|
||||
bb := bbPool.Get()
|
||||
bb.B = st.MarshalCanonical(bb.B)
|
||||
PutStreamTags(st)
|
||||
|
||||
// Calculate the id for the StreamTags
|
||||
var sid streamID
|
||||
sid.tenantID = tenantID
|
||||
sid.id = hash128(bb.B)
|
||||
|
||||
// Store the row
|
||||
lr.mustAddInternal(sid, timestamp, fields, bb.B)
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func (lr *LogRows) mustAddInternal(sid streamID, timestamp int64, fields []Field, streamTagsCanonical []byte) {
|
||||
buf := lr.buf
|
||||
bufLen := len(buf)
|
||||
buf = append(buf, streamTagsCanonical...)
|
||||
|
||||
lr.streamTagsCanonicals = append(lr.streamTagsCanonicals, buf[bufLen:])
|
||||
lr.streamIDs = append(lr.streamIDs, sid)
|
||||
lr.timestamps = append(lr.timestamps, timestamp)
|
||||
|
||||
// Store all the fields
|
||||
ifs := lr.ignoreFields
|
||||
fb := lr.fieldsBuf
|
||||
fieldsLen := len(fb)
|
||||
for i := range fields {
|
||||
f := &fields[i]
|
||||
|
||||
if _, ok := ifs[f.Name]; ok {
|
||||
// Skip fields from the ifs map
|
||||
continue
|
||||
}
|
||||
if f.Value == "" {
|
||||
// Skip fields without values
|
||||
continue
|
||||
}
|
||||
|
||||
fb = append(fb, Field{})
|
||||
dstField := &fb[len(fb)-1]
|
||||
|
||||
bufLen = len(buf)
|
||||
if f.Name != "_msg" {
|
||||
buf = append(buf, f.Name...)
|
||||
}
|
||||
dstField.Name = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
|
||||
bufLen = len(buf)
|
||||
buf = append(buf, f.Value...)
|
||||
dstField.Value = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
}
|
||||
lr.sf = fb[fieldsLen:]
|
||||
sort.Sort(&lr.sf)
|
||||
lr.rows = append(lr.rows, lr.sf)
|
||||
|
||||
lr.fieldsBuf = fb
|
||||
lr.buf = buf
|
||||
}
|
||||
|
||||
// GetLogRows returns LogRows from the pool for the given streamFields.
|
||||
//
|
||||
// streamFields is a set of field names, which must be associated with the stream.
|
||||
//
|
||||
// Return it back to the pool with PutLogRows() when it is no longer needed.
|
||||
func GetLogRows(streamFields, ignoreFields []string) *LogRows {
|
||||
v := logRowsPool.Get()
|
||||
if v == nil {
|
||||
v = &LogRows{}
|
||||
}
|
||||
lr := v.(*LogRows)
|
||||
|
||||
// Initialize streamFields
|
||||
sfs := lr.streamFields
|
||||
if sfs == nil {
|
||||
sfs = make(map[string]struct{}, len(streamFields))
|
||||
lr.streamFields = sfs
|
||||
}
|
||||
for _, f := range streamFields {
|
||||
sfs[f] = struct{}{}
|
||||
}
|
||||
|
||||
// Initialize ignoreFields
|
||||
ifs := lr.ignoreFields
|
||||
if ifs == nil {
|
||||
ifs = make(map[string]struct{}, len(ignoreFields))
|
||||
lr.ignoreFields = ifs
|
||||
}
|
||||
for _, f := range ignoreFields {
|
||||
if f != "" {
|
||||
ifs[f] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
return lr
|
||||
}
|
||||
|
||||
// PutLogRows returns lr to the pool.
|
||||
func PutLogRows(lr *LogRows) {
|
||||
lr.Reset()
|
||||
logRowsPool.Put(lr)
|
||||
}
|
||||
|
||||
var logRowsPool sync.Pool
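
// The sketch below is an editorial illustration and is not part of the original
// commit: it shows the intended GetLogRows / MustAdd / PutLogRows lifecycle,
// mirroring how the benchmark in log_rows_timing_test.go drives this API.
// The stream field names, tenant, timestamp and field values are hypothetical,
// and the actual flush to the storage is elided.
func logRowsUsageSketch() {
	// Treat "host" as a stream field; ingest everything else as regular log fields.
	lr := GetLogRows([]string{"host"}, nil)

	tid := TenantID{AccountID: 1, ProjectID: 2}
	timestamp := int64(1_686_000_000_000_000_000) // nanoseconds; hypothetical value
	fields := []Field{
		{Name: "host", Value: "host-123"},
		{Name: "level", Value: "info"},
		{Name: "_msg", Value: "GET /index.html 200"},
	}
	lr.MustAdd(tid, timestamp, fields)

	if lr.NeedFlush() {
		// Pass lr to the storage here (e.g. Storage.MustAddRows) and then reset or re-obtain it.
	}
	PutLogRows(lr)
}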
|
||||
|
||||
// Len returns the number of items in lr.
|
||||
func (lr *LogRows) Len() int {
|
||||
return len(lr.streamIDs)
|
||||
}
|
||||
|
||||
// Less returns true if (streamID, timestamp) for row i is smaller than the (streamID, timestamp) for row j
|
||||
func (lr *LogRows) Less(i, j int) bool {
|
||||
a := &lr.streamIDs[i]
|
||||
b := &lr.streamIDs[j]
|
||||
if !a.equal(b) {
|
||||
return a.less(b)
|
||||
}
|
||||
return lr.timestamps[i] < lr.timestamps[j]
|
||||
}
|
||||
|
||||
// Swap swaps rows i and j in lr.
|
||||
func (lr *LogRows) Swap(i, j int) {
|
||||
a := &lr.streamIDs[i]
|
||||
b := &lr.streamIDs[j]
|
||||
*a, *b = *b, *a
|
||||
|
||||
tsA, tsB := &lr.timestamps[i], &lr.timestamps[j]
|
||||
*tsA, *tsB = *tsB, *tsA
|
||||
|
||||
snA, snB := &lr.streamTagsCanonicals[i], &lr.streamTagsCanonicals[j]
|
||||
*snA, *snB = *snB, *snA
|
||||
|
||||
fieldsA, fieldsB := &lr.rows[i], &lr.rows[j]
|
||||
*fieldsA, *fieldsB = *fieldsB, *fieldsA
|
||||
}
83
lib/logstorage/log_rows_timing_test.go
Normal file
@@ -0,0 +1,83 @@
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkLogRowsMustAdd(b *testing.B) {
|
||||
rows := newBenchRows(map[string]string{
|
||||
"input.type": "filestream",
|
||||
"ecs.version": "8.0.0",
|
||||
"host.hostname": "foobar-baz-abc",
|
||||
"host.architecture": "x86_64",
|
||||
"host.name": "foobar-baz-abc",
|
||||
"host.os.codename": "bionic",
|
||||
"host.os.type": "linux",
|
||||
"host.os.platform": "ubuntu",
|
||||
"host.os.version": "18.04.6 LTS (Bionic Beaver)",
|
||||
"host.os.family": "debian",
|
||||
"host.os.name": "Ubuntu",
|
||||
"host.os.kernel": "4.15.0-211-generic",
|
||||
"host.id": "a634d50249af449dbcb3ce724822568a",
|
||||
"host.containerized": "false",
|
||||
"host.ip": `["10.0.0.42","10.224.112.1","172.20.0.1","172.18.0.1","172.19.0.1","fc00:f853:ccd:e793::1","fe80::1","172.21.0.1","172.17.0.1"]`,
|
||||
"host.mac": `["02-42-42-90-52-D9","02-42-C6-48-A6-84","02-42-FD-91-7E-17","52-54-00-F5-13-E7","54-E1-AD-89-1A-4C","F8-34-41-3C-C0-85"]`,
|
||||
"agent.ephemeral_id": "6c251f67-7210-4cef-8f72-a9546cbb48cc",
|
||||
"agent.id": "e97243c5-5ef3-4dc1-8828-504f68731e87",
|
||||
"agent.name": "foobar-baz-abc",
|
||||
"agent.type": "filebeat",
|
||||
"agent.version": "8.8.0",
|
||||
"log.file.path": "/var/log/auth.log",
|
||||
"log.offset": "37908",
|
||||
}, []string{
|
||||
"Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=0)",
|
||||
"Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=1)",
|
||||
"Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=2)",
|
||||
"Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=3)",
|
||||
"Jun 4 20:34:07 foobar-baz-abc sudo: pam_unix(sudo:session): session opened for user root by (uid=4)",
|
||||
})
|
||||
streamFields := []string{
|
||||
"host.hostname",
|
||||
"agent.name",
|
||||
"log.file.path",
|
||||
}
|
||||
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(len(rows)))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
benchmarkLogRowsMustAdd(rows, streamFields)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkLogRowsMustAdd(rows [][]Field, streamFields []string) {
|
||||
lr := GetLogRows(streamFields, nil)
|
||||
var tid TenantID
|
||||
for i, fields := range rows {
|
||||
tid.AccountID = uint32(i)
|
||||
tid.ProjectID = uint32(2 * i)
|
||||
timestamp := int64(i) * 1000
|
||||
lr.MustAdd(tid, timestamp, fields)
|
||||
}
|
||||
PutLogRows(lr)
|
||||
}
|
||||
|
||||
func newBenchRows(constFields map[string]string, messages []string) [][]Field {
|
||||
rows := make([][]Field, 0, len(messages))
|
||||
for _, msg := range messages {
|
||||
row := make([]Field, 0, len(constFields)+1)
|
||||
for k, v := range constFields {
|
||||
row = append(row, Field{
|
||||
Name: k,
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
row = append(row, Field{
|
||||
Name: "_msg",
|
||||
Value: msg,
|
||||
})
|
||||
rows = append(rows, row)
|
||||
}
|
||||
return rows
|
||||
}
1100
lib/logstorage/parser.go
Normal file
File diff suppressed because it is too large
966
lib/logstorage/parser_test.go
Normal file
@@ -0,0 +1,966 @@
package logstorage
|
||||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestLexer(t *testing.T) {
|
||||
f := func(s string, tokensExpected []string) {
|
||||
t.Helper()
|
||||
lex := newLexer(s)
|
||||
for _, tokenExpected := range tokensExpected {
|
||||
lex.nextToken()
|
||||
if lex.token != tokenExpected {
|
||||
t.Fatalf("unexpected token; got %q; want %q", lex.token, tokenExpected)
|
||||
}
|
||||
}
|
||||
lex.nextToken()
|
||||
if lex.token != "" {
|
||||
t.Fatalf("unexpected tail token: %q", lex.token)
|
||||
}
|
||||
}
|
||||
|
||||
f("", nil)
|
||||
f(" ", nil)
|
||||
f("foo", []string{"foo"})
|
||||
f("тест123", []string{"тест123"})
|
||||
f("foo:bar", []string{"foo", ":", "bar"})
|
||||
f(` re ( "тест(\":" ) `, []string{"re", "(", `тест(":`, ")"})
|
||||
f(" `foo, bar`* AND baz:(abc or 'd\\'\"ЙЦУК `'*)", []string{"foo, bar", "*", "AND", "baz", ":", "(", "abc", "or", `d'"ЙЦУК ` + "`", "*", ")"})
|
||||
f(`_stream:{foo="bar",a=~"baz", b != 'cd',"d,}a"!~abc}`,
|
||||
[]string{"_stream", ":", "{", "foo", "=", "bar", ",", "a", "=~", "baz", ",", "b", "!=", "cd", ",", "d,}a", "!~", "abc", "}"})
|
||||
}
|
||||
|
||||
func TestNewStreamFilterSuccess(t *testing.T) {
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
sf, err := newStreamFilter(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
result := sf.String()
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected StreamFilter; got %s; want %s", result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("{}", "{}")
|
||||
f(`{foo="bar"}`, `{foo="bar"}`)
|
||||
f(`{ "foo" =~ "bar.+" , baz!="a" or x="y"}`, `{foo=~"bar.+",baz!="a" or x="y"}`)
|
||||
f(`{"a b"='c}"d' OR de="aaa"}`, `{"a b"="c}\"d" or de="aaa"}`)
|
||||
f(`{a="b", c="d" or x="y"}`, `{a="b",c="d" or x="y"}`)
|
||||
}
|
||||
|
||||
func TestNewStreamFilterFailure(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
sf, err := newStreamFilter(s)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if sf != nil {
|
||||
t.Fatalf("expecting nil sf; got %v", sf)
|
||||
}
|
||||
}
|
||||
|
||||
f("")
|
||||
f("}")
|
||||
f("{")
|
||||
f("{foo")
|
||||
f("{foo}")
|
||||
f("{'foo")
|
||||
f("{foo=")
|
||||
f("{foo or bar}")
|
||||
f("{foo=bar")
|
||||
f("{foo=bar baz}")
|
||||
f("{foo='bar' baz='x'}")
|
||||
}
|
||||
|
||||
func TestParseTimeRange(t *testing.T) {
|
||||
f := func(s string, minTimestampExpected, maxTimestampExpected int64) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery("_time:" + s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
tf, ok := q.f.(*timeFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter; got %T; want *timeFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if tf.stringRepr != s {
|
||||
t.Fatalf("unexpected string represenation for timeFilter; got %q; want %q", tf.stringRepr, s)
|
||||
}
|
||||
if tf.minTimestamp != minTimestampExpected {
|
||||
t.Fatalf("unexpected minTimestamp; got %s; want %s", timestampToString(tf.minTimestamp), timestampToString(minTimestampExpected))
|
||||
}
|
||||
if tf.maxTimestamp != maxTimestampExpected {
|
||||
t.Fatalf("unexpected maxTimestamp; got %s; want %s", timestampToString(tf.maxTimestamp), timestampToString(maxTimestampExpected))
|
||||
}
|
||||
}
|
||||
|
||||
var minTimestamp, maxTimestamp int64
|
||||
|
||||
// _time:YYYY -> _time:[YYYY, YYYY+1)
|
||||
minTimestamp = time.Date(2023, time.January, 1, 0, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2024, time.January, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023", minTimestamp, maxTimestamp)
|
||||
f("2023Z", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-hh:mm -> _time:[YYYY-hh:mm, (YYYY+1)-hh:mm)
|
||||
minTimestamp = time.Date(2023, time.January, 1, 2, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2024, time.January, 1, 2, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02:00", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY+hh:mm -> _time:[YYYY+hh:mm, (YYYY+1)+hh:mm)
|
||||
minTimestamp = time.Date(2022, time.December, 31, 22, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.December, 31, 22, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023+02:00", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM -> _time:[YYYY-MM, YYYY-MM+1)
|
||||
minTimestamp = time.Date(2023, time.February, 1, 0, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02", minTimestamp, maxTimestamp)
|
||||
f("2023-02Z", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-hh:mm -> _time:[YYYY-MM-hh:mm, (YYYY-MM+1)-hh:mm)
|
||||
minTimestamp = time.Date(2023, time.February, 1, 2, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 2, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02-02:00", minTimestamp, maxTimestamp)
|
||||
// March
|
||||
minTimestamp = time.Date(2023, time.March, 1, 2, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.April, 1, 2, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-03-02:00", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM+hh:mm -> _time:[YYYY-MM+hh:mm, (YYYY-MM+1)+hh:mm)
|
||||
minTimestamp = time.Date(2023, time.February, 28, 21, 35, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 31, 21, 35, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-03+02:25", minTimestamp, maxTimestamp)
|
||||
// February with timezone offset
|
||||
minTimestamp = time.Date(2023, time.January, 31, 21, 35, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.February, 28, 21, 35, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02+02:25", minTimestamp, maxTimestamp)
|
||||
// February with timezone offset at leap year
|
||||
minTimestamp = time.Date(2024, time.January, 31, 21, 35, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2024, time.February, 29, 21, 35, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2024-02+02:25", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DD
|
||||
minTimestamp = time.Date(2023, time.February, 12, 0, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.February, 13, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02-12", minTimestamp, maxTimestamp)
|
||||
f("2023-02-12Z", minTimestamp, maxTimestamp)
|
||||
// February 28
|
||||
minTimestamp = time.Date(2023, time.February, 28, 0, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02-28", minTimestamp, maxTimestamp)
|
||||
// January 31
|
||||
minTimestamp = time.Date(2023, time.January, 31, 0, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.February, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-01-31", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DD-hh:mm
|
||||
minTimestamp = time.Date(2023, time.January, 31, 2, 25, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.February, 1, 2, 25, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-01-31-02:25", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DD+hh:mm
|
||||
minTimestamp = time.Date(2023, time.February, 28, 21, 35, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 21, 35, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-03-01+02:25", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DDTHH
|
||||
minTimestamp = time.Date(2023, time.February, 28, 23, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02-28T23", minTimestamp, maxTimestamp)
|
||||
f("2023-02-28T23Z", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DDTHH-hh:mm
|
||||
minTimestamp = time.Date(2023, time.February, 28, 01, 25, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.February, 28, 02, 25, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02-27T23-02:25", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DDTHH+hh:mm
|
||||
minTimestamp = time.Date(2023, time.February, 28, 23, 35, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 00, 35, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-03-01T02+02:25", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DDTHH:MM
|
||||
minTimestamp = time.Date(2023, time.February, 28, 23, 59, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02-28T23:59", minTimestamp, maxTimestamp)
|
||||
f("2023-02-28T23:59Z", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DDTHH:MM-hh:mm
|
||||
minTimestamp = time.Date(2023, time.February, 28, 23, 59, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02-28T22:59-01:00", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DDTHH:MM+hh:mm
|
||||
minTimestamp = time.Date(2023, time.February, 28, 23, 59, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-03-01T00:59+01:00", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DDTHH:MM:SS-hh:mm
|
||||
minTimestamp = time.Date(2023, time.February, 28, 23, 59, 59, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02-28T23:59:59", minTimestamp, maxTimestamp)
|
||||
f("2023-02-28T23:59:59Z", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DDTHH:MM:SS-hh:mm
|
||||
minTimestamp = time.Date(2023, time.February, 28, 23, 59, 59, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-02-28T22:59:59-01:00", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:YYYY-MM-DDTHH:MM:SS+hh:mm
|
||||
minTimestamp = time.Date(2023, time.February, 28, 23, 59, 59, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f("2023-03-01T00:59:59+01:00", minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:(start, end)
|
||||
minTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() + 1
|
||||
maxTimestamp = time.Date(2023, time.April, 6, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f(`(2023-03-01,2023-04-06)`, minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:[start, end)
|
||||
minTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.April, 6, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f(`[2023-03-01,2023-04-06)`, minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:(start, end]
|
||||
minTimestamp = time.Date(2023, time.March, 1, 21, 20, 0, 0, time.UTC).UnixNano() + 1
|
||||
maxTimestamp = time.Date(2023, time.April, 7, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f(`(2023-03-01T21:20,2023-04-06]`, minTimestamp, maxTimestamp)
|
||||
|
||||
// _time:[start, end]
|
||||
minTimestamp = time.Date(2023, time.February, 28, 21, 40, 0, 0, time.UTC).UnixNano()
|
||||
maxTimestamp = time.Date(2023, time.April, 7, 0, 0, 0, 0, time.UTC).UnixNano() - 1
|
||||
f(`[2023-03-01+02:20,2023-04-06T23]`, minTimestamp, maxTimestamp)
|
||||
}
|
||||
|
||||
func TestParseSequenceFilter(t *testing.T) {
|
||||
f := func(s, fieldNameExpected string, phrasesExpected []string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
sf, ok := q.f.(*sequenceFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *sequenceFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if sf.fieldName != fieldNameExpected {
|
||||
t.Fatalf("unexpected fieldName; got %q; want %q", sf.fieldName, fieldNameExpected)
|
||||
}
|
||||
if !reflect.DeepEqual(sf.phrases, phrasesExpected) {
|
||||
t.Fatalf("unexpected phrases\ngot\n%q\nwant\n%q", sf.phrases, phrasesExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(`seq()`, ``, nil)
|
||||
f(`foo:seq(foo)`, `foo`, []string{"foo"})
|
||||
f(`_msg:seq("foo bar,baz")`, `_msg`, []string{"foo bar,baz"})
|
||||
f(`seq(foo,bar-baz.aa"bb","c,)d")`, ``, []string{"foo", `bar-baz.aa"bb"`, "c,)d"})
|
||||
}
|
||||
|
||||
func TestParseInFilter(t *testing.T) {
|
||||
f := func(s, fieldNameExpected string, valuesExpected []string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
f, ok := q.f.(*inFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *inFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if f.fieldName != fieldNameExpected {
|
||||
t.Fatalf("unexpected fieldName; got %q; want %q", f.fieldName, fieldNameExpected)
|
||||
}
|
||||
if !reflect.DeepEqual(f.values, valuesExpected) {
|
||||
t.Fatalf("unexpected values\ngot\n%q\nwant\n%q", f.values, valuesExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(`in()`, ``, nil)
|
||||
f(`foo:in(foo)`, `foo`, []string{"foo"})
|
||||
f(`:in("foo bar,baz")`, ``, []string{"foo bar,baz"})
|
||||
f(`ip:in(1.2.3.4, 5.6.7.8, 9.10.11.12)`, `ip`, []string{"1.2.3.4", "5.6.7.8", "9.10.11.12"})
|
||||
f(`foo-bar:in(foo,bar-baz.aa"bb","c,)d")`, `foo-bar`, []string{"foo", `bar-baz.aa"bb"`, "c,)d"})
|
||||
}
|
||||
|
||||
func TestParseIPv4RangeFilter(t *testing.T) {
|
||||
f := func(s, fieldNameExpected string, minValueExpected, maxValueExpected uint32) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
rf, ok := q.f.(*ipv4RangeFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *ipv4RangeFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if rf.fieldName != fieldNameExpected {
|
||||
t.Fatalf("unexpected fieldName; got %q; want %q", rf.fieldName, fieldNameExpected)
|
||||
}
|
||||
if rf.minValue != minValueExpected {
|
||||
t.Fatalf("unexpected minValue; got %08x; want %08x", rf.minValue, minValueExpected)
|
||||
}
|
||||
if rf.maxValue != maxValueExpected {
|
||||
t.Fatalf("unexpected maxValue; got %08x; want %08x", rf.maxValue, maxValueExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(`ipv4_range(1.2.3.4, 5.6.7.8)`, ``, 0x01020304, 0x05060708)
|
||||
f(`_msg:ipv4_range("0.0.0.0", 255.255.255.255)`, `_msg`, 0, 0xffffffff)
|
||||
f(`ip:ipv4_range(1.2.3.0/24)`, `ip`, 0x01020300, 0x010203ff)
|
||||
f(`:ipv4_range("1.2.3.34/24")`, ``, 0x01020300, 0x010203ff)
|
||||
f(`ipv4_range("1.2.3.34/20")`, ``, 0x01020000, 0x01020fff)
|
||||
f(`ipv4_range("1.2.3.15/32")`, ``, 0x0102030f, 0x0102030f)
|
||||
f(`ipv4_range(1.2.3.34/0)`, ``, 0, 0xffffffff)
|
||||
}
|
||||
|
||||
func TestParseStringRangeFilter(t *testing.T) {
|
||||
f := func(s, fieldNameExpected, minValueExpected, maxValueExpected string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
rf, ok := q.f.(*stringRangeFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *stringRangeFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if rf.fieldName != fieldNameExpected {
|
||||
t.Fatalf("unexpected fieldName; got %q; want %q", rf.fieldName, fieldNameExpected)
|
||||
}
|
||||
if rf.minValue != minValueExpected {
|
||||
t.Fatalf("unexpected minValue; got %q; want %q", rf.minValue, minValueExpected)
|
||||
}
|
||||
if rf.maxValue != maxValueExpected {
|
||||
t.Fatalf("unexpected maxValue; got %q; want %q", rf.maxValue, maxValueExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("string_range(foo, bar)", ``, "foo", "bar")
|
||||
f(`abc:string_range("foo,bar", "baz) !")`, `abc`, `foo,bar`, `baz) !`)
|
||||
}
|
||||
|
||||
func TestParseRegexpFilter(t *testing.T) {
|
||||
f := func(s, reExpected string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery("re(" + s + ")")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
rf, ok := q.f.(*regexpFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *regexpFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if reString := rf.re.String(); reString != reExpected {
|
||||
t.Fatalf("unexpected regexp; got %q; want %q", reString, reExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(`""`, ``)
|
||||
f(`foo`, `foo`)
|
||||
f(`"foo.+|bar.*"`, `foo.+|bar.*`)
|
||||
f(`"foo(bar|baz),x[y]"`, `foo(bar|baz),x[y]`)
|
||||
}
|
||||
|
||||
func TestParseAnyCasePhraseFilter(t *testing.T) {
|
||||
f := func(s, fieldNameExpected, phraseExpected string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
pf, ok := q.f.(*anyCasePhraseFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *anyCasePhraseFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if pf.fieldName != fieldNameExpected {
|
||||
t.Fatalf("unexpected fieldName; got %q; want %q", pf.fieldName, fieldNameExpected)
|
||||
}
|
||||
if pf.phrase != phraseExpected {
|
||||
t.Fatalf("unexpected phrase; got %q; want %q", pf.phrase, phraseExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(`i("")`, ``, ``)
|
||||
f(`i(foo)`, ``, `foo`)
|
||||
f(`abc-de.fg:i(foo-bar+baz)`, `abc-de.fg`, `foo-bar+baz`)
|
||||
f(`"abc-de.fg":i("foo-bar+baz")`, `abc-de.fg`, `foo-bar+baz`)
|
||||
}
|
||||
|
||||
func TestParseAnyCasePrefixFilter(t *testing.T) {
|
||||
f := func(s, fieldNameExpected, prefixExpected string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
pf, ok := q.f.(*anyCasePrefixFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *anyCasePrefixFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if pf.fieldName != fieldNameExpected {
|
||||
t.Fatalf("unexpected fieldName; got %q; want %q", pf.fieldName, fieldNameExpected)
|
||||
}
|
||||
if pf.prefix != prefixExpected {
|
||||
t.Fatalf("unexpected prefix; got %q; want %q", pf.prefix, prefixExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(`i(*)`, ``, ``)
|
||||
f(`i(""*)`, ``, ``)
|
||||
f(`i(foo*)`, ``, `foo`)
|
||||
f(`abc-de.fg:i(foo-bar+baz*)`, `abc-de.fg`, `foo-bar+baz`)
|
||||
f(`"abc-de.fg":i("foo-bar+baz"*)`, `abc-de.fg`, `foo-bar+baz`)
|
||||
f(`"abc-de.fg":i("foo-bar*baz *"*)`, `abc-de.fg`, `foo-bar*baz *`)
|
||||
}
|
||||
|
||||
func TestParsePhraseFilter(t *testing.T) {
|
||||
f := func(s, fieldNameExpected, phraseExpected string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
pf, ok := q.f.(*phraseFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *phraseFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if pf.fieldName != fieldNameExpected {
|
||||
t.Fatalf("unexpected fieldName; got %q; want %q", pf.fieldName, fieldNameExpected)
|
||||
}
|
||||
if pf.phrase != phraseExpected {
|
||||
t.Fatalf("unexpected prefix; got %q; want %q", pf.phrase, phraseExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(`""`, ``, ``)
|
||||
f(`foo`, ``, `foo`)
|
||||
f(`abc-de.fg:foo-bar+baz`, `abc-de.fg`, `foo-bar+baz`)
|
||||
f(`"abc-de.fg":"foo-bar+baz"`, `abc-de.fg`, `foo-bar+baz`)
|
||||
f(`"abc-de.fg":"foo-bar*baz *"`, `abc-de.fg`, `foo-bar*baz *`)
|
||||
f(`"foo:bar*,( baz"`, ``, `foo:bar*,( baz`)
|
||||
}
|
||||
|
||||
func TestParsePrefixFilter(t *testing.T) {
|
||||
f := func(s, fieldNameExpected, prefixExpected string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
pf, ok := q.f.(*prefixFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *prefixFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if pf.fieldName != fieldNameExpected {
|
||||
t.Fatalf("unexpected fieldName; got %q; want %q", pf.fieldName, fieldNameExpected)
|
||||
}
|
||||
if pf.prefix != prefixExpected {
|
||||
t.Fatalf("unexpected prefix; got %q; want %q", pf.prefix, prefixExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(`*`, ``, ``)
|
||||
f(`""*`, ``, ``)
|
||||
f(`foo*`, ``, `foo`)
|
||||
f(`abc-de.fg:foo-bar+baz*`, `abc-de.fg`, `foo-bar+baz`)
|
||||
f(`"abc-de.fg":"foo-bar+baz"*`, `abc-de.fg`, `foo-bar+baz`)
|
||||
f(`"abc-de.fg":"foo-bar*baz *"*`, `abc-de.fg`, `foo-bar*baz *`)
|
||||
}
|
||||
|
||||
func TestParseRangeFilter(t *testing.T) {
|
||||
f := func(s, fieldNameExpected string, minValueExpected, maxValueExpected float64) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
rf, ok := q.f.(*rangeFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter type; got %T; want *ipv4RangeFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if rf.fieldName != fieldNameExpected {
|
||||
t.Fatalf("unexpected fieldName; got %q; want %q", rf.fieldName, fieldNameExpected)
|
||||
}
|
||||
if rf.minValue != minValueExpected {
|
||||
t.Fatalf("unexpected minValue; got %v; want %v", rf.minValue, minValueExpected)
|
||||
}
|
||||
if rf.maxValue != maxValueExpected {
|
||||
t.Fatalf("unexpected maxValue; got %v; want %v", rf.maxValue, maxValueExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(`range[-1.234, +2e5]`, ``, -1.234, 2e5)
|
||||
f(`foo:range[-1.234e-5, 2e5]`, `foo`, -1.234e-5, 2e5)
|
||||
f(`range:range["-1.234e5", "-2e-5"]`, `range`, -1.234e5, -2e-5)
|
||||
|
||||
f(`_msg:range[1, 2]`, `_msg`, 1, 2)
|
||||
f(`:range(1, 2)`, ``, math.Nextafter(1, math.Inf(1)), math.Nextafter(2, math.Inf(-1)))
|
||||
f(`range[1, 2)`, ``, 1, math.Nextafter(2, math.Inf(-1)))
|
||||
f(`range("1", 2]`, ``, math.Nextafter(1, math.Inf(1)), 2)
|
||||
}
|
||||
|
||||
func TestParseQuerySuccess(t *testing.T) {
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
result := q.String()
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("foo", "foo")
|
||||
f(":foo", "foo")
|
||||
f(`"":foo`, "foo")
|
||||
f(`"" bar`, `"" bar`)
|
||||
f(`!''`, `!""`)
|
||||
f(`foo:""`, `foo:""`)
|
||||
f(`!foo:""`, `!foo:""`)
|
||||
f(`not foo:""`, `!foo:""`)
|
||||
f(`not(foo)`, `!foo`)
|
||||
f(`not (foo)`, `!foo`)
|
||||
f(`not ( foo or bar )`, `!(foo or bar)`)
|
||||
f(`foo:!""`, `!foo:""`)
|
||||
f("_msg:foo", "foo")
|
||||
f("'foo:bar'", `"foo:bar"`)
|
||||
f("'!foo'", `"!foo"`)
|
||||
f("foo 'and' and bar", `foo "and" bar`)
|
||||
f("foo bar", "foo bar")
|
||||
f("foo and bar", "foo bar")
|
||||
f("foo AND bar", "foo bar")
|
||||
f("foo or bar", "foo or bar")
|
||||
f("foo OR bar", "foo or bar")
|
||||
f("not foo", "!foo")
|
||||
f("! foo", "!foo")
|
||||
f("not !`foo bar`", `"foo bar"`)
|
||||
f("foo or bar and not baz", "foo or bar !baz")
|
||||
f("'foo bar' !baz", `"foo bar" !baz`)
|
||||
f("foo:!bar", `!foo:bar`)
|
||||
f(`foo and bar and baz or x or y or z and zz`, `foo bar baz or x or y or z zz`)
|
||||
f(`foo and bar and (baz or x or y or z) and zz`, `foo bar (baz or x or y or z) zz`)
|
||||
f(`(foo or bar or baz) and x and y and (z or zz)`, `(foo or bar or baz) x y (z or zz)`)
|
||||
f(`(foo or bar or baz) and x and y and not (z or zz)`, `(foo or bar or baz) x y !(z or zz)`)
|
||||
f(`NOT foo AND bar OR baz`, `!foo bar or baz`)
|
||||
f(`NOT (foo AND bar) OR baz`, `!(foo bar) or baz`)
|
||||
f(`foo OR bar AND baz`, `foo or bar baz`)
|
||||
f(`(foo OR bar) AND baz`, `(foo or bar) baz`)
|
||||
|
||||
// parens
|
||||
f(`foo:(bar baz or not :xxx)`, `foo:bar foo:baz or !foo:xxx`)
|
||||
f(`(foo:bar and (foo:baz or aa:bb) and xx) and y`, `foo:bar (foo:baz or aa:bb) xx y`)
|
||||
f("level:error and _msg:(a or b)", "level:error (a or b)")
|
||||
f("level: ( ((error or warn*) and re(foo))) (not (bar))", `(level:error or level:warn*) level:re("foo") !bar`)
|
||||
f("!(foo bar or baz and not aa*)", `!(foo bar or baz !aa*)`)
|
||||
|
||||
// prefix search
|
||||
f(`'foo'* and (a:x* and x:* or y:i(""*)) and i("abc def"*)`, `foo* (a:x* x:* or y:i(*)) i("abc def"*)`)
|
||||
|
||||
// This isn't a prefix search - it is equivalent to `foo AND *`
|
||||
f(`foo *`, `foo *`)
|
||||
f(`"foo" *`, `foo *`)
|
||||
|
||||
// empty filter
|
||||
f(`"" or foo:"" and not bar:""`, `"" or foo:"" !bar:""`)
|
||||
|
||||
// _stream filters
|
||||
f(`_stream:{}`, ``)
|
||||
f(`_stream:{foo="bar", baz=~"x" OR or!="b", "x=},"="d}{"}`, `_stream:{foo="bar",baz=~"x" or "or"!="b","x=},"="d}{"}`)
|
||||
f(`_stream:{or=a or ","="b"}`, `_stream:{"or"="a" or ","="b"}`)
|
||||
f("_stream : { foo = bar , } ", `_stream:{foo="bar"}`)
|
||||
|
||||
// _time filters
|
||||
f(`_time:[-5m,now)`, `_time:[-5m,now)`)
|
||||
f(`_time:( now-1h , now-5m34s5ms]`, `_time:(now-1h,now-5m34s5ms]`)
|
||||
f(`_time:[2023, 2023-01)`, `_time:[2023,2023-01)`)
|
||||
f(`_time:[2023-01-02, 2023-02-03T04)`, `_time:[2023-01-02,2023-02-03T04)`)
|
||||
f(`_time:[2023-01-02T04:05, 2023-02-03T04:05:06)`, `_time:[2023-01-02T04:05,2023-02-03T04:05:06)`)
|
||||
f(`_time:[2023-01-02T04:05:06Z, 2023-02-03T04:05:06.234Z)`, `_time:[2023-01-02T04:05:06Z,2023-02-03T04:05:06.234Z)`)
|
||||
f(`_time:[2023-01-02T04:05:06+02:30, 2023-02-03T04:05:06.234-02:45)`, `_time:[2023-01-02T04:05:06+02:30,2023-02-03T04:05:06.234-02:45)`)
|
||||
f(`_time:[2023-06-07T23:56:34.3456-02:30, now)`, `_time:[2023-06-07T23:56:34.3456-02:30,now)`)
|
||||
f(`_time:("2024-01-02+02:00", now)`, `_time:(2024-01-02+02:00,now)`)
|
||||
f(`_time:now`, `_time:now`)
|
||||
f(`_time:"now"`, `_time:now`)
|
||||
f(`_time:2024Z`, `_time:2024Z`)
|
||||
f(`_time:2024-02:30`, `_time:2024-02:30`)
|
||||
f(`_time:2024-01-02:30`, `_time:2024-01-02:30`)
|
||||
f(`_time:2024-01-02:30`, `_time:2024-01-02:30`)
|
||||
f(`_time:2024-01-02+03:30`, `_time:2024-01-02+03:30`)
|
||||
f(`_time:2024-01-02T10+03:30`, `_time:2024-01-02T10+03:30`)
|
||||
f(`_time:2024-01-02T10:20+03:30`, `_time:2024-01-02T10:20+03:30`)
|
||||
f(`_time:2024-01-02T10:20:40+03:30`, `_time:2024-01-02T10:20:40+03:30`)
|
||||
f(`_time:2024-01-02T10:20:40-03:30`, `_time:2024-01-02T10:20:40-03:30`)
|
||||
f(`_time:"2024-01-02T10:20:40Z"`, `_time:2024-01-02T10:20:40Z`)
|
||||
f(`_time:2023-01-02T04:05:06.789Z`, `_time:2023-01-02T04:05:06.789Z`)
|
||||
f(`_time:2023-01-02T04:05:06.789-02:30`, `_time:2023-01-02T04:05:06.789-02:30`)
|
||||
f(`_time:2023-01-02T04:05:06.789+02:30`, `_time:2023-01-02T04:05:06.789+02:30`)
|
||||
f(`_time:[1234567890, 1400000000]`, `_time:[1234567890,1400000000]`)
|
||||
|
||||
// reserved keywords
|
||||
f("and", `"and"`)
|
||||
f("and and or", `"and" "or"`)
|
||||
f("AnD", `"AnD"`)
|
||||
f("or", `"or"`)
|
||||
f("re 'and' `or` 'not'", `"re" "and" "or" "not"`)
|
||||
f("foo:and", `foo:"and"`)
|
||||
f("'re':or or x", `"re":"or" or x`)
|
||||
f(`"-"`, `"-"`)
|
||||
f(`"!"`, `"!"`)
|
||||
f(`"not"`, `"not"`)
|
||||
f(`''`, `""`)
|
||||
|
||||
// reserved functions
|
||||
f("exact", `"exact"`)
|
||||
f("exact:a", `"exact":a`)
|
||||
f("exact-foo", `exact-foo`)
|
||||
f("a:exact", `a:"exact"`)
|
||||
f("a:exact-foo", `a:exact-foo`)
|
||||
f("exact-foo:b", `exact-foo:b`)
|
||||
f("exact_prefix", `"exact_prefix"`)
|
||||
f("exact_prefix:a", `"exact_prefix":a`)
|
||||
f("exact_prefix-foo", `exact_prefix-foo`)
|
||||
f("a:exact_prefix", `a:"exact_prefix"`)
|
||||
f("a:exact_prefix-foo", `a:exact_prefix-foo`)
|
||||
f("exact_prefix-foo:b", `exact_prefix-foo:b`)
|
||||
f("i", `"i"`)
|
||||
f("i-foo", `i-foo`)
|
||||
f("a:i-foo", `a:i-foo`)
|
||||
f("i-foo:b", `i-foo:b`)
|
||||
f("in", `"in"`)
|
||||
f("in:a", `"in":a`)
|
||||
f("in-foo", `in-foo`)
|
||||
f("a:in", `a:"in"`)
|
||||
f("a:in-foo", `a:in-foo`)
|
||||
f("in-foo:b", `in-foo:b`)
|
||||
f("ipv4_range", `"ipv4_range"`)
|
||||
f("ipv4_range:a", `"ipv4_range":a`)
|
||||
f("ipv4_range-foo", `ipv4_range-foo`)
|
||||
f("a:ipv4_range", `a:"ipv4_range"`)
|
||||
f("a:ipv4_range-foo", `a:ipv4_range-foo`)
|
||||
f("ipv4_range-foo:b", `ipv4_range-foo:b`)
|
||||
f("len_range", `"len_range"`)
|
||||
f("len_range:a", `"len_range":a`)
|
||||
f("len_range-foo", `len_range-foo`)
|
||||
f("a:len_range", `a:"len_range"`)
|
||||
f("a:len_range-foo", `a:len_range-foo`)
|
||||
f("len_range-foo:b", `len_range-foo:b`)
|
||||
f("range", `"range"`)
|
||||
f("range:a", `"range":a`)
|
||||
f("range-foo", `range-foo`)
|
||||
f("a:range", `a:"range"`)
|
||||
f("a:range-foo", `a:range-foo`)
|
||||
f("range-foo:b", `range-foo:b`)
|
||||
f("re", `"re"`)
|
||||
f("re-bar", `re-bar`)
|
||||
f("a:re-bar", `a:re-bar`)
|
||||
f("re-bar:a", `re-bar:a`)
|
||||
f("seq", `"seq"`)
|
||||
f("seq-a", `seq-a`)
|
||||
f("x:seq-a", `x:seq-a`)
|
||||
f("seq-a:x", `seq-a:x`)
|
||||
f("string_range", `"string_range"`)
|
||||
f("string_range-a", `string_range-a`)
|
||||
f("x:string_range-a", `x:string_range-a`)
|
||||
f("string_range-a:x", `string_range-a:x`)
|
||||
|
||||
// exact filter
|
||||
f("exact(foo)", `exact(foo)`)
|
||||
f("exact('foo bar),|baz')", `exact("foo bar),|baz")`)
|
||||
f(`exact(foo-bar,)`, `exact(foo-bar)`)
|
||||
f(`exact(foo|b:ar)`, `exact("foo|b:ar")`)
|
||||
f(`foo:exact(f,)`, `foo:exact(f)`)
|
||||
|
||||
// exact_prefix filter
|
||||
f("exact_prefix(foo)", `exact_prefix(foo)`)
|
||||
f(`exact_prefix("foo bar")`, `exact_prefix("foo bar")`)
|
||||
f(`exact_prefix(foo-bar,)`, `exact_prefix(foo-bar)`)
|
||||
f(`exact_prefix(foo|b:ar)`, `exact_prefix("foo|b:ar")`)
|
||||
f(`foo:exact_prefix(f,)`, `foo:exact_prefix(f)`)
|
||||
|
||||
// i filter
|
||||
f("i(foo)", `i(foo)`)
|
||||
f("i(foo*)", `i(foo*)`)
|
||||
f("i(`foo`* )", `i(foo*)`)
|
||||
f("i(' foo ) bar')", `i(" foo ) bar")`)
|
||||
f("i('foo bar'*)", `i("foo bar"*)`)
|
||||
f(`foo:i(foo:bar-baz|aa+bb)`, `foo:i("foo:bar-baz|aa+bb")`)
|
||||
|
||||
// in filter
|
||||
f(`in()`, `in()`)
|
||||
f(`in(foo)`, `in(foo)`)
|
||||
f(`in(foo, bar)`, `in(foo,bar)`)
|
||||
f(`in("foo bar", baz)`, `in("foo bar",baz)`)
|
||||
f(`foo:in(foo-bar|baz)`, `foo:in("foo-bar|baz")`)
|
||||
|
||||
// ipv4_range filter
|
||||
f(`ipv4_range(1.2.3.4, "5.6.7.8")`, `ipv4_range(1.2.3.4, 5.6.7.8)`)
|
||||
f(`foo:ipv4_range(1.2.3.4, "5.6.7.8" , )`, `foo:ipv4_range(1.2.3.4, 5.6.7.8)`)
|
||||
f(`ipv4_range(1.2.3.4)`, `ipv4_range(1.2.3.4, 1.2.3.4)`)
|
||||
f(`ipv4_range(1.2.3.4/20)`, `ipv4_range(1.2.0.0, 1.2.15.255)`)
|
||||
f(`ipv4_range(1.2.3.4,)`, `ipv4_range(1.2.3.4, 1.2.3.4)`)
|
||||
|
||||
// len_range filter
|
||||
f(`len_range(10, 20)`, `len_range(10,20)`)
|
||||
f(`foo:len_range("10", 20, )`, `foo:len_range(10,20)`)
|
||||
|
||||
// range filter
|
||||
f(`range(1.234, 5656.43454)`, `range(1.234,5656.43454)`)
|
||||
f(`foo:range(-2343.344, 2343.4343)`, `foo:range(-2343.344,2343.4343)`)
|
||||
f(`range(-1.234e-5 , 2.34E+3)`, `range(-1.234e-5,2.34E+3)`)
|
||||
f(`range[123, 456)`, `range[123,456)`)
|
||||
f(`range(123, 445]`, `range(123,445]`)
|
||||
f(`range("1.234e-4", -23)`, `range(1.234e-4,-23)`)
|
||||
|
||||
// re filter
|
||||
f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`)
|
||||
f("re(foo)", `re("foo")`)
|
||||
f(`foo:re(foo-bar|baz.)`, `foo:re("foo-bar|baz.")`)
|
||||
|
||||
// seq filter
|
||||
f(`seq()`, `seq()`)
|
||||
f(`seq(foo)`, `seq(foo)`)
|
||||
f(`seq("foo, bar", baz, abc)`, `seq("foo, bar",baz,abc)`)
|
||||
f(`foo:seq(foo"bar-baz+aa, b)`, `foo:seq("foo\"bar-baz+aa",b)`)
|
||||
|
||||
// string_range filter
|
||||
f(`string_range(foo, bar)`, `string_range(foo, bar)`)
|
||||
f(`foo:string_range("foo, bar", baz)`, `foo:string_range("foo, bar", baz)`)
|
||||
|
||||
// reserved field names
|
||||
f(`"_stream"`, `_stream`)
|
||||
f(`"_time"`, `_time`)
|
||||
f(`"_msg"`, `_msg`)
|
||||
f(`_stream and _time or _msg`, `_stream _time or _msg`)
|
||||
|
||||
// invalid rune
|
||||
f("\xff", `"\xff"`)
|
||||
|
||||
// ip addresses in the query
|
||||
f("1.2.3.4 or ip:5.6.7.9", "1.2.3.4 or ip:5.6.7.9")
|
||||
|
||||
// '-' and '.' chars in field name and search phrase
|
||||
f("trace-id.foo.bar:baz", `trace-id.foo.bar:baz`)
|
||||
f(`custom-Time:2024-01-02T03:04:05+08:00 fooBar OR !baz:xxx`, `custom-Time:"2024-01-02T03:04:05+08:00" fooBar or !baz:xxx`)
|
||||
f("foo-bar+baz*", `"foo-bar+baz"*`)
|
||||
f("foo- bar", `foo- bar`)
|
||||
f("foo -bar", `foo -bar`)
|
||||
f("foo!bar", `"foo!bar"`)
|
||||
f("foo:aa!bb:cc", `foo:"aa!bb:cc"`)
|
||||
f(`foo:bar:baz`, `foo:"bar:baz"`)
|
||||
f(`foo:(bar baz:xxx)`, `foo:bar foo:"baz:xxx"`)
|
||||
f(`foo:(_time:abc or not z)`, `foo:"_time:abc" or !foo:z`)
|
||||
f(`foo:(_msg:a :x _stream:{c="d"})`, `foo:"_msg:a" foo:x foo:"_stream:{c=\"d\"}"`)
|
||||
f(`:(_msg:a:b c)`, `"a:b" c`)
|
||||
f(`"foo"bar baz:"a'b"c`, `"\"foo\"bar" baz:"\"a'b\"c"`)
|
||||
|
||||
// complex queries
|
||||
f(`_time:[-1h, now] _stream:{job="foo",env=~"prod|staging"} level:(error or warn*) and not "connection reset by peer"`,
|
||||
`_time:[-1h,now] _stream:{job="foo",env=~"prod|staging"} (level:error or level:warn*) !"connection reset by peer"`)
|
||||
f(`(_time:(2023-04-20, now] or _time:[-10m, -1m))
|
||||
and (_stream:{job="a"} or _stream:{instance!="b"})
|
||||
and (err* or ip:(ipv4_range(1.2.3.0, 1.2.3.255) and not 1.2.3.4))`,
|
||||
`(_time:(2023-04-20,now] or _time:[-10m,-1m)) (_stream:{job="a"} or _stream:{instance!="b"}) (err* or ip:ipv4_range(1.2.3.0, 1.2.3.255) !ip:1.2.3.4)`)
|
||||
}
|
||||
|
||||
func TestParseQueryFailure(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery(s)
|
||||
if q != nil {
|
||||
t.Fatalf("expecting nil result; got %s", q)
|
||||
}
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
}
|
||||
|
||||
f("")
|
||||
f("|")
|
||||
f("foo|")
|
||||
f("foo|bar")
|
||||
f("foo and")
|
||||
f("foo OR ")
|
||||
f("not")
|
||||
f("NOT")
|
||||
f("not (abc")
|
||||
f("!")
|
||||
|
||||
// invalid parens
|
||||
f("(")
|
||||
f("foo (bar ")
|
||||
f("(foo:'bar")
|
||||
|
||||
// missing filter
|
||||
f(":")
|
||||
f(": ")
|
||||
f("foo: ")
|
||||
f("_msg : ")
|
||||
f(`"": `)
|
||||
|
||||
// invalid quoted strings
|
||||
f(`"foo`)
|
||||
f(`'foo`)
|
||||
f("`foo")
|
||||
|
||||
// invalid _stream filters
|
||||
f("_stream:")
|
||||
f("_stream:{")
|
||||
f("_stream:(")
|
||||
f("_stream:{foo")
|
||||
f("_stream:{foo}")
|
||||
f("_stream:{foo=")
|
||||
f("_stream:{foo='bar")
|
||||
f("_stream:{foo='bar}")
|
||||
f("_stream:{foo=bar or")
|
||||
f("_stream:{foo=bar or}")
|
||||
f("_stream:{foo=bar or baz}")
|
||||
f("_stream:{foo=bar baz x=y}")
|
||||
f("_stream:{foo=bar,")
|
||||
f("_stream:{foo=bar")
|
||||
f("_stream:foo")
|
||||
f("_stream:(foo)")
|
||||
f("_stream:[foo]")
|
||||
|
||||
// invalid _time filters
|
||||
f("_time:")
|
||||
f("_time:[")
|
||||
f("_time:foo")
|
||||
f("_time:{}")
|
||||
f("_time:[foo,bar)")
|
||||
f("_time:(now)")
|
||||
f("_time:[now,")
|
||||
f("_time:(now, not now]")
|
||||
f("_time:(-5m, -1m}")
|
||||
f("_time:[-")
|
||||
f("_time:[now-foo,-bar]")
|
||||
f("_time:[2023-ab,2023]")
|
||||
f("_time:[fooo-02,2023]")
|
||||
f("_time:[2023-01-02T04:05:06+12,2023]")
|
||||
f("_time:[2023-01-02T04:05:06-12,2023]")
|
||||
f("_time:2023-01-02T04:05:06.789")
|
||||
|
||||
// long query with error
|
||||
f(`very long query with error aaa ffdfd fdfdfd fdfd:( ffdfdfdfdfd`)
|
||||
|
||||
// query with unexpected tail
|
||||
f(`foo | bar`)
|
||||
|
||||
// unexpected comma
|
||||
f(`foo,bar`)
|
||||
f(`foo, bar`)
|
||||
f(`foo ,bar`)
|
||||
|
||||
// unexpected token
|
||||
f(`[foo`)
|
||||
f(`foo]bar`)
|
||||
f(`foo] bar`)
|
||||
f(`foo ]bar`)
|
||||
f(`) foo`)
|
||||
f(`foo)bar`)
|
||||
|
||||
// unknown function
|
||||
f(`unknown_function(foo)`)
|
||||
|
||||
// invalid exact
|
||||
f(`exact(`)
|
||||
f(`exact(f, b)`)
|
||||
f(`exact(foo`)
|
||||
f(`exact(foo,`)
|
||||
f(`exact(foo*)`)
|
||||
f(`exact(foo bar)`)
|
||||
f(`exact(foo, bar`)
|
||||
|
||||
// invalid i
|
||||
f(`i(`)
|
||||
f(`i(aa`)
|
||||
f(`i(aa, bb)`)
|
||||
f(`i(*`)
|
||||
f(`i(aaa*`)
|
||||
f(`i(a**)`)
|
||||
f(`i("foo`)
|
||||
f(`i(foo bar)`)
|
||||
|
||||
// invalid in
|
||||
f(`in(`)
|
||||
f(`in(,)`)
|
||||
f(`in(f, b c)`)
|
||||
f(`in(foo`)
|
||||
f(`in(foo,`)
|
||||
f(`in(foo*)`)
|
||||
f(`in(foo, "bar baz"*)`)
|
||||
f(`in(foo, "bar baz"*, abc)`)
|
||||
f(`in(foo bar)`)
|
||||
f(`in(foo, bar`)
|
||||
|
||||
// invalid ipv4_range
|
||||
f(`ipv4_range(`)
|
||||
f(`ipv4_range(foo,bar)`)
|
||||
f(`ipv4_range(1.2.3.4*)`)
|
||||
f(`ipv4_range("1.2.3.4"*)`)
|
||||
f(`ipv4_range(1.2.3.4`)
|
||||
f(`ipv4_range(1.2.3.4,`)
|
||||
f(`ipv4_range(1.2.3.4, 5.6.7)`)
|
||||
f(`ipv4_range(1.2.3.4, 5.6.7.8`)
|
||||
f(`ipv4_range(1.2.3.4, 5.6.7.8,`)
|
||||
f(`ipv4_range(1.2.3.4, 5.6.7.8,,`)
|
||||
f(`ipv4_range(1.2.3.4, 5.6.7.8,5.3.2.1)`)
|
||||
|
||||
// invalid len_range
|
||||
f(`len_range(`)
|
||||
f(`len_range(1)`)
|
||||
f(`len_range(foo, bar)`)
|
||||
f(`len_range(1, bar)`)
|
||||
f(`len_range(1, 2`)
|
||||
f(`len_range(1.2, 3.4)`)
|
||||
|
||||
// invalid range
|
||||
f(`range(`)
|
||||
f(`range(foo,bar)`)
|
||||
f(`range(1"`)
|
||||
f(`range(1,`)
|
||||
f(`range(1)`)
|
||||
f(`range(1,)`)
|
||||
f(`range(1,2,`)
|
||||
f(`range[1,foo)`)
|
||||
f(`range[1,2,3)`)
|
||||
f(`range(1)`)
|
||||
|
||||
// invalid re
|
||||
f("re(")
|
||||
f("re(a, b)")
|
||||
f("foo:re(bar")
|
||||
f("re(`ab(`)")
|
||||
f(`re(a b)`)
|
||||
|
||||
// invalid seq
|
||||
f(`seq(`)
|
||||
f(`seq(,)`)
|
||||
f(`seq(foo`)
|
||||
f(`seq(foo,`)
|
||||
f(`seq(foo*)`)
|
||||
f(`seq(foo*, bar)`)
|
||||
f(`seq(foo bar)`)
|
||||
f(`seq(foo, bar`)
|
||||
|
||||
// invalid string_range
|
||||
f(`string_range(`)
|
||||
f(`string_range(,)`)
|
||||
f(`string_range(foo`)
|
||||
f(`string_range(foo,`)
|
||||
f(`string_range(foo*)`)
|
||||
f(`string_range(foo bar)`)
|
||||
f(`string_range(foo, bar`)
|
||||
f(`string_range(foo)`)
|
||||
f(`string_range(foo, bar, baz)`)
|
||||
}
|
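// Illustrative sketch (not part of the original change): using ParseQuery
// outside of the tests above. ParseQuery returns a *Query whose String()
// method yields the normalized form asserted throughout this file; the
// sample query below is taken from TestParseQuerySuccess.
//
//	q, err := ParseQuery(`_time:[-1h, now] level:(error or warn*)`)
//	if err != nil {
//		// handle the malformed query
//	}
//	fmt.Println(q.String()) // prints the normalized query string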
102
lib/logstorage/part.go
Normal file
|
@@ -0,0 +1,102 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
type part struct {
|
||||
// pt is the partition the part belongs to
|
||||
pt *partition
|
||||
|
||||
// path is the path to the part on disk.
|
||||
//
|
||||
// If the part is in-memory then the path is empty.
|
||||
path string
|
||||
|
||||
// ph contains partHeader for the given part.
|
||||
ph partHeader
|
||||
|
||||
// indexBlockHeaders contains a list of indexBlockHeader entries for the given part.
|
||||
indexBlockHeaders []indexBlockHeader
|
||||
|
||||
indexFile fs.MustReadAtCloser
|
||||
columnsHeaderFile fs.MustReadAtCloser
|
||||
timestampsFile fs.MustReadAtCloser
|
||||
fieldValuesFile fs.MustReadAtCloser
|
||||
fieldBloomFilterFile fs.MustReadAtCloser
|
||||
messageValuesFile fs.MustReadAtCloser
|
||||
messageBloomFilterFile fs.MustReadAtCloser
|
||||
}
|
||||
|
||||
func mustOpenInmemoryPart(pt *partition, mp *inmemoryPart) *part {
|
||||
var p part
|
||||
p.pt = pt
|
||||
p.path = ""
|
||||
p.ph = mp.ph
|
||||
|
||||
// Read metaindex
|
||||
metaindexReader := mp.metaindex.NewReader()
|
||||
var mrs readerWithStats
|
||||
mrs.init(metaindexReader)
|
||||
p.indexBlockHeaders = mustReadIndexBlockHeaders(p.indexBlockHeaders[:0], &mrs)
|
||||
|
||||
// Open data files
|
||||
p.indexFile = &mp.index
|
||||
p.columnsHeaderFile = &mp.columnsHeader
|
||||
p.timestampsFile = &mp.timestamps
|
||||
p.fieldValuesFile = &mp.fieldValues
|
||||
p.fieldBloomFilterFile = &mp.fieldBloomFilter
|
||||
p.messageValuesFile = &mp.messageValues
|
||||
p.messageBloomFilterFile = &mp.messageBloomFilter
|
||||
|
||||
return &p
|
||||
}
|
||||
|
||||
func mustOpenFilePart(pt *partition, path string) *part {
|
||||
var p part
|
||||
p.pt = pt
|
||||
p.path = path
|
||||
p.ph.mustReadMetadata(path)
|
||||
|
||||
metaindexPath := filepath.Join(path, metaindexFilename)
|
||||
indexPath := filepath.Join(path, indexFilename)
|
||||
columnsHeaderPath := filepath.Join(path, columnsHeaderFilename)
|
||||
timestampsPath := filepath.Join(path, timestampsFilename)
|
||||
fieldValuesPath := filepath.Join(path, fieldValuesFilename)
|
||||
fieldBloomFilterPath := filepath.Join(path, fieldBloomFilename)
|
||||
messageValuesPath := filepath.Join(path, messageValuesFilename)
|
||||
messageBloomFilterPath := filepath.Join(path, messageBloomFilename)
|
||||
|
||||
// Read metaindex
|
||||
metaindexReader := filestream.MustOpen(metaindexPath, true)
|
||||
var mrs readerWithStats
|
||||
mrs.init(metaindexReader)
|
||||
p.indexBlockHeaders = mustReadIndexBlockHeaders(p.indexBlockHeaders[:0], &mrs)
|
||||
mrs.MustClose()
|
||||
|
||||
// Open data files
|
||||
p.indexFile = fs.MustOpenReaderAt(indexPath)
|
||||
p.columnsHeaderFile = fs.MustOpenReaderAt(columnsHeaderPath)
|
||||
p.timestampsFile = fs.MustOpenReaderAt(timestampsPath)
|
||||
p.fieldValuesFile = fs.MustOpenReaderAt(fieldValuesPath)
|
||||
p.fieldBloomFilterFile = fs.MustOpenReaderAt(fieldBloomFilterPath)
|
||||
p.messageValuesFile = fs.MustOpenReaderAt(messageValuesPath)
|
||||
p.messageBloomFilterFile = fs.MustOpenReaderAt(messageBloomFilterPath)
|
||||
|
||||
return &p
|
||||
}
|
||||
|
||||
func mustClosePart(p *part) {
|
||||
p.indexFile.MustClose()
|
||||
p.columnsHeaderFile.MustClose()
|
||||
p.timestampsFile.MustClose()
|
||||
p.fieldValuesFile.MustClose()
|
||||
p.fieldBloomFilterFile.MustClose()
|
||||
p.messageValuesFile.MustClose()
|
||||
p.messageBloomFilterFile.MustClose()
|
||||
|
||||
p.pt = nil
|
||||
}
|
84
lib/logstorage/part_header.go
Normal file
|
@@ -0,0 +1,84 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// partHeader contains the information about a single part
|
||||
type partHeader struct {
|
||||
// CompressedSizeBytes is the physical size of the part
|
||||
CompressedSizeBytes uint64
|
||||
|
||||
// UncompressedSizeBytes is the original size of log entries stored in the part
|
||||
UncompressedSizeBytes uint64
|
||||
|
||||
// RowsCount is the number of log entries in the part
|
||||
RowsCount uint64
|
||||
|
||||
// BlocksCount is the number of blocks in the part
|
||||
BlocksCount uint64
|
||||
|
||||
// MinTimestamp is the minimum timestamp seen in the part
|
||||
MinTimestamp int64
|
||||
|
||||
// MaxTimestamp is the maximum timestamp seen in the part
|
||||
MaxTimestamp int64
|
||||
}
|
||||
|
||||
// reset resets ph for subsequent re-use
|
||||
func (ph *partHeader) reset() {
|
||||
ph.CompressedSizeBytes = 0
|
||||
ph.UncompressedSizeBytes = 0
|
||||
ph.RowsCount = 0
|
||||
ph.BlocksCount = 0
|
||||
ph.MinTimestamp = 0
|
||||
ph.MaxTimestamp = 0
|
||||
}
|
||||
|
||||
// String returns string representation of ph.
|
||||
func (ph *partHeader) String() string {
|
||||
return fmt.Sprintf("{CompressedSizeBytes=%d, UncompressedSizeBytes=%d, RowsCount=%d, BlocksCount=%d, MinTimestamp=%s, MaxTimestamp=%s}",
|
||||
ph.CompressedSizeBytes, ph.UncompressedSizeBytes, ph.RowsCount, ph.BlocksCount, timestampToString(ph.MinTimestamp), timestampToString(ph.MaxTimestamp))
|
||||
}
|
||||
|
||||
func (ph *partHeader) mustReadMetadata(partPath string) {
|
||||
ph.reset()
|
||||
|
||||
metadataPath := filepath.Join(partPath, metadataFilename)
|
||||
metadata, err := os.ReadFile(metadataPath)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot read %q: %s", metadataPath, err)
|
||||
}
|
||||
if err := json.Unmarshal(metadata, ph); err != nil {
|
||||
logger.Panicf("FATAL: cannot parse %q: %s", metadataPath, err)
|
||||
}
|
||||
|
||||
// Perform various checks
|
||||
if ph.MinTimestamp > ph.MaxTimestamp {
|
||||
logger.Panicf("FATAL: MinTimestamp cannot exceed MaxTimestamp; got %d vs %d", ph.MinTimestamp, ph.MaxTimestamp)
|
||||
}
|
||||
}
|
||||
|
||||
func (ph *partHeader) mustWriteMetadata(partPath string) {
|
||||
metadata, err := json.Marshal(ph)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: cannot marshal partHeader: %s", err)
|
||||
}
|
||||
metadataPath := filepath.Join(partPath, metadataFilename)
|
||||
fs.MustWriteSync(metadataPath, metadata)
|
||||
}
|
||||
|
||||
func timestampToString(timestamp int64) string {
|
||||
t := time.Unix(0, timestamp).UTC()
|
||||
return strings.Replace(t.Format(timestampForPathname), ".", "", 1)
|
||||
}
|
||||
|
||||
const timestampForPathname = "20060102150405.000000000"
|
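// Illustrative sketch (not part of the original change): a concrete
// example of the formatting performed by timestampToString() above. The
// function name is hypothetical; it relies only on packages this file
// already imports (fmt, strings, time).
func exampleTimestampToString() {
	ts := time.Date(2023, time.June, 20, 5, 55, 12, 345678901, time.UTC).UnixNano()
	s := strings.Replace(time.Unix(0, ts).UTC().Format(timestampForPathname), ".", "", 1)
	fmt.Println(s) // prints 20230620055512345678901
}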
21
lib/logstorage/part_header_test.go
Normal file
|
@@ -0,0 +1,21 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPartHeaderReset(t *testing.T) {
|
||||
ph := &partHeader{
|
||||
CompressedSizeBytes: 123,
|
||||
UncompressedSizeBytes: 234,
|
||||
RowsCount: 1234,
|
||||
MinTimestamp: 3434,
|
||||
MaxTimestamp: 32434,
|
||||
}
|
||||
ph.reset()
|
||||
phZero := &partHeader{}
|
||||
if !reflect.DeepEqual(ph, phZero) {
|
||||
t.Fatalf("unexpected non-zero partHeader after reset: %v", ph)
|
||||
}
|
||||
}
|
237
lib/logstorage/partition.go
Normal file
|
@@ -0,0 +1,237 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// PartitionStats contains stats for the partition.
|
||||
type PartitionStats struct {
|
||||
DatadbStats
|
||||
IndexdbStats
|
||||
}
|
||||
|
||||
type partition struct {
|
||||
// s is the parent storage for the partition
|
||||
s *Storage
|
||||
|
||||
// path is the path to the partition directory
|
||||
path string
|
||||
|
||||
// name is the partition name. It is basically the directory name obtained from path.
|
||||
// It is used for creating keys for partition caches.
|
||||
name string
|
||||
|
||||
// idb is indexdb used for the given partition
|
||||
idb *indexdb
|
||||
|
||||
// ddb is the datadb used for the given partition
|
||||
ddb *datadb
|
||||
}
|
||||
|
||||
// mustCreatePartition creates a partition at the given path.
|
||||
//
|
||||
// The created partition can be opened with mustOpenPartition() after it has been created.
|
||||
//
|
||||
// The created partition can be deleted with mustDeletePartition() when it is no longer needed.
|
||||
func mustCreatePartition(path string) {
|
||||
fs.MustMkdirFailIfExist(path)
|
||||
|
||||
indexdbPath := filepath.Join(path, indexdbDirname)
|
||||
mustCreateIndexdb(indexdbPath)
|
||||
|
||||
datadbPath := filepath.Join(path, datadbDirname)
|
||||
mustCreateDatadb(datadbPath)
|
||||
}
|
||||
|
||||
// mustDeletePartition deletes partition at the given path.
|
||||
//
|
||||
// The partition must be closed with mustClosePartition() before deleting it.
|
||||
func mustDeletePartition(path string) {
|
||||
fs.MustRemoveAll(path)
|
||||
}
|
||||
|
||||
// mustOpenPartition opens partition at the given path for the given Storage.
|
||||
//
|
||||
// The returned partition must be closed when no longer needed with mustClosePartition() call.
|
||||
func mustOpenPartition(s *Storage, path string) *partition {
|
||||
name := filepath.Base(path)
|
||||
|
||||
// Open indexdb
|
||||
indexdbPath := filepath.Join(path, indexdbDirname)
|
||||
idb := mustOpenIndexdb(indexdbPath, name, s)
|
||||
|
||||
// Start initializing the partition
|
||||
pt := &partition{
|
||||
s: s,
|
||||
path: path,
|
||||
name: name,
|
||||
idb: idb,
|
||||
}
|
||||
|
||||
// Open datadb
|
||||
datadbPath := filepath.Join(path, datadbDirname)
|
||||
pt.ddb = mustOpenDatadb(pt, datadbPath, s.flushInterval)
|
||||
|
||||
return pt
|
||||
}
|
||||
|
||||
// mustClosePartition closes pt.
|
||||
//
|
||||
// The caller must ensure that pt is no longer used before the call to mustClosePartition().
|
||||
//
|
||||
// The partition can be deleted if needed after it is closed via mustDeletePartition() call.
|
||||
func mustClosePartition(pt *partition) {
|
||||
// Close indexdb
|
||||
mustCloseIndexdb(pt.idb)
|
||||
pt.idb = nil
|
||||
|
||||
// Close datadb
|
||||
mustCloseDatadb(pt.ddb)
|
||||
pt.ddb = nil
|
||||
|
||||
pt.name = ""
|
||||
pt.path = ""
|
||||
pt.s = nil
|
||||
}
|
||||
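// Illustrative sketch (not part of the original change): the call order
// implied by the lifecycle comments above. `s` and `path` are assumed to
// be an initialized *Storage and a partition directory path; see
// TestPartitionLifecycle in partition_test.go for a complete example.
//
//	mustCreatePartition(path)        // create the on-disk layout once
//	pt := mustOpenPartition(s, path) // open it for reads and writes
//	// ... ingest and query data via pt ...
//	mustClosePartition(pt)           // release indexdb and datadb resources
//	mustDeletePartition(path)        // remove the partition when it is no longer needed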
|
||||
func (pt *partition) mustAddRows(lr *LogRows) {
|
||||
// Register rows in indexdb
|
||||
var pendingRows []int
|
||||
streamIDs := lr.streamIDs
|
||||
for i := range lr.timestamps {
|
||||
streamID := &streamIDs[i]
|
||||
if pt.hasStreamIDInCache(streamID) {
|
||||
continue
|
||||
}
|
||||
if len(pendingRows) == 0 || !streamIDs[pendingRows[len(pendingRows)-1]].equal(streamID) {
|
||||
pendingRows = append(pendingRows, i)
|
||||
}
|
||||
}
|
||||
if len(pendingRows) > 0 {
|
||||
logNewStreams := pt.s.logNewStreams
|
||||
streamTagsCanonicals := lr.streamTagsCanonicals
|
||||
sort.Slice(pendingRows, func(i, j int) bool {
|
||||
return streamIDs[pendingRows[i]].less(&streamIDs[pendingRows[j]])
|
||||
})
|
||||
for i, rowIdx := range pendingRows {
|
||||
streamID := &streamIDs[rowIdx]
|
||||
if i > 0 && streamIDs[pendingRows[i-1]].equal(streamID) {
|
||||
continue
|
||||
}
|
||||
if pt.hasStreamIDInCache(streamID) {
|
||||
continue
|
||||
}
|
||||
if !pt.idb.hasStreamID(streamID) {
|
||||
streamTagsCanonical := streamTagsCanonicals[rowIdx]
|
||||
pt.idb.mustRegisterStream(streamID, streamTagsCanonical)
|
||||
if logNewStreams {
|
||||
pt.logNewStream(streamTagsCanonical, lr.rows[rowIdx])
|
||||
}
|
||||
}
|
||||
pt.putStreamIDToCache(streamID)
|
||||
}
|
||||
}
|
||||
|
||||
// Add rows to datadb
|
||||
pt.ddb.mustAddRows(lr)
|
||||
if pt.s.logIngestedRows {
|
||||
pt.logIngestedRows(lr)
|
||||
}
|
||||
}
|
||||
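// Illustrative sketch (not part of the original change): the
// sort-then-skip-adjacent-duplicates pattern used by mustAddRows() above
// when registering new streams, shown with plain ints instead of
// streamIDs. The function name is hypothetical; only the already-imported
// "sort" package is used.
func uniqueSortedExample(ids []int) []int {
	idxs := make([]int, 0, len(ids))
	for i := range ids {
		idxs = append(idxs, i)
	}
	// Sort the indexes by the values they point to.
	sort.Slice(idxs, func(i, j int) bool { return ids[idxs[i]] < ids[idxs[j]] })
	var unique []int
	for k, idx := range idxs {
		if k > 0 && ids[idxs[k-1]] == ids[idx] {
			// Adjacent entries refer to the same id - handle it only once.
			continue
		}
		unique = append(unique, ids[idx])
	}
	return unique
}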
|
||||
func (pt *partition) logNewStream(streamTagsCanonical []byte, fields []Field) {
|
||||
streamTags := getStreamTagsString(streamTagsCanonical)
|
||||
rf := RowFormatter(fields)
|
||||
logger.Infof("partition %s: new stream %s for log entry %s", pt.path, streamTags, &rf)
|
||||
}
|
||||
|
||||
func (pt *partition) logIngestedRows(lr *LogRows) {
|
||||
var rf RowFormatter
|
||||
for i, fields := range lr.rows {
|
||||
tf := TimeFormatter(lr.timestamps[i])
|
||||
streamTags := getStreamTagsString(lr.streamTagsCanonicals[i])
|
||||
rf = append(rf[:0], fields...)
|
||||
rf = append(rf, Field{
|
||||
Name: "_time",
|
||||
Value: tf.String(),
|
||||
})
|
||||
rf = append(rf, Field{
|
||||
Name: "_stream",
|
||||
Value: streamTags,
|
||||
})
|
||||
sort.Slice(rf, func(i, j int) bool {
|
||||
return rf[i].Name < rf[j].Name
|
||||
})
|
||||
logger.Infof("partition %s: new log entry %s", pt.path, &rf)
|
||||
}
|
||||
}
|
||||
|
||||
// appendStreamTagsByStreamID appends canonical representation of stream tags for the given sid to dst
|
||||
// and returns the result.
|
||||
func (pt *partition) appendStreamTagsByStreamID(dst []byte, sid *streamID) []byte {
|
||||
// Search for the StreamTags in the cache.
|
||||
key := bbPool.Get()
|
||||
defer bbPool.Put(key)
|
||||
|
||||
// There is no need to put the partition name into the key here,
|
||||
// since StreamTags is uniquely identified by streamID.
|
||||
key.B = sid.marshal(key.B)
|
||||
dstLen := len(dst)
|
||||
dst = pt.s.streamTagsCache.GetBig(dst, key.B)
|
||||
if len(dst) > dstLen {
|
||||
// Fast path - the StreamTags have been found in cache.
|
||||
return dst
|
||||
}
|
||||
|
||||
// Slow path - search for StreamTags in idb
|
||||
dst = pt.idb.appendStreamTagsByStreamID(dst, sid)
|
||||
if len(dst) > dstLen {
|
||||
// Store the found StreamTags to cache
|
||||
pt.s.streamTagsCache.SetBig(key.B, dst[dstLen:])
|
||||
}
|
||||
return dst
|
||||
}
|
||||
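// Illustrative sketch (not part of the original change): the read-through
// cache pattern used by appendStreamTagsByStreamID() above, reduced to a
// plain map. The function is hypothetical; the real code consults
// pt.s.streamTagsCache first and falls back to the indexdb lookup.
func lookupWithCacheExample(cache map[string]string, slowLookup func(string) string, key string) string {
	if v, ok := cache[key]; ok {
		// Fast path - the value has been found in the cache.
		return v
	}
	// Slow path - consult the authoritative source and remember the result.
	v := slowLookup(key)
	cache[key] = v
	return v
}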
|
||||
func (pt *partition) hasStreamIDInCache(sid *streamID) bool {
|
||||
var result [1]byte
|
||||
|
||||
bb := bbPool.Get()
|
||||
bb.B = pt.marshalStreamIDCacheKey(bb.B, sid)
|
||||
value := pt.s.streamIDCache.Get(result[:0], bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
||||
return bytes.Equal(value, okValue)
|
||||
}
|
||||
|
||||
func (pt *partition) putStreamIDToCache(sid *streamID) {
|
||||
bb := bbPool.Get()
|
||||
bb.B = pt.marshalStreamIDCacheKey(bb.B, sid)
|
||||
pt.s.streamIDCache.Set(bb.B, okValue)
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func (pt *partition) marshalStreamIDCacheKey(dst []byte, sid *streamID) []byte {
|
||||
dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(pt.name))
|
||||
dst = sid.marshal(dst)
|
||||
return dst
|
||||
}
|
||||
|
||||
var okValue = []byte("1")
|
||||
|
||||
// debugFlush makes sure that all the recently ingested data becomes searchable
|
||||
func (pt *partition) debugFlush() {
|
||||
pt.ddb.debugFlush()
|
||||
pt.idb.debugFlush()
|
||||
}
|
||||
|
||||
func (pt *partition) updateStats(ps *PartitionStats) {
|
||||
pt.ddb.updateStats(&ps.DatadbStats)
|
||||
pt.idb.updateStats(&ps.IndexdbStats)
|
||||
}
|
187
lib/logstorage/partition_test.go
Normal file
|
@@ -0,0 +1,187 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
|
||||
)
|
||||
|
||||
func TestPartitionLifecycle(t *testing.T) {
|
||||
const path = "TestPartitionLifecycle"
|
||||
var ddbStats DatadbStats
|
||||
|
||||
s := newTestStorage()
|
||||
for i := 0; i < 3; i++ {
|
||||
mustCreatePartition(path)
|
||||
for j := 0; j < 2; j++ {
|
||||
pt := mustOpenPartition(s, path)
|
||||
ddbStats.reset()
|
||||
pt.ddb.updateStats(&ddbStats)
|
||||
if n := ddbStats.RowsCount(); n != 0 {
|
||||
t.Fatalf("unexpected non-zero number of entries in empty partition: %d", n)
|
||||
}
|
||||
if ddbStats.InmemoryParts != 0 {
|
||||
t.Fatalf("unexpected non-zero number of in-memory parts in empty partition: %d", ddbStats.InmemoryParts)
|
||||
}
|
||||
if ddbStats.FileParts != 0 {
|
||||
t.Fatalf("unexpected non-zero number of file parts in empty partition: %d", ddbStats.FileParts)
|
||||
}
|
||||
if ddbStats.CompressedInmemorySize != 0 {
|
||||
t.Fatalf("unexpected non-zero size of inmemory parts for empty partition")
|
||||
}
|
||||
if ddbStats.CompressedFileSize != 0 {
|
||||
t.Fatalf("unexpected non-zero size of file parts for empty partition")
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
mustClosePartition(pt)
|
||||
}
|
||||
mustDeletePartition(path)
|
||||
}
|
||||
closeTestStorage(s)
|
||||
}
|
||||
|
||||
func TestPartitionMustAddRowsSerial(t *testing.T) {
|
||||
const path = "TestPartitionMustAddRowsSerial"
|
||||
var ddbStats DatadbStats
|
||||
|
||||
s := newTestStorage()
|
||||
mustCreatePartition(path)
|
||||
pt := mustOpenPartition(s, path)
|
||||
|
||||
// Try adding the same entry one at a time.
|
||||
totalRowsCount := uint64(0)
|
||||
for i := 0; i < 100; i++ {
|
||||
lr := newTestLogRows(1, 1, 0)
|
||||
totalRowsCount += uint64(len(lr.timestamps))
|
||||
pt.mustAddRows(lr)
|
||||
ddbStats.reset()
|
||||
pt.ddb.updateStats(&ddbStats)
|
||||
if n := ddbStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries in partition; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
}
|
||||
|
||||
// Try adding a different entry each time.
|
||||
for i := 0; i < 100; i++ {
|
||||
lr := newTestLogRows(1, 1, int64(i))
|
||||
totalRowsCount += uint64(len(lr.timestamps))
|
||||
pt.mustAddRows(lr)
|
||||
ddbStats.reset()
|
||||
pt.ddb.updateStats(&ddbStats)
|
||||
if n := ddbStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries in partition; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
}
|
||||
|
||||
// Re-open the partition and verify the number of entries remains the same
|
||||
mustClosePartition(pt)
|
||||
pt = mustOpenPartition(s, path)
|
||||
ddbStats.reset()
|
||||
pt.ddb.updateStats(&ddbStats)
|
||||
if n := ddbStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries after re-opening the partition; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
if ddbStats.InmemoryParts != 0 {
|
||||
t.Fatalf("unexpected non-zero number of in-memory parts after re-opening the partition: %d", ddbStats.InmemoryParts)
|
||||
}
|
||||
if ddbStats.FileParts == 0 {
|
||||
t.Fatalf("the number of file parts must be greater than 0 after re-opening the partition")
|
||||
}
|
||||
|
||||
// Try adding entries for multiple streams at a time
|
||||
for i := 0; i < 5; i++ {
|
||||
lr := newTestLogRows(3, 7, 0)
|
||||
totalRowsCount += uint64(len(lr.timestamps))
|
||||
pt.mustAddRows(lr)
|
||||
ddbStats.reset()
|
||||
pt.ddb.updateStats(&ddbStats)
|
||||
if n := ddbStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries in partition; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
|
||||
// Re-open the partition and verify the number of entries remains the same
|
||||
mustClosePartition(pt)
|
||||
pt = mustOpenPartition(s, path)
|
||||
ddbStats.reset()
|
||||
pt.ddb.updateStats(&ddbStats)
|
||||
if n := ddbStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries after re-opening the partition; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
if ddbStats.InmemoryParts != 0 {
|
||||
t.Fatalf("unexpected non-zero number of in-memory parts after re-opening the partition: %d", ddbStats.InmemoryParts)
|
||||
}
|
||||
if ddbStats.FileParts == 0 {
|
||||
t.Fatalf("the number of file parts must be greater than 0 after re-opening the partition")
|
||||
}
|
||||
|
||||
mustClosePartition(pt)
|
||||
mustDeletePartition(path)
|
||||
|
||||
closeTestStorage(s)
|
||||
}
|
||||
|
||||
func TestPartitionMustAddRowsConcurrent(t *testing.T) {
|
||||
const path = "TestPartitionMustAddRowsConcurrent"
|
||||
s := newTestStorage()
|
||||
|
||||
mustCreatePartition(path)
|
||||
pt := mustOpenPartition(s, path)
|
||||
|
||||
const workersCount = 3
|
||||
totalRowsCount := uint64(0)
|
||||
doneCh := make(chan struct{}, workersCount)
|
||||
for i := 0; i < cap(doneCh); i++ {
|
||||
go func() {
|
||||
for j := 0; j < 7; j++ {
|
||||
lr := newTestLogRows(5, 10, int64(j))
|
||||
pt.mustAddRows(lr)
|
||||
atomic.AddUint64(&totalRowsCount, uint64(len(lr.timestamps)))
|
||||
}
|
||||
doneCh <- struct{}{}
|
||||
}()
|
||||
}
|
||||
timer := timerpool.Get(time.Second)
|
||||
defer timerpool.Put(timer)
|
||||
for i := 0; i < cap(doneCh); i++ {
|
||||
select {
|
||||
case <-doneCh:
|
||||
case <-timer.C:
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
|
||||
var ddbStats DatadbStats
|
||||
pt.ddb.updateStats(&ddbStats)
|
||||
if n := ddbStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
|
||||
mustClosePartition(pt)
|
||||
mustDeletePartition(path)
|
||||
|
||||
closeTestStorage(s)
|
||||
}
|
||||
|
||||
// newTestStorage creates a new Storage for tests.
|
||||
//
|
||||
// When the storage is no longer needed, closeTestStorage() must be called.
|
||||
func newTestStorage() *Storage {
|
||||
streamIDCache := workingsetcache.New(1024 * 1024)
|
||||
streamFilterCache := workingsetcache.New(1024 * 1024)
|
||||
return &Storage{
|
||||
flushInterval: time.Second,
|
||||
streamIDCache: streamIDCache,
|
||||
streamFilterCache: streamFilterCache,
|
||||
}
|
||||
}
|
||||
|
||||
// closeTestStorage closes storage created via newTestStorage().
|
||||
func closeTestStorage(s *Storage) {
|
||||
s.streamIDCache.Stop()
|
||||
s.streamFilterCache.Stop()
|
||||
}
|
123
lib/logstorage/rows.go
Normal file
|
@@ -0,0 +1,123 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
)
|
||||
|
||||
// Field is a single field for the log entry.
|
||||
type Field struct {
|
||||
// Name is the name of the field
|
||||
Name string
|
||||
|
||||
// Value is the value of the field
|
||||
Value string
|
||||
}
|
||||
|
||||
// Reset resets f for future re-use.
|
||||
func (f *Field) Reset() {
|
||||
f.Name = ""
|
||||
f.Value = ""
|
||||
}
|
||||
|
||||
// String returns string representation of f.
|
||||
func (f *Field) String() string {
|
||||
name := f.Name
|
||||
if name == "" {
|
||||
name = "_msg"
|
||||
}
|
||||
return fmt.Sprintf("%q:%q", name, f.Value)
|
||||
}
|
||||
|
||||
func (f *Field) marshal(dst []byte) []byte {
|
||||
dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(f.Name))
|
||||
dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(f.Value))
|
||||
return dst
|
||||
}
|
||||
|
||||
func (f *Field) unmarshal(src []byte) ([]byte, error) {
|
||||
srcOrig := src
|
||||
|
||||
// Unmarshal field name
|
||||
tail, b, err := encoding.UnmarshalBytes(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal field name: %w", err)
|
||||
}
|
||||
// Do not use bytesutil.InternBytes(b) here, since it is slower than string(b) in production
|
||||
f.Name = string(b)
|
||||
src = tail
|
||||
|
||||
// Unmarshal field value
|
||||
tail, b, err = encoding.UnmarshalBytes(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal field value: %w", err)
|
||||
}
|
||||
// Do not use bytesutil.InternBytes(b) here, since it is slower than string(b) in production
|
||||
f.Value = string(b)
|
||||
src = tail
|
||||
|
||||
return src, nil
|
||||
}
|
||||
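// Illustrative sketch (not part of the original change): the
// length-prefixed encoding idea behind Field.marshal()/unmarshal() above,
// written against the standard library instead of the VictoriaMetrics
// encoding package. This is a simplified stand-in, not the actual wire
// format, and it omits error handling.
//
//	package main
//
//	import (
//		"encoding/binary"
//		"fmt"
//	)
//
//	func marshalString(dst []byte, s string) []byte {
//		dst = binary.AppendUvarint(dst, uint64(len(s))) // length prefix
//		return append(dst, s...)                        // raw bytes
//	}
//
//	func unmarshalString(src []byte) (string, []byte) {
//		n, size := binary.Uvarint(src)
//		src = src[size:]
//		return string(src[:n]), src[n:]
//	}
//
//	func main() {
//		buf := marshalString(nil, "level")  // field name
//		buf = marshalString(buf, "error")   // field value
//		name, tail := unmarshalString(buf)
//		value, _ := unmarshalString(tail)
//		fmt.Println(name, value) // level error
//	}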
|
||||
// rows is an auxiliary structure used when merging rows
|
||||
type rows struct {
|
||||
fieldsBuf []Field
|
||||
|
||||
timestamps []int64
|
||||
|
||||
rows [][]Field
|
||||
}
|
||||
|
||||
// reset resets rs
|
||||
func (rs *rows) reset() {
|
||||
fb := rs.fieldsBuf
|
||||
for i := range fb {
|
||||
fb[i].Reset()
|
||||
}
|
||||
rs.fieldsBuf = fb[:0]
|
||||
|
||||
rs.timestamps = rs.timestamps[:0]
|
||||
|
||||
rows := rs.rows
|
||||
for i := range rows {
|
||||
rows[i] = nil
|
||||
}
|
||||
rs.rows = rows[:0]
|
||||
}
|
||||
|
||||
// appendRows appends rows with the given timestamps to rs.
|
||||
func (rs *rows) appendRows(timestamps []int64, rows [][]Field) {
|
||||
rs.timestamps = append(rs.timestamps, timestamps...)
|
||||
|
||||
fieldsBuf := rs.fieldsBuf
|
||||
for _, fields := range rows {
|
||||
fieldsLen := len(fieldsBuf)
|
||||
fieldsBuf = append(fieldsBuf, fields...)
|
||||
rs.rows = append(rs.rows, fieldsBuf[fieldsLen:])
|
||||
}
|
||||
rs.fieldsBuf = fieldsBuf
|
||||
}
|
||||
|
||||
// mergeRows merges the two sets of rows sorted by timestamps and appends the merged result to rs.
|
||||
func (rs *rows) mergeRows(timestampsA, timestampsB []int64, fieldsA, fieldsB [][]Field) {
|
||||
for len(timestampsA) > 0 && len(timestampsB) > 0 {
|
||||
i := 0
|
||||
minTimestamp := timestampsB[0]
|
||||
for i < len(timestampsA) && timestampsA[i] <= minTimestamp {
|
||||
i++
|
||||
}
|
||||
rs.appendRows(timestampsA[:i], fieldsA[:i])
|
||||
fieldsA = fieldsA[i:]
|
||||
timestampsA = timestampsA[i:]
|
||||
|
||||
fieldsA, fieldsB = fieldsB, fieldsA
|
||||
timestampsA, timestampsB = timestampsB, timestampsA
|
||||
}
|
||||
if len(timestampsA) == 0 {
|
||||
rs.appendRows(timestampsB, fieldsB)
|
||||
} else {
|
||||
rs.appendRows(timestampsA, fieldsA)
|
||||
}
|
||||
}
|
287
lib/logstorage/rows_test.go
Normal file
|
@@ -0,0 +1,287 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGetRowsSizeBytes(t *testing.T) {
|
||||
f := func(rows [][]Field, uncompressedSizeBytesExpected int) {
|
||||
t.Helper()
|
||||
sizeBytes := uncompressedRowsSizeBytes(rows)
|
||||
if sizeBytes != uint64(uncompressedSizeBytesExpected) {
|
||||
t.Fatalf("unexpected sizeBytes; got %d; want %d", sizeBytes, uncompressedSizeBytesExpected)
|
||||
}
|
||||
}
|
||||
f(nil, 0)
|
||||
f([][]Field{}, 0)
|
||||
f([][]Field{{}}, 35)
|
||||
f([][]Field{{{Name: "foo"}}}, 40)
|
||||
|
||||
_, rows := newTestRows(1000, 10)
|
||||
f(rows, 233900)
|
||||
}
|
||||
|
||||
func TestRowsAppendRows(t *testing.T) {
|
||||
var rs rows
|
||||
|
||||
timestamps := []int64{1}
|
||||
rows := [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
}
|
||||
rs.appendRows(timestamps, rows)
|
||||
if len(rs.timestamps) != 1 {
|
||||
t.Fatalf("unexpected number of row items; got %d; want 1", len(rs.timestamps))
|
||||
}
|
||||
rs.appendRows(timestamps, rows)
|
||||
if len(rs.timestamps) != 2 {
|
||||
t.Fatalf("unexpected number of row items; got %d; want 2", len(rs.timestamps))
|
||||
}
|
||||
for i := range rs.timestamps {
|
||||
if rs.timestamps[i] != timestamps[0] {
|
||||
t.Fatalf("unexpected timestamps copied; got %d; want %d", rs.timestamps[i], timestamps[0])
|
||||
}
|
||||
if !reflect.DeepEqual(rs.rows[i], rows[0]) {
|
||||
t.Fatalf("unexpected fields copied\ngot\n%v\nwant\n%v", rs.rows[i], rows[0])
|
||||
}
|
||||
}
|
||||
|
||||
// append multiple log entries
|
||||
timestamps, rows = newTestRows(100, 4)
|
||||
rs.appendRows(timestamps, rows)
|
||||
if len(rs.timestamps) != 102 {
|
||||
t.Fatalf("unexpected number of row items; got %d; want 102", len(rs.timestamps))
|
||||
}
|
||||
for i := range timestamps {
|
||||
if rs.timestamps[i+2] != timestamps[i] {
|
||||
t.Fatalf("unexpected timestamps copied; got %d; want %d", rs.timestamps[i+2], timestamps[i])
|
||||
}
|
||||
if !reflect.DeepEqual(rs.rows[i+2], rows[i]) {
|
||||
t.Fatalf("unexpected log entry copied\ngot\n%v\nwant\n%v", rs.rows[i+2], rows[i])
|
||||
}
|
||||
}
|
||||
|
||||
// reset rows
|
||||
rs.reset()
|
||||
if len(rs.timestamps) != 0 {
|
||||
t.Fatalf("unexpected non-zero number of row items after reset: %d", len(rs.timestamps))
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeRows(t *testing.T) {
|
||||
f := func(timestampsA, timestampsB []int64, fieldsA, fieldsB [][]Field, timestampsExpected []int64, rowsExpected [][]Field) {
|
||||
t.Helper()
|
||||
var rs rows
|
||||
rs.mergeRows(timestampsA, timestampsB, fieldsA, fieldsB)
|
||||
if !reflect.DeepEqual(rs.timestamps, timestampsExpected) {
|
||||
t.Fatalf("unexpected timestamps after merge\ngot\n%v\nwant\n%v", rs.timestamps, timestampsExpected)
|
||||
}
|
||||
if !reflect.DeepEqual(rs.rows, rowsExpected) {
|
||||
t.Fatalf("unexpected rows after merge\ngot\n%v\nwant\n%v", rs.rows, rowsExpected)
|
||||
}
|
||||
|
||||
// check that the result doesn't change when merging in reverse order
|
||||
rs.reset()
|
||||
rs.mergeRows(timestampsB, timestampsA, fieldsB, fieldsA)
|
||||
if !reflect.DeepEqual(rs.timestamps, timestampsExpected) {
|
||||
t.Fatalf("unexpected timestamps after reverse merge\ngot\n%v\nwant\n%v", rs.timestamps, timestampsExpected)
|
||||
}
|
||||
if !reflect.DeepEqual(rs.rows, rowsExpected) {
|
||||
t.Fatalf("unexpected rows after reverse merge\ngot\n%v\nwant\n%v", rs.rows, rowsExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f(nil, nil, nil, nil, nil, nil)
|
||||
|
||||
// merge single entry with zero entries
|
||||
timestampsA := []int64{123}
|
||||
timestampsB := []int64{}
|
||||
|
||||
fieldsA := [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
}
|
||||
fieldsB := [][]Field{}
|
||||
|
||||
resultTimestamps := []int64{123}
|
||||
resultFields := [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
}
|
||||
f(timestampsA, timestampsB, fieldsA, fieldsB, resultTimestamps, resultFields)
|
||||
|
||||
// merge two single entries
|
||||
timestampsA = []int64{123}
|
||||
timestampsB = []int64{43323}
|
||||
|
||||
fieldsA = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
}
|
||||
fieldsB = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "asdfds",
|
||||
Value: "asdfsa",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
resultTimestamps = []int64{123, 43323}
|
||||
resultFields = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "asdfds",
|
||||
Value: "asdfsa",
|
||||
},
|
||||
},
|
||||
}
|
||||
f(timestampsA, timestampsB, fieldsA, fieldsB, resultTimestamps, resultFields)
|
||||
|
||||
// merge identical entries
|
||||
timestampsA = []int64{123, 456}
|
||||
timestampsB = []int64{123, 456}
|
||||
|
||||
fieldsA = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "baz",
|
||||
},
|
||||
},
|
||||
}
|
||||
fieldsB = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "baz",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
resultTimestamps = []int64{123, 123, 456, 456}
|
||||
resultFields = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "baz",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "baz",
|
||||
},
|
||||
},
|
||||
}
|
||||
f(timestampsA, timestampsB, fieldsA, fieldsB, resultTimestamps, resultFields)
|
||||
|
||||
// merge interleaved entries
|
||||
timestampsA = []int64{12, 13432}
|
||||
timestampsB = []int64{3, 43323}
|
||||
|
||||
fieldsA = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "xfoo",
|
||||
Value: "xbar",
|
||||
},
|
||||
},
|
||||
}
|
||||
fieldsB = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "asd",
|
||||
Value: "assa",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "asdfds",
|
||||
Value: "asdfsa",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
resultTimestamps = []int64{3, 12, 13432, 43323}
|
||||
resultFields = [][]Field{
|
||||
{
|
||||
{
|
||||
Name: "asd",
|
||||
Value: "assa",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "xfoo",
|
||||
Value: "xbar",
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
Name: "asdfds",
|
||||
Value: "asdfsa",
|
||||
},
|
||||
},
|
||||
}
|
||||
f(timestampsA, timestampsB, fieldsA, fieldsB, resultTimestamps, resultFields)
|
||||
}
|
532
lib/logstorage/storage.go
Normal file
|
@@ -0,0 +1,532 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
|
||||
)
|
||||
|
||||
// StorageStats represents stats for the storage. It may be obtained by calling Storage.UpdateStats().
|
||||
type StorageStats struct {
|
||||
// RowsDroppedTooBigTimestamp is the number of rows dropped during data ingestion because their timestamp is bigger than the maximum allowed
|
||||
RowsDroppedTooBigTimestamp uint64
|
||||
|
||||
// RowsDroppedTooSmallTimestamp is the number of rows dropped during data ingestion because their timestamp is smaller than the minimum allowed
|
||||
RowsDroppedTooSmallTimestamp uint64
|
||||
|
||||
// PartitionsCount is the number of partitions in the storage
|
||||
PartitionsCount uint64
|
||||
|
||||
PartitionStats
|
||||
}
|
||||
|
||||
// Reset resets s.
|
||||
func (s *StorageStats) Reset() {
|
||||
*s = StorageStats{}
|
||||
}
|
||||
|
||||
// StorageConfig is the config for the Storage.
|
||||
type StorageConfig struct {
|
||||
// Retention is the retention for the ingested data.
|
||||
//
|
||||
// Older data is automatically deleted.
|
||||
Retention time.Duration
|
||||
|
||||
// FlushInterval is the interval for flushing the in-memory data at the Storage to disk.
|
||||
FlushInterval time.Duration
|
||||
|
||||
// FutureRetention is the maximum allowed interval from the current time into the future for timestamps of the ingested data.
|
||||
//
|
||||
// Log entries with timestamps bigger than now+FutureRetention are ignored.
|
||||
FutureRetention time.Duration
|
||||
|
||||
// LogNewStreams indicates whether to log newly created log streams.
|
||||
//
|
||||
// This can be useful for debugging of high cardinality issues.
|
||||
// https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality
|
||||
LogNewStreams bool
|
||||
|
||||
// LogIngestedRows indicates whether to log the ingested log entries.
|
||||
//
|
||||
// This can be useful for debugging of data ingestion.
|
||||
LogIngestedRows bool
|
||||
}
|
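A minimal usage sketch for this config; the helper name, path and durations below are assumptions chosen only for illustration, while MustOpenStorage and MustClose are defined later in this file:

// exampleOpenStorage is an illustrative sketch; the function name, path and
// durations are assumptions showing how StorageConfig is typically used.
func exampleOpenStorage() {
	cfg := &StorageConfig{
		Retention:       7 * 24 * time.Hour,
		FlushInterval:   time.Second,
		FutureRetention: 24 * time.Hour,
	}
	s := MustOpenStorage("victoria-logs-data", cfg)
	defer s.MustClose()

	// Ingest and query log entries here via s.MustAddRows() and s.RunQuery().
}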
||||
|
||||
// Storage is the storage for log entries.
|
||||
type Storage struct {
|
||||
rowsDroppedTooBigTimestamp uint64
|
||||
rowsDroppedTooSmallTimestamp uint64
|
||||
|
||||
// path is the path to the Storage directory
|
||||
path string
|
||||
|
||||
// retention is the retention for the stored data
|
||||
//
|
||||
// older data is automatically deleted
|
||||
retention time.Duration
|
||||
|
||||
// flushInterval is the interval for flushing in-memory data to disk
|
||||
flushInterval time.Duration
|
||||
|
||||
// futureRetention is the maximum allowed interval to write data into the future
|
||||
futureRetention time.Duration
|
||||
|
||||
// logNewStreams instructs to log new streams if it is set to true
|
||||
logNewStreams bool
|
||||
|
||||
// logIngestedRows instructs to log all the ingested log entries if it is set to true
|
||||
logIngestedRows bool
|
||||
|
||||
// flockF is a lock file, which ensures that the Storage is opened by a single process at a time
|
||||
flockF *os.File
|
||||
|
||||
// partitions is a list of partitions for the Storage.
|
||||
//
|
||||
// It must be accessed under partitionsLock.
|
||||
partitions []*partitionWrapper
|
||||
|
||||
// ptwHot is the "hot" partition, where the last rows were ingested.
|
||||
//
|
||||
// It must be accessed under partitionsLock.
|
||||
ptwHot *partitionWrapper
|
||||
|
||||
// partitionsLock protects partitions and ptwHot.
|
||||
partitionsLock sync.Mutex
|
||||
|
||||
// stopCh is closed when the Storage must be stopped.
|
||||
stopCh chan struct{}
|
||||
|
||||
// wg is used for waiting for background workers at MustClose().
|
||||
wg sync.WaitGroup
|
||||
|
||||
// streamIDCache caches (partition, streamIDs) seen during data ingestion.
|
||||
//
|
||||
// It reduces the load on persistent storage during data ingestion by skipping
|
||||
// the check whether the given stream is already registered in the persistent storage.
|
||||
streamIDCache *workingsetcache.Cache
|
||||
|
||||
// streamTagsCache caches StreamTags entries keyed by streamID.
|
||||
//
|
||||
// There is no need to put partition into the key for StreamTags,
|
||||
// since StreamTags are uniquely identified by streamID.
|
||||
//
|
||||
// It reduces the load on persistent storage during querying
|
||||
// when StreamTags must be found for the particular streamID
|
||||
streamTagsCache *workingsetcache.Cache
|
||||
|
||||
// streamFilterCache caches streamIDs keyed by (partition, []TenantID, StreamFilter).
|
||||
//
|
||||
// It reduces the load on persistent storage during querying by _stream:{...} filter.
|
||||
streamFilterCache *workingsetcache.Cache
|
||||
}
|
||||
|
||||
type partitionWrapper struct {
|
||||
// refCount is the number of active references to p.
|
||||
// When it reaches zero, then the p is closed.
|
||||
refCount int32
|
||||
|
||||
// The flag, which is set when the partition must be deleted after refCount reaches zero.
|
||||
mustBeDeleted uint32
|
||||
|
||||
// day is the day for the partition, expressed as the Unix timestamp in nanoseconds divided by the number of nanoseconds per day.
|
||||
day int64
|
||||
|
||||
// pt is the wrapped partition.
|
||||
pt *partition
|
||||
}
|
||||
|
||||
func newPartitionWrapper(pt *partition, day int64) *partitionWrapper {
|
||||
pw := &partitionWrapper{
|
||||
day: day,
|
||||
pt: pt,
|
||||
}
|
||||
pw.incRef()
|
||||
return pw
|
||||
}
|
||||
|
||||
func (ptw *partitionWrapper) incRef() {
|
||||
atomic.AddInt32(&ptw.refCount, 1)
|
||||
}
|
||||
|
||||
func (ptw *partitionWrapper) decRef() {
|
||||
n := atomic.AddInt32(&ptw.refCount, -1)
|
||||
if n > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
deletePath := ""
|
||||
if atomic.LoadUint32(&ptw.mustBeDeleted) != 0 {
|
||||
deletePath = ptw.pt.path
|
||||
}
|
||||
|
||||
// Close pw.pt, since nobody refers to it.
|
||||
mustClosePartition(ptw.pt)
|
||||
ptw.pt = nil
|
||||
|
||||
// Delete partition if needed.
|
||||
if deletePath != "" {
|
||||
mustDeletePartition(deletePath)
|
||||
}
|
||||
}
|
||||
|
||||
func (ptw *partitionWrapper) canAddAllRows(lr *LogRows) bool {
|
||||
minTimestamp := ptw.day * nsecPerDay
|
||||
maxTimestamp := minTimestamp + nsecPerDay - 1
|
||||
for _, ts := range lr.timestamps {
|
||||
if ts < minTimestamp || ts > maxTimestamp {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// mustCreateStorage creates Storage at the given path.
|
||||
func mustCreateStorage(path string) {
|
||||
fs.MustMkdirFailIfExist(path)
|
||||
|
||||
partitionsPath := filepath.Join(path, partitionsDirname)
|
||||
fs.MustMkdirFailIfExist(partitionsPath)
|
||||
}
|
||||
|
||||
// MustOpenStorage opens Storage at the given path.
|
||||
//
|
||||
// MustClose must be called on the returned Storage when it is no longer needed.
|
||||
func MustOpenStorage(path string, cfg *StorageConfig) *Storage {
|
||||
flushInterval := cfg.FlushInterval
|
||||
if flushInterval < time.Second {
|
||||
flushInterval = time.Second
|
||||
}
|
||||
|
||||
retention := cfg.Retention
|
||||
if retention < 24*time.Hour {
|
||||
retention = 24 * time.Hour
|
||||
}
|
||||
|
||||
futureRetention := cfg.FutureRetention
|
||||
if futureRetention < 24*time.Hour {
|
||||
futureRetention = 24 * time.Hour
|
||||
}
|
||||
|
||||
if !fs.IsPathExist(path) {
|
||||
mustCreateStorage(path)
|
||||
}
|
||||
|
||||
flockF := fs.MustCreateFlockFile(path)
|
||||
|
||||
// Load caches
|
||||
mem := memory.Allowed()
|
||||
streamIDCachePath := filepath.Join(path, cacheDirname, streamIDCacheFilename)
|
||||
streamIDCache := workingsetcache.Load(streamIDCachePath, mem/16)
|
||||
|
||||
streamTagsCache := workingsetcache.New(mem / 10)
|
||||
|
||||
streamFilterCache := workingsetcache.New(mem / 10)
|
||||
|
||||
s := &Storage{
|
||||
path: path,
|
||||
retention: retention,
|
||||
flushInterval: flushInterval,
|
||||
futureRetention: futureRetention,
|
||||
logNewStreams: cfg.LogNewStreams,
|
||||
logIngestedRows: cfg.LogIngestedRows,
|
||||
flockF: flockF,
|
||||
stopCh: make(chan struct{}),
|
||||
|
||||
streamIDCache: streamIDCache,
|
||||
streamTagsCache: streamTagsCache,
|
||||
streamFilterCache: streamFilterCache,
|
||||
}
|
||||
|
||||
partitionsPath := filepath.Join(path, partitionsDirname)
|
||||
fs.MustMkdirIfNotExist(partitionsPath)
|
||||
des := fs.MustReadDir(partitionsPath)
|
||||
ptws := make([]*partitionWrapper, len(des))
|
||||
for i, de := range des {
|
||||
fname := de.Name()
|
||||
|
||||
// Parse the day for the partition
|
||||
t, err := time.Parse(partitionNameFormat, fname)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot parse partition filename %q at %q; it must be in the form YYYYMMDD: %s", fname, partitionsPath, err)
|
||||
}
|
||||
day := t.UTC().UnixNano() / nsecPerDay
|
||||
|
||||
partitionPath := filepath.Join(partitionsPath, fname)
|
||||
pt := mustOpenPartition(s, partitionPath)
|
||||
ptws[i] = newPartitionWrapper(pt, day)
|
||||
}
|
||||
sort.Slice(ptws, func(i, j int) bool {
|
||||
return ptws[i].day < ptws[j].day
|
||||
})
|
||||
|
||||
// Delete partitions from the future if needed
|
||||
maxAllowedDay := s.getMaxAllowedDay()
|
||||
j := len(ptws) - 1
|
||||
for j >= 0 {
|
||||
ptw := ptws[j]
|
||||
if ptw.day <= maxAllowedDay {
|
||||
break
|
||||
}
|
||||
logger.Infof("the partition %s is scheduled to be deleted because it is outside the -futureRetention=%dd", ptw.pt.path, durationToDays(s.futureRetention))
|
||||
atomic.StoreUint32(&ptw.mustBeDeleted, 1)
|
||||
ptw.decRef()
|
||||
j--
|
||||
}
|
||||
j++
|
||||
for i := j; i < len(ptws); i++ {
|
||||
ptws[i] = nil
|
||||
}
|
||||
ptws = ptws[:j]
|
||||
|
||||
s.partitions = ptws
|
||||
s.runRetentionWatcher()
|
||||
return s
|
||||
}
|
||||
|
||||
const partitionNameFormat = "20060102"
|
||||
|
||||
func (s *Storage) runRetentionWatcher() {
|
||||
s.wg.Add(1)
|
||||
go func() {
|
||||
s.watchRetention()
|
||||
s.wg.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
func (s *Storage) watchRetention() {
|
||||
ticker := time.NewTicker(time.Hour)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
var ptwsToDelete []*partitionWrapper
|
||||
minAllowedDay := s.getMinAllowedDay()
|
||||
|
||||
s.partitionsLock.Lock()
|
||||
|
||||
// Delete outdated partitions.
|
||||
// s.partitions are sorted by day, so the partitions that can become outdated are located at the beginning of the list
|
||||
for _, ptw := range s.partitions {
|
||||
if ptw.day >= minAllowedDay {
|
||||
break
|
||||
}
|
||||
ptwsToDelete = append(ptwsToDelete, ptw)
|
||||
}
|
||||
for i := range ptwsToDelete {
|
||||
s.partitions[i] = nil
|
||||
}
|
||||
s.partitions = s.partitions[len(ptwsToDelete):]
|
||||
|
||||
s.partitionsLock.Unlock()
|
||||
|
||||
for _, ptw := range ptwsToDelete {
|
||||
logger.Infof("the partition %s is scheduled to be deleted because it is outside the -retentionPeriod=%dd", ptw.pt.path, durationToDays(s.retention))
|
||||
atomic.StoreUint32(&ptw.mustBeDeleted, 1)
|
||||
ptw.decRef()
|
||||
}
|
||||
|
||||
select {
|
||||
case <-s.stopCh:
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Storage) getMinAllowedDay() int64 {
|
||||
return time.Now().UTC().Add(-s.retention).UnixNano() / nsecPerDay
|
||||
}
|
||||
|
||||
func (s *Storage) getMaxAllowedDay() int64 {
|
||||
return time.Now().UTC().Add(s.futureRetention).UnixNano() / nsecPerDay
|
||||
}
|
||||
|
||||
// MustClose closes s.
|
||||
//
|
||||
// It is expected that nobody uses the storage at the close time.
|
||||
func (s *Storage) MustClose() {
|
||||
// Stop background workers
|
||||
close(s.stopCh)
|
||||
s.wg.Wait()
|
||||
|
||||
// Close partitions
|
||||
for _, pw := range s.partitions {
|
||||
pw.decRef()
|
||||
if pw.refCount != 0 {
|
||||
logger.Panicf("BUG: there are %d users of partition", pw.refCount)
|
||||
}
|
||||
}
|
||||
s.partitions = nil
|
||||
|
||||
// Save caches
|
||||
streamIDCachePath := filepath.Join(s.path, cacheDirname, streamIDCacheFilename)
|
||||
if err := s.streamIDCache.Save(streamIDCachePath); err != nil {
|
||||
logger.Panicf("FATAL: cannot save streamID cache to %q: %s", streamIDCachePath, err)
|
||||
}
|
||||
s.streamIDCache.Stop()
|
||||
s.streamIDCache = nil
|
||||
|
||||
s.streamTagsCache.Stop()
|
||||
s.streamTagsCache = nil
|
||||
|
||||
s.streamFilterCache.Stop()
|
||||
s.streamFilterCache = nil
|
||||
|
||||
// release lock file
|
||||
fs.MustClose(s.flockF)
|
||||
s.flockF = nil
|
||||
|
||||
s.path = ""
|
||||
}
|
||||
|
||||
// MustAddRows adds lr to s.
|
||||
func (s *Storage) MustAddRows(lr *LogRows) {
|
||||
// Fast path - try adding all the rows to the hot partition
|
||||
s.partitionsLock.Lock()
|
||||
ptwHot := s.ptwHot
|
||||
if ptwHot != nil {
|
||||
ptwHot.incRef()
|
||||
}
|
||||
s.partitionsLock.Unlock()
|
||||
|
||||
if ptwHot != nil {
|
||||
if ptwHot.canAddAllRows(lr) {
|
||||
ptwHot.pt.mustAddRows(lr)
|
||||
ptwHot.decRef()
|
||||
return
|
||||
}
|
||||
ptwHot.decRef()
|
||||
}
|
||||
|
||||
// Slow path - rows cannot be added to the hot partition, so split rows among available partitions
|
||||
minAllowedDay := s.getMinAllowedDay()
|
||||
maxAllowedDay := s.getMaxAllowedDay()
|
||||
m := make(map[int64]*LogRows)
|
||||
for i, ts := range lr.timestamps {
|
||||
day := ts / nsecPerDay
|
||||
if day < minAllowedDay {
|
||||
rf := RowFormatter(lr.rows[i])
|
||||
tsf := TimeFormatter(ts)
|
||||
minAllowedTsf := TimeFormatter(minAllowedDay * nsecPerDay)
|
||||
tooSmallTimestampLogger.Warnf("skipping log entry with too small timestamp=%s; it must be bigger than %s according "+
|
||||
"to the configured -retentionPeriod. See https://docs.victoriametrics.com/VictoriaLogs/#retention ; "+
|
||||
"log entry: %s", &tsf, &minAllowedTsf, &rf)
|
||||
atomic.AddUint64(&s.rowsDroppedTooSmallTimestamp, 1)
|
||||
continue
|
||||
}
|
||||
if day > maxAllowedDay {
|
||||
rf := RowFormatter(lr.rows[i])
|
||||
tsf := TimeFormatter(ts)
|
||||
maxAllowedTsf := TimeFormatter(maxAllowedDay * nsecPerDay)
|
||||
tooBigTimestampLogger.Warnf("skipping log entry with too big timestamp=%s; it must be smaller than %s according "+
|
||||
"to the configured -futureRetention; see https://docs.victoriametrics.com/VictoriaLogs/#retention ; "+
|
||||
"log entry: %s", &tsf, &maxAllowedTsf, &rf)
|
||||
atomic.AddUint64(&s.rowsDroppedTooBigTimestamp, 1)
|
||||
continue
|
||||
}
|
||||
lrPart := m[day]
|
||||
if lrPart == nil {
|
||||
lrPart = GetLogRows(nil, nil)
|
||||
m[day] = lrPart
|
||||
}
|
||||
lrPart.mustAddInternal(lr.streamIDs[i], ts, lr.rows[i], lr.streamTagsCanonicals[i])
|
||||
}
|
||||
for day, lrPart := range m {
|
||||
ptw := s.getPartitionForDay(day)
|
||||
ptw.pt.mustAddRows(lrPart)
|
||||
ptw.decRef()
|
||||
PutLogRows(lrPart)
|
||||
}
|
||||
}
|
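A minimal ingestion sketch built from the same calls the tests in this commit use (GetLogRows, LogRows.MustAdd, PutLogRows); the helper name, tenant and field values are illustrative assumptions:

// exampleIngestRow is an illustrative sketch; the helper name, tenant and field
// values are assumptions showing the typical MustAddRows call sequence.
func exampleIngestRow(s *Storage) {
	lr := GetLogRows(nil, nil)
	defer PutLogRows(lr)

	tenantID := TenantID{AccountID: 1, ProjectID: 2}
	fields := []Field{
		{Name: "_msg", Value: "user logged in"},
		{Name: "source-file", Value: "/var/log/app.log"},
	}
	lr.MustAdd(tenantID, time.Now().UnixNano(), fields)

	s.MustAddRows(lr)
}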
||||
|
||||
var tooSmallTimestampLogger = logger.WithThrottler("too_small_timestamp", 5*time.Second)
|
||||
var tooBigTimestampLogger = logger.WithThrottler("too_big_timestamp", 5*time.Second)
|
||||
|
||||
const nsecPerDay = 24 * 3600 * 1e9
|
||||
|
||||
// TimeFormatter implements fmt.Stringer for timestamp in nanoseconds
|
||||
type TimeFormatter int64
|
||||
|
||||
// String returns human-readable representation for tf.
|
||||
func (tf *TimeFormatter) String() string {
|
||||
ts := int64(*tf)
|
||||
t := time.Unix(0, ts).UTC()
|
||||
return t.Format(time.RFC3339)
|
||||
}
|
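A tiny usage sketch for TimeFormatter; the helper name and the log message are assumptions for illustration:

// exampleLogTimestamp is an illustrative sketch; the helper name and message text
// are assumptions showing how TimeFormatter produces human-readable timestamps.
func exampleLogTimestamp(ts int64) {
	tsf := TimeFormatter(ts)
	logger.Infof("processing log entries with timestamps up to %s", &tsf)
}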
||||
|
||||
func (s *Storage) getPartitionForDay(day int64) *partitionWrapper {
|
||||
s.partitionsLock.Lock()
|
||||
|
||||
// Search for the partition using binary search
|
||||
ptws := s.partitions
|
||||
n := sort.Search(len(ptws), func(i int) bool {
|
||||
return ptws[i].day >= day
|
||||
})
|
||||
var ptw *partitionWrapper
|
||||
if n < len(ptws) {
|
||||
ptw = ptws[n]
|
||||
if ptw.day != day {
|
||||
ptw = nil
|
||||
}
|
||||
}
|
||||
if ptw == nil {
|
||||
// Missing partition for the given day. Create it.
|
||||
fname := time.Unix(0, day*nsecPerDay).UTC().Format(partitionNameFormat)
|
||||
partitionPath := filepath.Join(s.path, partitionsDirname, fname)
|
||||
mustCreatePartition(partitionPath)
|
||||
|
||||
pt := mustOpenPartition(s, partitionPath)
|
||||
ptw = newPartitionWrapper(pt, day)
|
||||
if n == len(ptws) {
|
||||
ptws = append(ptws, ptw)
|
||||
} else {
|
||||
ptws = append(ptws[:n+1], ptws[n:]...)
|
||||
ptws[n] = ptw
|
||||
}
|
||||
s.partitions = ptws
|
||||
}
|
||||
|
||||
s.ptwHot = ptw
|
||||
ptw.incRef()
|
||||
|
||||
s.partitionsLock.Unlock()
|
||||
|
||||
return ptw
|
||||
}
|
||||
|
||||
// UpdateStats updates ss for the given s.
|
||||
func (s *Storage) UpdateStats(ss *StorageStats) {
|
||||
ss.RowsDroppedTooBigTimestamp += atomic.LoadUint64(&s.rowsDroppedTooBigTimestamp)
|
||||
ss.RowsDroppedTooSmallTimestamp += atomic.LoadUint64(&s.rowsDroppedTooSmallTimestamp)
|
||||
|
||||
s.partitionsLock.Lock()
|
||||
ss.PartitionsCount += uint64(len(s.partitions))
|
||||
for _, ptw := range s.partitions {
|
||||
ptw.pt.updateStats(&ss.PartitionStats)
|
||||
}
|
||||
s.partitionsLock.Unlock()
|
||||
}
|
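A short sketch of how callers are expected to read the stats; the helper name and the way the counters are combined are assumptions:

// exampleCollectStats is an illustrative sketch; the helper name and the way the
// counters are consumed are assumptions.
func exampleCollectStats(s *Storage) (partitions, droppedRows uint64) {
	var ss StorageStats
	s.UpdateStats(&ss)
	return ss.PartitionsCount, ss.RowsDroppedTooSmallTimestamp + ss.RowsDroppedTooBigTimestamp
}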
||||
|
||||
func (s *Storage) debugFlush() {
|
||||
s.partitionsLock.Lock()
|
||||
ptws := append([]*partitionWrapper{}, s.partitions...)
|
||||
for _, ptw := range ptws {
|
||||
ptw.incRef()
|
||||
}
|
||||
s.partitionsLock.Unlock()
|
||||
|
||||
for _, ptw := range ptws {
|
||||
ptw.pt.debugFlush()
|
||||
ptw.decRef()
|
||||
}
|
||||
}
|
||||
|
||||
func durationToDays(d time.Duration) int64 {
|
||||
return int64(d / (time.Hour * 24))
|
||||
}
|
602
lib/logstorage/storage_search.go
Normal file
|
@@ -0,0 +1,602 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
)
|
||||
|
||||
// genericSearchOptions contain options used for search.
|
||||
type genericSearchOptions struct {
|
||||
// tenantIDs must contain the list of tenantIDs for the search.
|
||||
tenantIDs []TenantID
|
||||
|
||||
// filter is the filter to use for the search
|
||||
filter filter
|
||||
|
||||
// resultColumnNames contains the names of columns to return in the result.
|
||||
resultColumnNames []string
|
||||
}
|
||||
|
||||
type searchOptions struct {
|
||||
// Optional sorted list of tenantIDs for the search.
|
||||
// If it is empty, then the search is performed by streamIDs
|
||||
tenantIDs []TenantID
|
||||
|
||||
// Optional sorted list of streamIDs for the search.
|
||||
// If it is empty, then the search is performed by tenantIDs
|
||||
streamIDs []streamID
|
||||
|
||||
// minTimestamp is the minimum timestamp for the search
|
||||
minTimestamp int64
|
||||
|
||||
// maxTimestamp is the maximum timestamp for the search
|
||||
maxTimestamp int64
|
||||
|
||||
// filter is the filter to use for the search
|
||||
filter filter
|
||||
|
||||
// resultColumnNames contains the names of columns to return in the result
|
||||
resultColumnNames []string
|
||||
}
|
||||
|
||||
// RunQuery runs the given q and calls processBlock for results
|
||||
func (s *Storage) RunQuery(tenantIDs []TenantID, q *Query, stopCh <-chan struct{}, processBlock func(columns []BlockColumn)) {
|
||||
resultColumnNames := q.getResultColumnNames()
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: tenantIDs,
|
||||
filter: q.f,
|
||||
resultColumnNames: resultColumnNames,
|
||||
}
|
||||
workersCount := cgroup.AvailableCPUs()
|
||||
s.search(workersCount, so, stopCh, func(workerID uint, br *blockResult) {
|
||||
brs := getBlockRows()
|
||||
cs := brs.cs
|
||||
|
||||
for i, columnName := range resultColumnNames {
|
||||
cs = append(cs, BlockColumn{
|
||||
Name: columnName,
|
||||
Values: br.getColumnValues(i),
|
||||
})
|
||||
}
|
||||
processBlock(cs)
|
||||
|
||||
brs.cs = cs
|
||||
putBlockRows(brs)
|
||||
})
|
||||
}
|
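A minimal query sketch; the helper name, query text and tenant are assumptions, while ParseQuery, RunQuery and BlockColumn come from this package:

// exampleRunQuery is an illustrative sketch; the helper name, query text and tenant
// are assumptions showing how RunQuery is called with a per-block callback.
func exampleRunQuery(s *Storage) error {
	q, err := ParseQuery(`_stream:{job="foobar"} error`)
	if err != nil {
		return err
	}
	tenantIDs := []TenantID{{AccountID: 1, ProjectID: 2}}
	s.RunQuery(tenantIDs, q, nil, func(columns []BlockColumn) {
		for _, c := range columns {
			_ = c.Name   // column name, e.g. "_msg"
			_ = c.Values // one value per matching row in the block
		}
	})
	return nil
}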
||||
|
||||
type blockRows struct {
|
||||
cs []BlockColumn
|
||||
}
|
||||
|
||||
func (brs *blockRows) reset() {
|
||||
cs := brs.cs
|
||||
for i := range cs {
|
||||
cs[i].reset()
|
||||
}
|
||||
brs.cs = cs[:0]
|
||||
}
|
||||
|
||||
func getBlockRows() *blockRows {
|
||||
v := blockRowsPool.Get()
|
||||
if v == nil {
|
||||
return &blockRows{}
|
||||
}
|
||||
return v.(*blockRows)
|
||||
}
|
||||
|
||||
func putBlockRows(brs *blockRows) {
|
||||
brs.reset()
|
||||
blockRowsPool.Put(brs)
|
||||
}
|
||||
|
||||
var blockRowsPool sync.Pool
|
||||
|
||||
// BlockColumn is a single column of a block of data
|
||||
type BlockColumn struct {
|
||||
// Name is the column name
|
||||
Name string
|
||||
|
||||
// Values is column values
|
||||
Values []string
|
||||
}
|
||||
|
||||
func (c *BlockColumn) reset() {
|
||||
c.Name = ""
|
||||
c.Values = nil
|
||||
}
|
||||
|
||||
// The number of blocks to search at once by a single worker
|
||||
//
|
||||
// This number must be increased on systems with many CPU cores in order to amortize
|
||||
// the overhead for passing the blockSearchWork to worker goroutines.
|
||||
const blockSearchWorksPerBatch = 64
|
||||
|
||||
// searchResultFunc must process the given br.
|
||||
//
|
||||
// The callback is called at the worker with the given workerID.
|
||||
type searchResultFunc func(workerID uint, br *blockResult)
|
||||
|
||||
// search searches for the matching rows according to so.
|
||||
//
|
||||
// It calls f for each found matching block.
|
||||
func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-chan struct{}, processBlockResult searchResultFunc) {
|
||||
// Spin up workers
|
||||
var wg sync.WaitGroup
|
||||
workCh := make(chan []*blockSearchWork, workersCount)
|
||||
wg.Add(workersCount)
|
||||
for i := 0; i < workersCount; i++ {
|
||||
go func(workerID uint) {
|
||||
bs := getBlockSearch()
|
||||
for bsws := range workCh {
|
||||
for _, bsw := range bsws {
|
||||
bs.search(bsw)
|
||||
if bs.br.RowsCount() > 0 {
|
||||
processBlockResult(workerID, &bs.br)
|
||||
}
|
||||
}
|
||||
}
|
||||
putBlockSearch(bs)
|
||||
wg.Done()
|
||||
}(uint(i))
|
||||
}
|
||||
|
||||
// Obtain common time filter from so.filter
|
||||
tf, f := getCommonTimeFilter(so.filter)
|
||||
|
||||
// Select partitions according to the selected time range
|
||||
s.partitionsLock.Lock()
|
||||
ptws := s.partitions
|
||||
minDay := tf.minTimestamp / nsecPerDay
|
||||
n := sort.Search(len(ptws), func(i int) bool {
|
||||
return ptws[i].day >= minDay
|
||||
})
|
||||
ptws = ptws[n:]
|
||||
maxDay := tf.maxTimestamp / nsecPerDay
|
||||
n = sort.Search(len(ptws), func(i int) bool {
|
||||
return ptws[i].day > maxDay
|
||||
})
|
||||
ptws = ptws[:n]
|
||||
for _, ptw := range ptws {
|
||||
ptw.incRef()
|
||||
}
|
||||
s.partitionsLock.Unlock()
|
||||
|
||||
// Obtain common streamFilter from f
|
||||
var sf *StreamFilter
|
||||
sf, f = getCommonStreamFilter(f)
|
||||
|
||||
// Apply search to matching partitions
|
||||
var pws []*partWrapper
|
||||
for _, ptw := range ptws {
|
||||
pws = ptw.pt.search(pws, tf, sf, f, so, workCh, stopCh)
|
||||
}
|
||||
|
||||
// Wait until workers finish their work
|
||||
close(workCh)
|
||||
wg.Wait()
|
||||
|
||||
// Decrement references to parts
|
||||
for _, pw := range pws {
|
||||
pw.decRef()
|
||||
}
|
||||
|
||||
// Decrement references to partitions
|
||||
for _, ptw := range ptws {
|
||||
ptw.decRef()
|
||||
}
|
||||
}
|
||||
|
||||
func (pt *partition) search(pwsDst []*partWrapper, tf *timeFilter, sf *StreamFilter, f filter, so *genericSearchOptions,
|
||||
workCh chan<- []*blockSearchWork, stopCh <-chan struct{},
|
||||
) []*partWrapper {
|
||||
tenantIDs := so.tenantIDs
|
||||
var streamIDs []streamID
|
||||
if sf != nil {
|
||||
streamIDs = pt.idb.searchStreamIDs(tenantIDs, sf)
|
||||
tenantIDs = nil
|
||||
}
|
||||
if hasStreamFilters(f) {
|
||||
f = initStreamFilters(tenantIDs, pt.idb, f)
|
||||
}
|
||||
soInternal := &searchOptions{
|
||||
tenantIDs: tenantIDs,
|
||||
streamIDs: streamIDs,
|
||||
minTimestamp: tf.minTimestamp,
|
||||
maxTimestamp: tf.maxTimestamp,
|
||||
filter: f,
|
||||
resultColumnNames: so.resultColumnNames,
|
||||
}
|
||||
return pt.ddb.search(pwsDst, soInternal, workCh, stopCh)
|
||||
}
|
||||
|
||||
func hasStreamFilters(f filter) bool {
|
||||
switch t := f.(type) {
|
||||
case *andFilter:
|
||||
return hasStreamFiltersInList(t.filters)
|
||||
case *orFilter:
|
||||
return hasStreamFiltersInList(t.filters)
|
||||
case *notFilter:
|
||||
return hasStreamFilters(t.f)
|
||||
case *streamFilter:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func hasStreamFiltersInList(filters []filter) bool {
|
||||
for _, f := range filters {
|
||||
if hasStreamFilters(f) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func initStreamFilters(tenantIDs []TenantID, idb *indexdb, f filter) filter {
|
||||
switch t := f.(type) {
|
||||
case *andFilter:
|
||||
return &andFilter{
|
||||
filters: initStreamFiltersList(tenantIDs, idb, t.filters),
|
||||
}
|
||||
case *orFilter:
|
||||
return &orFilter{
|
||||
filters: initStreamFiltersList(tenantIDs, idb, t.filters),
|
||||
}
|
||||
case *notFilter:
|
||||
return ¬Filter{
|
||||
f: initStreamFilters(tenantIDs, idb, t.f),
|
||||
}
|
||||
case *streamFilter:
|
||||
return &streamFilter{
|
||||
f: t.f,
|
||||
tenantIDs: tenantIDs,
|
||||
idb: idb,
|
||||
}
|
||||
default:
|
||||
return t
|
||||
}
|
||||
}
|
||||
|
||||
func initStreamFiltersList(tenantIDs []TenantID, idb *indexdb, filters []filter) []filter {
|
||||
result := make([]filter, len(filters))
|
||||
for i, f := range filters {
|
||||
result[i] = initStreamFilters(tenantIDs, idb, f)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (ddb *datadb) search(pwsDst []*partWrapper, so *searchOptions, workCh chan<- []*blockSearchWork, stopCh <-chan struct{}) []*partWrapper {
|
||||
// Select parts with data for the given time range
|
||||
ddb.partsLock.Lock()
|
||||
pwsDstLen := len(pwsDst)
|
||||
pwsDst = appendPartsInTimeRange(pwsDst, ddb.inmemoryParts, so.minTimestamp, so.maxTimestamp)
|
||||
pwsDst = appendPartsInTimeRange(pwsDst, ddb.fileParts, so.minTimestamp, so.maxTimestamp)
|
||||
pws := pwsDst[pwsDstLen:]
|
||||
for _, pw := range pws {
|
||||
pw.incRef()
|
||||
}
|
||||
ddb.partsLock.Unlock()
|
||||
|
||||
// Apply search to matching parts
|
||||
for _, pw := range pws {
|
||||
pw.p.search(so, workCh, stopCh)
|
||||
}
|
||||
|
||||
return pwsDst
|
||||
}
|
||||
|
||||
func (p *part) search(so *searchOptions, workCh chan<- []*blockSearchWork, stopCh <-chan struct{}) {
|
||||
bhss := getBlockHeaders()
|
||||
if len(so.tenantIDs) > 0 {
|
||||
p.searchByTenantIDs(so, bhss, workCh, stopCh)
|
||||
} else {
|
||||
p.searchByStreamIDs(so, bhss, workCh, stopCh)
|
||||
}
|
||||
putBlockHeaders(bhss)
|
||||
}
|
||||
|
||||
func getBlockHeaders() *blockHeaders {
|
||||
v := blockHeadersPool.Get()
|
||||
if v == nil {
|
||||
return &blockHeaders{}
|
||||
}
|
||||
return v.(*blockHeaders)
|
||||
}
|
||||
|
||||
func putBlockHeaders(bhss *blockHeaders) {
|
||||
bhss.reset()
|
||||
blockHeadersPool.Put(bhss)
|
||||
}
|
||||
|
||||
var blockHeadersPool sync.Pool
|
||||
|
||||
type blockHeaders struct {
|
||||
bhs []blockHeader
|
||||
}
|
||||
|
||||
func (bhss *blockHeaders) reset() {
|
||||
bhs := bhss.bhs
|
||||
for i := range bhs {
|
||||
bhs[i].reset()
|
||||
}
|
||||
bhss.bhs = bhs[:0]
|
||||
}
|
||||
|
||||
func (p *part) searchByTenantIDs(so *searchOptions, bhss *blockHeaders, workCh chan<- []*blockSearchWork, stopCh <-chan struct{}) {
|
||||
// it is assumed that tenantIDs are sorted
|
||||
tenantIDs := so.tenantIDs
|
||||
|
||||
bsws := make([]*blockSearchWork, 0, blockSearchWorksPerBatch)
|
||||
scheduleBlockSearch := func(bh *blockHeader) bool {
|
||||
// Do not use pool for blockSearchWork, since it is returned back to the pool
|
||||
// at another goroutine, which may run on another CPU core.
|
||||
// This means that it will be put into another per-CPU pool, which may result
|
||||
// in slowdown related to memory synchronization between CPU cores.
|
||||
// This slowdown is increased on systems with bigger number of CPU cores.
|
||||
bsw := newBlockSearchWork(p, so, bh)
|
||||
bsws = append(bsws, bsw)
|
||||
if len(bsws) < cap(bsws) {
|
||||
return true
|
||||
}
|
||||
select {
|
||||
case <-stopCh:
|
||||
return false
|
||||
case workCh <- bsws:
|
||||
bsws = make([]*blockSearchWork, 0, blockSearchWorksPerBatch)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// it is assumed that ibhs are sorted
|
||||
ibhs := p.indexBlockHeaders
|
||||
for len(ibhs) > 0 && len(tenantIDs) > 0 {
|
||||
select {
|
||||
case <-stopCh:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
// locate the tenantID equal to or bigger than the tenantID in ibhs[0]
|
||||
tenantID := &tenantIDs[0]
|
||||
if tenantID.less(&ibhs[0].streamID.tenantID) {
|
||||
tenantID = &ibhs[0].streamID.tenantID
|
||||
n := sort.Search(len(tenantIDs), func(i int) bool {
|
||||
return !tenantIDs[i].less(tenantID)
|
||||
})
|
||||
if n == len(tenantIDs) {
|
||||
tenantIDs = nil
|
||||
break
|
||||
}
|
||||
tenantID = &tenantIDs[n]
|
||||
tenantIDs = tenantIDs[n:]
|
||||
}
|
||||
|
||||
// locate the indexBlockHeader with a tenantID equal to or bigger than the given tenantID
|
||||
n := 0
|
||||
if ibhs[0].streamID.tenantID.less(tenantID) {
|
||||
n = sort.Search(len(ibhs), func(i int) bool {
|
||||
return !ibhs[i].streamID.tenantID.less(tenantID)
|
||||
})
|
||||
if n == len(ibhs) || n > 0 && ibhs[n].streamID.tenantID.equal(tenantID) {
|
||||
// The end of ibhs[n-1] may contain blocks for the given tenantID, so move it backwards
|
||||
n--
|
||||
}
|
||||
}
|
||||
ibh := &ibhs[n]
|
||||
ibhs = ibhs[n+1:]
|
||||
|
||||
if so.minTimestamp > ibh.maxTimestamp || so.maxTimestamp < ibh.minTimestamp {
|
||||
// Skip the ibh, since it doesn't contain entries in the requested time range
|
||||
continue
|
||||
}
|
||||
|
||||
bhss.bhs = ibh.mustReadBlockHeaders(bhss.bhs[:0], p)
|
||||
|
||||
bhs := bhss.bhs
|
||||
for len(bhs) > 0 {
|
||||
// search for blocks with the given tenantID
|
||||
n = sort.Search(len(bhs), func(i int) bool {
|
||||
return !bhs[i].streamID.tenantID.less(tenantID)
|
||||
})
|
||||
bhs = bhs[n:]
|
||||
for len(bhs) > 0 && bhs[0].streamID.tenantID.equal(tenantID) {
|
||||
bh := &bhs[0]
|
||||
bhs = bhs[1:]
|
||||
th := &bh.timestampsHeader
|
||||
if so.minTimestamp > th.maxTimestamp || so.maxTimestamp < th.minTimestamp {
|
||||
continue
|
||||
}
|
||||
if !scheduleBlockSearch(bh) {
	return
}
|
||||
}
|
||||
if len(bhs) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// search for the next tenantID, which can potentially match tenantID from bhs[0]
|
||||
tenantID = &bhs[0].streamID.tenantID
|
||||
n = sort.Search(len(tenantIDs), func(i int) bool {
|
||||
return !tenantIDs[i].less(tenantID)
|
||||
})
|
||||
if n == len(tenantIDs) {
|
||||
tenantIDs = nil
|
||||
break
|
||||
}
|
||||
tenantID = &tenantIDs[n]
|
||||
tenantIDs = tenantIDs[n:]
|
||||
}
|
||||
}
|
||||
|
||||
// Flush the remaining work
|
||||
if len(bsws) > 0 {
|
||||
workCh <- bsws
|
||||
}
|
||||
}
|
||||
|
||||
func (p *part) searchByStreamIDs(so *searchOptions, bhss *blockHeaders, workCh chan<- []*blockSearchWork, stopCh <-chan struct{}) {
|
||||
// it is assumed that streamIDs are sorted
|
||||
streamIDs := so.streamIDs
|
||||
|
||||
bsws := make([]*blockSearchWork, 0, blockSearchWorksPerBatch)
|
||||
scheduleBlockSearch := func(bh *blockHeader) bool {
|
||||
// Do not use pool for blockSearchWork, since it is returned back to the pool
|
||||
// at another goroutine, which may run on another CPU core.
|
||||
// This means that it will be put into another per-CPU pool, which may result
|
||||
// in slowdown related to memory synchronization between CPU cores.
|
||||
// This slowdown is increased on systems with bigger number of CPU cores.
|
||||
bsw := newBlockSearchWork(p, so, bh)
|
||||
bsws = append(bsws, bsw)
|
||||
if len(bsws) < cap(bsws) {
|
||||
return true
|
||||
}
|
||||
select {
|
||||
case <-stopCh:
|
||||
return false
|
||||
case workCh <- bsws:
|
||||
bsws = make([]*blockSearchWork, 0, blockSearchWorksPerBatch)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// it is assumed that ibhs are sorted
|
||||
ibhs := p.indexBlockHeaders
|
||||
|
||||
for len(ibhs) > 0 && len(streamIDs) > 0 {
|
||||
select {
|
||||
case <-stopCh:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
// locate the streamID equal to or bigger than the streamID in ibhs[0]
|
||||
streamID := &streamIDs[0]
|
||||
if streamID.less(&ibhs[0].streamID) {
|
||||
streamID = &ibhs[0].streamID
|
||||
n := sort.Search(len(streamIDs), func(i int) bool {
|
||||
return !streamIDs[i].less(streamID)
|
||||
})
|
||||
if n == len(streamIDs) {
|
||||
streamIDs = nil
|
||||
break
|
||||
}
|
||||
streamID = &streamIDs[n]
|
||||
streamIDs = streamIDs[n:]
|
||||
}
|
||||
|
||||
// locate the indexBlockHeader with a streamID equal to or bigger than the given streamID
|
||||
n := 0
|
||||
if ibhs[0].streamID.less(streamID) {
|
||||
n = sort.Search(len(ibhs), func(i int) bool {
|
||||
return !ibhs[i].streamID.less(streamID)
|
||||
})
|
||||
if n == len(ibhs) || n > 0 && ibhs[n].streamID.equal(streamID) {
|
||||
// The end of ibhs[n-1] may contain blocks for the given streamID, so move it backwards
|
||||
n--
|
||||
}
|
||||
}
|
||||
ibh := &ibhs[n]
|
||||
ibhs = ibhs[n+1:]
|
||||
|
||||
if so.minTimestamp > ibh.maxTimestamp || so.maxTimestamp < ibh.minTimestamp {
|
||||
// Skip the ibh, since it doesn't contain entries in the requested time range
|
||||
continue
|
||||
}
|
||||
|
||||
bhss.bhs = ibh.mustReadBlockHeaders(bhss.bhs[:0], p)
|
||||
|
||||
bhs := bhss.bhs
|
||||
for len(bhs) > 0 {
|
||||
// search for blocks with the given streamID
|
||||
n = sort.Search(len(bhs), func(i int) bool {
|
||||
return !bhs[i].streamID.less(streamID)
|
||||
})
|
||||
bhs = bhs[n:]
|
||||
for len(bhs) > 0 && bhs[0].streamID.equal(streamID) {
|
||||
bh := &bhs[0]
|
||||
bhs = bhs[1:]
|
||||
th := &bh.timestampsHeader
|
||||
if so.minTimestamp > th.maxTimestamp || so.maxTimestamp < th.minTimestamp {
|
||||
continue
|
||||
}
|
||||
if !scheduleBlockSearch(bh) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if len(bhs) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// search for the next streamID, which can potentially match streamID from bhs[0]
|
||||
streamID = &bhs[0].streamID
|
||||
n = sort.Search(len(streamIDs), func(i int) bool {
|
||||
return !streamIDs[i].less(streamID)
|
||||
})
|
||||
if n == len(streamIDs) {
|
||||
streamIDs = nil
|
||||
break
|
||||
}
|
||||
streamID = &streamIDs[n]
|
||||
streamIDs = streamIDs[n:]
|
||||
}
|
||||
}
|
||||
|
||||
// Flush the remaining work
|
||||
if len(bsws) > 0 {
|
||||
workCh <- bsws
|
||||
}
|
||||
}
|
||||
|
||||
func appendPartsInTimeRange(dst, src []*partWrapper, minTimestamp, maxTimestamp int64) []*partWrapper {
|
||||
for _, pw := range src {
|
||||
if maxTimestamp < pw.p.ph.MinTimestamp || minTimestamp > pw.p.ph.MaxTimestamp {
|
||||
continue
|
||||
}
|
||||
dst = append(dst, pw)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func getCommonStreamFilter(f filter) (*StreamFilter, filter) {
|
||||
switch t := f.(type) {
|
||||
case *andFilter:
|
||||
filters := t.filters
|
||||
for i, filter := range filters {
|
||||
sf, ok := filter.(*streamFilter)
|
||||
if ok && !sf.f.isEmpty() {
|
||||
// Remove sf from filters, since it doesn't filter out anything then.
|
||||
af := &andFilter{
|
||||
filters: append(filters[:i:i], filters[i+1:]...),
|
||||
}
|
||||
return sf.f, af
|
||||
}
|
||||
}
|
||||
case *streamFilter:
|
||||
return t.f, &noopFilter{}
|
||||
}
|
||||
return nil, f
|
||||
}
|
||||
|
||||
func getCommonTimeFilter(f filter) (*timeFilter, filter) {
|
||||
switch t := f.(type) {
|
||||
case *andFilter:
|
||||
for _, filter := range t.filters {
|
||||
tf, ok := filter.(*timeFilter)
|
||||
if ok {
|
||||
// The tf must remain in the returned filter in order to properly filter out rows outside the selected time range
|
||||
return tf, f
|
||||
}
|
||||
}
|
||||
case *timeFilter:
|
||||
return t, f
|
||||
}
|
||||
return allTimeFilter, f
|
||||
}
|
||||
|
||||
var allTimeFilter = &timeFilter{
|
||||
minTimestamp: math.MinInt64,
|
||||
maxTimestamp: math.MaxInt64,
|
||||
}
|
663
lib/logstorage/storage_search_test.go
Normal file
|
@@ -0,0 +1,663 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
func TestStorageRunQuery(t *testing.T) {
|
||||
const path = "TestStorageRunQuery"
|
||||
|
||||
const tenantsCount = 11
|
||||
const streamsPerTenant = 3
|
||||
const blocksPerStream = 5
|
||||
const rowsPerBlock = 7
|
||||
|
||||
sc := &StorageConfig{
|
||||
Retention: 24 * time.Hour,
|
||||
}
|
||||
s := MustOpenStorage(path, sc)
|
||||
|
||||
// fill the storage with data
|
||||
var allTenantIDs []TenantID
|
||||
baseTimestamp := time.Now().UnixNano() - 3600*1e9
|
||||
var fields []Field
|
||||
streamTags := []string{
|
||||
"job",
|
||||
"instance",
|
||||
}
|
||||
for i := 0; i < tenantsCount; i++ {
|
||||
tenantID := TenantID{
|
||||
AccountID: uint32(i),
|
||||
ProjectID: uint32(10*i + 1),
|
||||
}
|
||||
allTenantIDs = append(allTenantIDs, tenantID)
|
||||
for j := 0; j < streamsPerTenant; j++ {
|
||||
streamIDValue := fmt.Sprintf("stream_id=%d", j)
|
||||
for k := 0; k < blocksPerStream; k++ {
|
||||
lr := GetLogRows(streamTags, nil)
|
||||
for m := 0; m < rowsPerBlock; m++ {
|
||||
timestamp := baseTimestamp + int64(m)*1e9 + int64(k)
|
||||
// Append stream fields
|
||||
fields = append(fields[:0], Field{
|
||||
Name: "job",
|
||||
Value: "foobar",
|
||||
}, Field{
|
||||
Name: "instance",
|
||||
Value: fmt.Sprintf("host-%d:234", j),
|
||||
})
|
||||
// append the remaining fields
|
||||
fields = append(fields, Field{
|
||||
Name: "_msg",
|
||||
Value: fmt.Sprintf("log message %d at block %d", m, k),
|
||||
})
|
||||
fields = append(fields, Field{
|
||||
Name: "source-file",
|
||||
Value: "/foo/bar/baz",
|
||||
})
|
||||
fields = append(fields, Field{
|
||||
Name: "tenant.id",
|
||||
Value: tenantID.String(),
|
||||
})
|
||||
fields = append(fields, Field{
|
||||
Name: "stream-id",
|
||||
Value: streamIDValue,
|
||||
})
|
||||
lr.MustAdd(tenantID, timestamp, fields)
|
||||
}
|
||||
s.MustAddRows(lr)
|
||||
PutLogRows(lr)
|
||||
}
|
||||
}
|
||||
}
|
||||
s.debugFlush()
|
||||
|
||||
// run tests on the storage data
|
||||
t.Run("missing-tenant", func(t *testing.T) {
|
||||
q := mustParseQuery(`"log message"`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 0,
|
||||
ProjectID: 0,
|
||||
}
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
s.RunQuery(tenantIDs, q, nil, processBlock)
|
||||
})
|
||||
t.Run("missing-message-text", func(t *testing.T) {
|
||||
q := mustParseQuery(`foobar`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
s.RunQuery(tenantIDs, q, nil, processBlock)
|
||||
})
|
||||
t.Run("matching-tenant-id", func(t *testing.T) {
|
||||
q := mustParseQuery(`tenant.id:*`)
|
||||
for i := 0; i < tenantsCount; i++ {
|
||||
tenantID := TenantID{
|
||||
AccountID: uint32(i),
|
||||
ProjectID: uint32(10*i + 1),
|
||||
}
|
||||
expectedTenantID := tenantID.String()
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
hasTenantIDColumn := false
|
||||
var columnNames []string
|
||||
for _, c := range columns {
|
||||
if c.Name == "tenant.id" {
|
||||
hasTenantIDColumn = true
|
||||
if len(c.Values) == 0 {
|
||||
panic(fmt.Errorf("unexpected zero rows"))
|
||||
}
|
||||
for _, v := range c.Values {
|
||||
if v != expectedTenantID {
|
||||
panic(fmt.Errorf("unexpected tenant.id; got %s; want %s", v, expectedTenantID))
|
||||
}
|
||||
}
|
||||
}
|
||||
columnNames = append(columnNames, c.Name)
|
||||
}
|
||||
if !hasTenantIDColumn {
|
||||
panic(fmt.Errorf("missing tenant.id column among columns: %q", columnNames))
|
||||
}
|
||||
atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
s.RunQuery(tenantIDs, q, nil, processBlock)
|
||||
|
||||
expectedRowsCount := streamsPerTenant * blocksPerStream * rowsPerBlock
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of matching rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("matching-multiple-tenant-ids", func(t *testing.T) {
|
||||
q := mustParseQuery(`"log message"`)
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
|
||||
}
|
||||
s.RunQuery(allTenantIDs, q, nil, processBlock)
|
||||
|
||||
expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of matching rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
})
|
||||
t.Run("matching-in-filter", func(t *testing.T) {
|
||||
q := mustParseQuery(`source-file:in(foobar,/foo/bar/baz)`)
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
|
||||
}
|
||||
s.RunQuery(allTenantIDs, q, nil, processBlock)
|
||||
|
||||
expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of matching rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
})
|
||||
t.Run("stream-filter-mismatch", func(t *testing.T) {
|
||||
q := mustParseQuery(`_stream:{job="foobar",instance=~"host-.+:2345"} log`)
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
s.RunQuery(allTenantIDs, q, nil, processBlock)
|
||||
})
|
||||
t.Run("matching-stream-id", func(t *testing.T) {
|
||||
for i := 0; i < streamsPerTenant; i++ {
|
||||
q := mustParseQuery(fmt.Sprintf(`log _stream:{job="foobar",instance="host-%d:234"} AND stream-id:*`, i))
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
expectedStreamID := fmt.Sprintf("stream_id=%d", i)
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
hasStreamIDColumn := false
|
||||
var columnNames []string
|
||||
for _, c := range columns {
|
||||
if c.Name == "stream-id" {
|
||||
hasStreamIDColumn = true
|
||||
if len(c.Values) == 0 {
|
||||
panic(fmt.Errorf("unexpected zero rows"))
|
||||
}
|
||||
for _, v := range c.Values {
|
||||
if v != expectedStreamID {
|
||||
panic(fmt.Errorf("unexpected stream-id; got %s; want %s", v, expectedStreamID))
|
||||
}
|
||||
}
|
||||
}
|
||||
columnNames = append(columnNames, c.Name)
|
||||
}
|
||||
if !hasStreamIDColumn {
|
||||
panic(fmt.Errorf("missing stream-id column among columns: %q", columnNames))
|
||||
}
|
||||
atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
s.RunQuery(tenantIDs, q, nil, processBlock)
|
||||
|
||||
expectedRowsCount := blocksPerStream * rowsPerBlock
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of rows for stream %d; got %d; want %d", i, rowsCount, expectedRowsCount)
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("matching-multiple-stream-ids-with-re-filter", func(t *testing.T) {
|
||||
q := mustParseQuery(`_msg:log _stream:{job="foobar",instance=~"host-[^:]+:234"} and re("message [02] at")`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
s.RunQuery(tenantIDs, q, nil, processBlock)
|
||||
|
||||
expectedRowsCount := streamsPerTenant * blocksPerStream * 2
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
})
|
||||
t.Run("matching-time-range", func(t *testing.T) {
|
||||
minTimestamp := baseTimestamp + (rowsPerBlock-2)*1e9
|
||||
maxTimestamp := baseTimestamp + (rowsPerBlock-1)*1e9 - 1
|
||||
q := mustParseQuery(fmt.Sprintf(`_time:[%f,%f]`, float64(minTimestamp)/1e9, float64(maxTimestamp)/1e9))
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
s.RunQuery(tenantIDs, q, nil, processBlock)
|
||||
|
||||
expectedRowsCount := streamsPerTenant * blocksPerStream
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
})
|
||||
t.Run("matching-stream-id-with-time-range", func(t *testing.T) {
|
||||
minTimestamp := baseTimestamp + (rowsPerBlock-2)*1e9
|
||||
maxTimestamp := baseTimestamp + (rowsPerBlock-1)*1e9 - 1
|
||||
q := mustParseQuery(fmt.Sprintf(`_time:[%f,%f] _stream:{job="foobar",instance="host-1:234"}`, float64(minTimestamp)/1e9, float64(maxTimestamp)/1e9))
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
s.RunQuery(tenantIDs, q, nil, processBlock)
|
||||
|
||||
expectedRowsCount := blocksPerStream
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
})
|
||||
t.Run("matching-stream-id-missing-time-range", func(t *testing.T) {
|
||||
minTimestamp := baseTimestamp + (rowsPerBlock+1)*1e9
|
||||
maxTimestamp := baseTimestamp + (rowsPerBlock+2)*1e9
|
||||
q := mustParseQuery(fmt.Sprintf(`_stream:{job="foobar",instance="host-1:234"} _time:[%d, %d)`, minTimestamp/1e9, maxTimestamp/1e9))
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
s.RunQuery(tenantIDs, q, nil, processBlock)
|
||||
})
|
||||
t.Run("missing-time-range", func(t *testing.T) {
|
||||
minTimestamp := baseTimestamp + (rowsPerBlock+1)*1e9
|
||||
maxTimestamp := baseTimestamp + (rowsPerBlock+2)*1e9
|
||||
q := mustParseQuery(fmt.Sprintf(`_time:[%d, %d)`, minTimestamp/1e9, maxTimestamp/1e9))
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
processBlock := func(columns []BlockColumn) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
s.RunQuery(tenantIDs, q, nil, processBlock)
|
||||
})
|
||||
|
||||
// Close the storage and delete its data
|
||||
s.MustClose()
|
||||
fs.MustRemoveAll(path)
|
||||
}
|
||||
|
||||
func mustParseQuery(query string) *Query {
|
||||
q, err := ParseQuery(query)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("BUG: cannot parse %s: %s", query, err))
|
||||
}
|
||||
return q
|
||||
}
|
||||
|
||||
func TestStorageSearch(t *testing.T) {
|
||||
const path = "TestStorageSearch"
|
||||
|
||||
const tenantsCount = 11
|
||||
const streamsPerTenant = 3
|
||||
const blocksPerStream = 5
|
||||
const rowsPerBlock = 7
|
||||
|
||||
sc := &StorageConfig{
|
||||
Retention: 24 * time.Hour,
|
||||
}
|
||||
s := MustOpenStorage(path, sc)
|
||||
|
||||
// fill the storage with data.
|
||||
var allTenantIDs []TenantID
|
||||
baseTimestamp := time.Now().UnixNano() - 3600*1e9
|
||||
var fields []Field
|
||||
streamTags := []string{
|
||||
"job",
|
||||
"instance",
|
||||
}
|
||||
for i := 0; i < tenantsCount; i++ {
|
||||
tenantID := TenantID{
|
||||
AccountID: uint32(i),
|
||||
ProjectID: uint32(10*i + 1),
|
||||
}
|
||||
allTenantIDs = append(allTenantIDs, tenantID)
|
||||
for j := 0; j < streamsPerTenant; j++ {
|
||||
for k := 0; k < blocksPerStream; k++ {
|
||||
lr := GetLogRows(streamTags, nil)
|
||||
for m := 0; m < rowsPerBlock; m++ {
|
||||
timestamp := baseTimestamp + int64(m)*1e9 + int64(k)
|
||||
// Append stream fields
|
||||
fields = append(fields[:0], Field{
|
||||
Name: "job",
|
||||
Value: "foobar",
|
||||
}, Field{
|
||||
Name: "instance",
|
||||
Value: fmt.Sprintf("host-%d:234", j),
|
||||
})
|
||||
// append the remaining fields
|
||||
fields = append(fields, Field{
|
||||
Name: "_msg",
|
||||
Value: fmt.Sprintf("log message %d at block %d", m, k),
|
||||
})
|
||||
fields = append(fields, Field{
|
||||
Name: "source-file",
|
||||
Value: "/foo/bar/baz",
|
||||
})
|
||||
lr.MustAdd(tenantID, timestamp, fields)
|
||||
}
|
||||
s.MustAddRows(lr)
|
||||
PutLogRows(lr)
|
||||
}
|
||||
}
|
||||
}
|
||||
s.debugFlush()
|
||||
|
||||
// run tests on the filled storage
|
||||
const workersCount = 3
|
||||
|
||||
getBaseFilter := func(minTimestamp, maxTimestamp int64, sf *StreamFilter) filter {
|
||||
var filters []filter
|
||||
filters = append(filters, &timeFilter{
|
||||
minTimestamp: minTimestamp,
|
||||
maxTimestamp: maxTimestamp,
|
||||
})
|
||||
if sf != nil {
|
||||
filters = append(filters, &streamFilter{
|
||||
f: sf,
|
||||
})
|
||||
}
|
||||
return &andFilter{
|
||||
filters: filters,
|
||||
}
|
||||
}
|
||||
|
||||
t.Run("missing-tenant-smaller-than-existing", func(t *testing.T) {
|
||||
tenantID := TenantID{
|
||||
AccountID: 0,
|
||||
ProjectID: 0,
|
||||
}
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, nil)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: []TenantID{tenantID},
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
})
|
||||
t.Run("missing-tenant-bigger-than-existing", func(t *testing.T) {
|
||||
tenantID := TenantID{
|
||||
AccountID: tenantsCount + 1,
|
||||
ProjectID: 0,
|
||||
}
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, nil)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: []TenantID{tenantID},
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
})
|
||||
t.Run("missing-tenant-middle", func(t *testing.T) {
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 0,
|
||||
}
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, nil)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: []TenantID{tenantID},
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
})
|
||||
t.Run("matching-tenant-id", func(t *testing.T) {
|
||||
for i := 0; i < tenantsCount; i++ {
|
||||
tenantID := TenantID{
|
||||
AccountID: uint32(i),
|
||||
ProjectID: uint32(10*i + 1),
|
||||
}
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, nil)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: []TenantID{tenantID},
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
if !br.streamID.tenantID.equal(&tenantID) {
|
||||
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
|
||||
}
|
||||
atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
|
||||
expectedRowsCount := streamsPerTenant * blocksPerStream * rowsPerBlock
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of matching rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("matching-multiple-tenant-ids", func(t *testing.T) {
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, nil)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: allTenantIDs,
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
|
||||
expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of matching rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
})
|
||||
t.Run("stream-filter-mismatch", func(t *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-.+:2345"}`)
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, sf)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: allTenantIDs,
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
})
|
||||
t.Run("matching-stream-id", func(t *testing.T) {
|
||||
for i := 0; i < streamsPerTenant; i++ {
|
||||
sf := mustNewStreamFilter(fmt.Sprintf(`{job="foobar",instance="host-%d:234"}`, i))
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, sf)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: []TenantID{tenantID},
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
if !br.streamID.tenantID.equal(&tenantID) {
|
||||
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
|
||||
}
|
||||
atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
|
||||
expectedRowsCount := blocksPerStream * rowsPerBlock
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("matching-multiple-stream-ids", func(t *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, sf)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: []TenantID{tenantID},
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
if !br.streamID.tenantID.equal(&tenantID) {
|
||||
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
|
||||
}
|
||||
atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
|
||||
expectedRowsCount := streamsPerTenant * blocksPerStream * rowsPerBlock
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
})
|
||||
t.Run("matching-multiple-stream-ids-with-re-filter", func(t *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, sf)
|
||||
f = &andFilter{
|
||||
filters: []filter{
|
||||
f,
|
||||
®expFilter{
|
||||
fieldName: "_msg",
|
||||
re: regexp.MustCompile("message [02] at "),
|
||||
},
|
||||
},
|
||||
}
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: []TenantID{tenantID},
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
if !br.streamID.tenantID.equal(&tenantID) {
|
||||
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
|
||||
}
|
||||
atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
|
||||
expectedRowsCount := streamsPerTenant * blocksPerStream * 2
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
})
|
||||
t.Run("matching-stream-id-smaller-time-range", func(t *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance="host-1:234"}`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
minTimestamp := baseTimestamp + (rowsPerBlock-2)*1e9
|
||||
maxTimestamp := baseTimestamp + (rowsPerBlock-1)*1e9 - 1
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, sf)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: []TenantID{tenantID},
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
rowsCount := uint32(0)
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
|
||||
expectedRowsCount := blocksPerStream
|
||||
if rowsCount != uint32(expectedRowsCount) {
|
||||
t.Fatalf("unexpected number of rows; got %d; want %d", rowsCount, expectedRowsCount)
|
||||
}
|
||||
})
|
||||
t.Run("matching-stream-id-missing-time-range", func(t *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance="host-1:234"}`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
}
|
||||
minTimestamp := baseTimestamp + (rowsPerBlock+1)*1e9
|
||||
maxTimestamp := baseTimestamp + (rowsPerBlock+2)*1e9
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, sf)
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: []TenantID{tenantID},
|
||||
filter: f,
|
||||
resultColumnNames: []string{"_msg"},
|
||||
}
|
||||
processBlock := func(workerID uint, br *blockResult) {
|
||||
panic(fmt.Errorf("unexpected match"))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
})
|
||||
|
||||
s.MustClose()
|
||||
fs.MustRemoveAll(path)
|
||||
}
|
||||
|
||||
func mustNewStreamFilter(s string) *StreamFilter {
|
||||
sf, err := newStreamFilter(s)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error in newStreamFilter(%q): %s", s, err))
|
||||
}
|
||||
return sf
|
||||
}
|
lib/logstorage/storage_test.go (new file, 102 lines)
@@ -0,0 +1,102 @@
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
func TestStorageLifecycle(t *testing.T) {
|
||||
const path = "TestStorageLifecycle"
|
||||
|
||||
for i := 0; i < 3; i++ {
|
||||
cfg := &StorageConfig{}
|
||||
s := MustOpenStorage(path, cfg)
|
||||
s.MustClose()
|
||||
}
|
||||
fs.MustRemoveAll(path)
|
||||
}
|
||||
|
||||
func TestStorageMustAddRows(t *testing.T) {
|
||||
const path = "TestStorageMustAddRows"
|
||||
|
||||
var sStats StorageStats
|
||||
|
||||
cfg := &StorageConfig{}
|
||||
s := MustOpenStorage(path, cfg)
|
||||
|
||||
// Try adding the same entry multiple times.
|
||||
totalRowsCount := uint64(0)
|
||||
for i := 0; i < 100; i++ {
|
||||
lr := newTestLogRows(1, 1, 0)
|
||||
lr.timestamps[0] = time.Now().UTC().UnixNano()
|
||||
totalRowsCount += uint64(len(lr.timestamps))
|
||||
s.MustAddRows(lr)
|
||||
sStats.Reset()
|
||||
s.UpdateStats(&sStats)
|
||||
if n := sStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries in storage; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
}
|
||||
|
||||
s.MustClose()
|
||||
|
||||
// Re-open the storage and try writing data to it
|
||||
s = MustOpenStorage(path, cfg)
|
||||
|
||||
sStats.Reset()
|
||||
s.UpdateStats(&sStats)
|
||||
if n := sStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries in storage; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
|
||||
lr := newTestLogRows(3, 10, 0)
|
||||
for i := range lr.timestamps {
|
||||
lr.timestamps[i] = time.Now().UTC().UnixNano()
|
||||
}
|
||||
totalRowsCount += uint64(len(lr.timestamps))
|
||||
s.MustAddRows(lr)
|
||||
sStats.Reset()
|
||||
s.UpdateStats(&sStats)
|
||||
if n := sStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries in storage; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
|
||||
s.MustClose()
|
||||
|
||||
// Re-open the storage with big retention and try writing data
|
||||
// to different days in the past and in the future
|
||||
cfg = &StorageConfig{
|
||||
Retention: 365 * 24 * time.Hour,
|
||||
FutureRetention: 365 * 24 * time.Hour,
|
||||
}
|
||||
s = MustOpenStorage(path, cfg)
|
||||
|
||||
lr = newTestLogRows(3, 10, 0)
|
||||
now := time.Now().UTC().UnixNano() - int64(len(lr.timestamps)/2)*nsecPerDay
|
||||
for i := range lr.timestamps {
|
||||
lr.timestamps[i] = now
|
||||
now += nsecPerDay
|
||||
}
|
||||
totalRowsCount += uint64(len(lr.timestamps))
|
||||
s.MustAddRows(lr)
|
||||
sStats.Reset()
|
||||
s.UpdateStats(&sStats)
|
||||
if n := sStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries in storage; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
|
||||
s.MustClose()
|
||||
|
||||
// Make sure the stats is valid after re-opening the storage
|
||||
s = MustOpenStorage(path, cfg)
|
||||
sStats.Reset()
|
||||
s.UpdateStats(&sStats)
|
||||
if n := sStats.RowsCount(); n != totalRowsCount {
|
||||
t.Fatalf("unexpected number of entries in storage; got %d; want %d", n, totalRowsCount)
|
||||
}
|
||||
s.MustClose()
|
||||
|
||||
fs.MustRemoveAll(path)
|
||||
}
|
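The lifecycle tests above exercise the full open/add/close cycle. Below is a minimal ingestion sketch using the same API (hypothetical example, not part of this commit; field names and values are arbitrary, and the `time` import is assumed):

// ingestSingleEntry is an illustrative sketch of adding one log entry.
func ingestSingleEntry(path string) {
	s := MustOpenStorage(path, &StorageConfig{})

	lr := GetLogRows([]string{"job", "instance"}, nil)
	fields := []Field{
		{Name: "job", Value: "webapp"},
		{Name: "instance", Value: "host-0:80"},
		{Name: "_msg", Value: "hello world"},
	}
	lr.MustAdd(TenantID{AccountID: 0, ProjectID: 0}, time.Now().UnixNano(), fields)

	s.MustAddRows(lr)
	PutLogRows(lr)

	s.MustClose()
}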
lib/logstorage/stream_filter.go (new file, 90 lines)
@@ -0,0 +1,90 @@
package logstorage

import (
	"strconv"
	"strings"
	"sync"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)

// StreamFilter is a filter for streams, e.g. `_stream:{...}`
type StreamFilter struct {
	orFilters []*andStreamFilter
}

func (sf *StreamFilter) isEmpty() bool {
	for _, af := range sf.orFilters {
		if len(af.tagFilters) > 0 {
			return false
		}
	}
	return true
}

func (sf *StreamFilter) marshalForCacheKey(dst []byte) []byte {
	dst = encoding.MarshalVarUint64(dst, uint64(len(sf.orFilters)))
	for _, af := range sf.orFilters {
		dst = encoding.MarshalVarUint64(dst, uint64(len(af.tagFilters)))
		for _, f := range af.tagFilters {
			dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(f.tagName))
			dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(f.op))
			dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(f.value))
		}
	}
	return dst
}

func (sf *StreamFilter) String() string {
	a := make([]string, len(sf.orFilters))
	for i := range a {
		a[i] = sf.orFilters[i].String()
	}
	return "{" + strings.Join(a, " or ") + "}"
}

type andStreamFilter struct {
	tagFilters []*streamTagFilter
}

func (af *andStreamFilter) String() string {
	a := make([]string, len(af.tagFilters))
	for i := range a {
		a[i] = af.tagFilters[i].String()
	}
	return strings.Join(a, ",")
}

// streamTagFilter is a filter for `tagName op value`
type streamTagFilter struct {
	// tagName is the name for the tag to filter
	tagName string

	// op is an operation such as `=`, `!=`, `=~` or `!~`
	op string

	// value is the filter value
	value string

	regexpOnce sync.Once
	regexp     *regexutil.PromRegex
}

func (tf *streamTagFilter) getRegexp() *regexutil.PromRegex {
	tf.regexpOnce.Do(tf.initRegexp)
	return tf.regexp
}

func (tf *streamTagFilter) initRegexp() {
	re, err := regexutil.NewPromRegex(tf.value)
	if err != nil {
		logger.Panicf("BUG: cannot parse regexp %q: %s", tf.value, err)
	}
	tf.regexp = re
}

func (tf *streamTagFilter) String() string {
	return quoteTokenIfNeeded(tf.tagName) + tf.op + strconv.Quote(tf.value)
}
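A StreamFilter is a disjunction of andStreamFilter groups, each holding streamTagFilter conditions. The sketch below builds the equivalent of `{job="foobar" or instance=~"host-.+"}` by hand instead of parsing it (hypothetical example, not part of this commit; assumes fmt is imported):

// exampleStreamFilter is an illustrative sketch, not used anywhere in this commit.
func exampleStreamFilter() {
	sf := &StreamFilter{
		orFilters: []*andStreamFilter{
			{tagFilters: []*streamTagFilter{{tagName: "job", op: "=", value: "foobar"}}},
			{tagFilters: []*streamTagFilter{{tagName: "instance", op: "=~", value: "host-.+"}}},
		},
	}
	fmt.Println(sf.String()) // {job="foobar" or instance=~"host-.+"}

	// marshalForCacheKey produces a stable byte representation suitable for cache keys.
	cacheKey := sf.marshalForCacheKey(nil)
	_ = cacheKey
}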
lib/logstorage/stream_id.go (new file, 69 lines)
@@ -0,0 +1,69 @@
package logstorage

import (
	"fmt"
)

// streamID is an internal id of a log stream.
//
// Blocks are ordered by streamID inside parts.
type streamID struct {
	// tenantID is a tenant id for the given stream.
	// It is located at the beginning of streamID in order
	// to physically group blocks for the same tenants on the storage.
	tenantID TenantID

	// id is an internal id, which uniquely identifies the stream in the tenant by its labels.
	// It is calculated as a hash of canonically sorted stream labels.
	//
	// Streams with identical sets of labels, which belong to distinct tenants, have the same id.
	id u128
}

// reset resets sid for subsequent re-use
func (sid *streamID) reset() {
	*sid = streamID{}
}

// String returns human-readable representation for sid.
func (sid *streamID) String() string {
	return fmt.Sprintf("(tenant_id=%s, id=%s)", &sid.tenantID, &sid.id)
}

// less returns true if sid is less than a.
func (sid *streamID) less(a *streamID) bool {
	if !sid.tenantID.equal(&a.tenantID) {
		return sid.tenantID.less(&a.tenantID)
	}
	return sid.id.less(&a.id)
}

// equal returns true if sid equals a.
func (sid *streamID) equal(a *streamID) bool {
	if !sid.tenantID.equal(&a.tenantID) {
		return false
	}
	return sid.id.equal(&a.id)
}

// marshal appends the marshaled sid to dst and returns the result
func (sid *streamID) marshal(dst []byte) []byte {
	dst = sid.tenantID.marshal(dst)
	dst = sid.id.marshal(dst)
	return dst
}

// unmarshal unmarshals sid from src and returns the tail from src.
func (sid *streamID) unmarshal(src []byte) ([]byte, error) {
	srcOrig := src
	tail, err := sid.tenantID.unmarshal(src)
	if err != nil {
		return srcOrig, err
	}
	src = tail
	tail, err = sid.id.unmarshal(src)
	if err != nil {
		return srcOrig, err
	}
	return tail, nil
}
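Since streamID.less compares the tenantID before the 128-bit stream hash, sorting streamIDs groups blocks of the same tenant together, which is the ordering used for blocks inside parts. A minimal sketch (hypothetical, not part of this commit; assumes the sort package is imported):

// sortStreamIDs is an illustrative sketch showing the block ordering in action.
func sortStreamIDs(sids []streamID) {
	sort.Slice(sids, func(i, j int) bool {
		return sids[i].less(&sids[j])
	})
}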
lib/logstorage/stream_id_test.go (new file, 172 lines)
@@ -0,0 +1,172 @@
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestStreamIDMarshalUnmarshal(t *testing.T) {
|
||||
f := func(sid *streamID, marshaledLen int) {
|
||||
t.Helper()
|
||||
data := sid.marshal(nil)
|
||||
if len(data) != marshaledLen {
|
||||
t.Fatalf("unexpected length of marshaled streamID; got %d; want %d", len(data), marshaledLen)
|
||||
}
|
||||
var sid2 streamID
|
||||
tail, err := sid2.unmarshal(data)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on unmarshal(%s): %s", sid, err)
|
||||
}
|
||||
if len(tail) != 0 {
|
||||
t.Fatalf("unexpected non-empty tail on unmarshal(%s): %X", sid, tail)
|
||||
}
|
||||
if !reflect.DeepEqual(sid, &sid2) {
|
||||
t.Fatalf("unexpected result on unmarshal; got %s; want %s", &sid2, sid)
|
||||
}
|
||||
s1 := sid.String()
|
||||
s2 := sid2.String()
|
||||
if s1 != s2 {
|
||||
t.Fatalf("unexpected string result on unmarshal; got %s; want %s", s2, s1)
|
||||
}
|
||||
}
|
||||
f(&streamID{}, 24)
|
||||
f(&streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
},
|
||||
id: u128{
|
||||
lo: 89,
|
||||
hi: 344334,
|
||||
},
|
||||
}, 24)
|
||||
}
|
||||
|
||||
func TestStreamIDUnmarshalFailure(t *testing.T) {
|
||||
f := func(data []byte) {
|
||||
t.Helper()
|
||||
dataOrig := append([]byte{}, data...)
|
||||
var sid streamID
|
||||
tail, err := sid.unmarshal(data)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if string(tail) != string(dataOrig) {
|
||||
t.Fatalf("unexpected tail; got %q; want %q", tail, dataOrig)
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
f([]byte("foo"))
|
||||
f([]byte("1234567890"))
|
||||
}
|
||||
|
||||
func TestStreamIDLessEqual(t *testing.T) {
|
||||
// compare equal values
|
||||
sid1 := &streamID{}
|
||||
sid2 := &streamID{}
|
||||
if sid1.less(sid2) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if sid2.less(sid1) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if !sid1.equal(sid2) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", sid1, sid2)
|
||||
}
|
||||
if !sid2.equal(sid1) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", sid2, sid1)
|
||||
}
|
||||
|
||||
sid1 = &streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 2,
|
||||
},
|
||||
id: u128{
|
||||
hi: 123,
|
||||
lo: 456,
|
||||
},
|
||||
}
|
||||
sid2 = &streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 2,
|
||||
},
|
||||
id: u128{
|
||||
hi: 123,
|
||||
lo: 456,
|
||||
},
|
||||
}
|
||||
if sid1.less(sid2) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if sid2.less(sid1) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if !sid1.equal(sid2) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", sid1, sid2)
|
||||
}
|
||||
if !sid2.equal(sid1) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", sid2, sid1)
|
||||
}
|
||||
|
||||
// compare unequal values
|
||||
sid1 = &streamID{
|
||||
id: u128{
|
||||
lo: 456,
|
||||
},
|
||||
}
|
||||
sid2 = &streamID{
|
||||
id: u128{
|
||||
hi: 123,
|
||||
},
|
||||
}
|
||||
if !sid1.less(sid2) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got false; want true", sid1, sid2)
|
||||
}
|
||||
if sid2.less(sid1) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got true; want false", sid2, sid1)
|
||||
}
|
||||
if sid1.equal(sid2) {
|
||||
t.Fatalf("unexpected result for equal(%s, %s); got true; want false", sid1, sid2)
|
||||
}
|
||||
|
||||
sid1 = &streamID{
|
||||
id: u128{
|
||||
hi: 123,
|
||||
lo: 456,
|
||||
},
|
||||
}
|
||||
sid2 = &streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
},
|
||||
}
|
||||
if !sid1.less(sid2) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got false; want true", sid1, sid2)
|
||||
}
|
||||
if sid2.less(sid1) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got true; want false", sid2, sid1)
|
||||
}
|
||||
if sid1.equal(sid2) {
|
||||
t.Fatalf("unexpected result for equal(%s, %s); got true; want false", sid1, sid2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamIDReset(t *testing.T) {
|
||||
sid := &streamID{
|
||||
tenantID: TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
},
|
||||
id: u128{
|
||||
hi: 234,
|
||||
lo: 9843,
|
||||
},
|
||||
}
|
||||
sid.reset()
|
||||
sidZero := &streamID{}
|
||||
if !reflect.DeepEqual(sid, sidZero) {
|
||||
t.Fatalf("non-zero streamID after reset(): %s", sid)
|
||||
}
|
||||
}
|
lib/logstorage/stream_tags.go (new file, 298 lines)
@@ -0,0 +1,298 @@
package logstorage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// GetStreamTags returns a StreamTags from pool.
|
||||
func GetStreamTags() *StreamTags {
|
||||
v := streamTagsPool.Get()
|
||||
if v == nil {
|
||||
return &StreamTags{}
|
||||
}
|
||||
return v.(*StreamTags)
|
||||
}
|
||||
|
||||
// PutStreamTags returns st to the pool.
|
||||
func PutStreamTags(st *StreamTags) {
|
||||
st.Reset()
|
||||
streamTagsPool.Put(st)
|
||||
}
|
||||
|
||||
var streamTagsPool sync.Pool
|
||||
|
||||
// StreamTags contains stream tags.
|
||||
type StreamTags struct {
|
||||
// buf holds all the data backed by tags
|
||||
buf []byte
|
||||
|
||||
// tags contains added tags.
|
||||
tags []streamTag
|
||||
}
|
||||
|
||||
// Reset resets st for re-use
|
||||
func (st *StreamTags) Reset() {
|
||||
st.buf = st.buf[:0]
|
||||
|
||||
tags := st.tags
|
||||
for i := range tags {
|
||||
t := &tags[i]
|
||||
t.Name = nil
|
||||
t.Value = nil
|
||||
}
|
||||
st.tags = tags[:0]
|
||||
}
|
||||
|
||||
// String returns string representation of st.
|
||||
func (st *StreamTags) String() string {
|
||||
b := st.marshalString(nil)
|
||||
return string(b)
|
||||
}
|
||||
|
||||
func (st *StreamTags) marshalString(dst []byte) []byte {
|
||||
dst = append(dst, '{')
|
||||
|
||||
tags := st.tags
|
||||
if len(tags) > 0 {
|
||||
dst = tags[0].marshalString(dst)
|
||||
tags = tags[1:]
|
||||
for i := range tags {
|
||||
dst = append(dst, ',')
|
||||
dst = tags[i].marshalString(dst)
|
||||
}
|
||||
}
|
||||
|
||||
dst = append(dst, '}')
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
// Add adds (name:value) tag to st.
|
||||
func (st *StreamTags) Add(name, value string) {
|
||||
if len(name) == 0 || len(value) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
buf := st.buf
|
||||
|
||||
bufLen := len(buf)
|
||||
buf = append(buf, name...)
|
||||
bName := buf[bufLen:]
|
||||
|
||||
bufLen = len(buf)
|
||||
buf = append(buf, value...)
|
||||
bValue := buf[bufLen:]
|
||||
|
||||
st.buf = buf
|
||||
|
||||
st.tags = append(st.tags, streamTag{
|
||||
Name: bName,
|
||||
Value: bValue,
|
||||
})
|
||||
}
|
||||
|
||||
// MarshalCanonical marshals st in a canonical way
|
||||
func (st *StreamTags) MarshalCanonical(dst []byte) []byte {
|
||||
sort.Sort(st)
|
||||
|
||||
tags := st.tags
|
||||
dst = encoding.MarshalVarUint64(dst, uint64(len(tags)))
|
||||
for i := range tags {
|
||||
tag := &tags[i]
|
||||
dst = encoding.MarshalBytes(dst, tag.Name)
|
||||
dst = encoding.MarshalBytes(dst, tag.Value)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// UnmarshalCanonical unmarshals st from src marshaled with MarshalCanonical.
|
||||
func (st *StreamTags) UnmarshalCanonical(src []byte) ([]byte, error) {
|
||||
st.Reset()
|
||||
|
||||
srcOrig := src
|
||||
|
||||
tail, n, err := encoding.UnmarshalVarUint64(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal tags len: %w", err)
|
||||
}
|
||||
src = tail
|
||||
for i := uint64(0); i < n; i++ {
|
||||
tail, name, err := encoding.UnmarshalBytes(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal tag name: %w", err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
tail, value, err := encoding.UnmarshalBytes(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal tag value: %w", err)
|
||||
}
|
||||
src = tail
|
||||
|
||||
sName := bytesutil.ToUnsafeString(name)
|
||||
sValue := bytesutil.ToUnsafeString(value)
|
||||
st.Add(sName, sValue)
|
||||
}
|
||||
|
||||
return src, nil
|
||||
}
|
||||
|
||||
func getStreamTagsString(streamTagsCanonical []byte) string {
|
||||
st := GetStreamTags()
|
||||
mustUnmarshalStreamTags(st, streamTagsCanonical)
|
||||
s := st.String()
|
||||
PutStreamTags(st)
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func mustUnmarshalStreamTags(dst *StreamTags, src []byte) {
|
||||
tail, err := dst.UnmarshalCanonical(src)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot unmarshal StreamTags from value obtained from cache: %s", err)
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
logger.Panicf("FATAL: unexpected tail left after unmarshaling StreamTags; len(tail)=%d; tail=%q", len(tail), tail)
|
||||
}
|
||||
}
|
||||
|
||||
// Len returns the number of tags in st.
|
||||
func (st *StreamTags) Len() int {
|
||||
return len(st.tags)
|
||||
}
|
||||
|
||||
// Less returns true if tag i is smaller than tag j.
|
||||
func (st *StreamTags) Less(i, j int) bool {
|
||||
tags := st.tags
|
||||
return tags[i].less(&tags[j])
|
||||
}
|
||||
|
||||
// Swap swaps i and j tags
|
||||
func (st *StreamTags) Swap(i, j int) {
|
||||
tags := st.tags
|
||||
tags[i], tags[j] = tags[j], tags[i]
|
||||
}
|
||||
|
||||
// streamTag represents a (name:value) tag for stream.
|
||||
type streamTag struct {
|
||||
Name []byte
|
||||
Value []byte
|
||||
}
|
||||
|
||||
func (tag *streamTag) marshalString(dst []byte) []byte {
|
||||
dst = append(dst, tag.Name...)
|
||||
dst = append(dst, '=')
|
||||
dst = strconv.AppendQuote(dst, bytesutil.ToUnsafeString(tag.Value))
|
||||
return dst
|
||||
}
|
||||
|
||||
// reset resets the tag.
|
||||
func (tag *streamTag) reset() {
|
||||
tag.Name = tag.Name[:0]
|
||||
tag.Value = tag.Value[:0]
|
||||
}
|
||||
|
||||
func (tag *streamTag) equal(t *streamTag) bool {
|
||||
return string(tag.Name) == string(t.Name) && string(tag.Value) == string(t.Value)
|
||||
}
|
||||
|
||||
func (tag *streamTag) less(t *streamTag) bool {
|
||||
if string(tag.Name) != string(t.Name) {
|
||||
return string(tag.Name) < string(t.Name)
|
||||
}
|
||||
return string(tag.Value) < string(t.Value)
|
||||
}
|
||||
|
||||
func (tag *streamTag) indexdbMarshal(dst []byte) []byte {
|
||||
dst = marshalTagValue(dst, tag.Name)
|
||||
dst = marshalTagValue(dst, tag.Value)
|
||||
return dst
|
||||
}
|
||||
|
||||
func (tag *streamTag) indexdbUnmarshal(src []byte) ([]byte, error) {
|
||||
var err error
|
||||
src, tag.Name, err = unmarshalTagValue(tag.Name[:0], src)
|
||||
if err != nil {
|
||||
return src, fmt.Errorf("cannot unmarshal key: %w", err)
|
||||
}
|
||||
src, tag.Value, err = unmarshalTagValue(tag.Value[:0], src)
|
||||
if err != nil {
|
||||
return src, fmt.Errorf("cannot unmarshal value: %w", err)
|
||||
}
|
||||
return src, nil
|
||||
}
|
||||
|
||||
const (
|
||||
escapeChar = 0
|
||||
tagSeparatorChar = 1
|
||||
kvSeparatorChar = 2
|
||||
)
|
||||
|
||||
func marshalTagValue(dst, src []byte) []byte {
|
||||
n1 := bytes.IndexByte(src, escapeChar)
|
||||
n2 := bytes.IndexByte(src, tagSeparatorChar)
|
||||
n3 := bytes.IndexByte(src, kvSeparatorChar)
|
||||
if n1 < 0 && n2 < 0 && n3 < 0 {
|
||||
// Fast path.
|
||||
dst = append(dst, src...)
|
||||
dst = append(dst, tagSeparatorChar)
|
||||
return dst
|
||||
}
|
||||
|
||||
// Slow path.
|
||||
for _, ch := range src {
|
||||
switch ch {
|
||||
case escapeChar:
|
||||
dst = append(dst, escapeChar, '0')
|
||||
case tagSeparatorChar:
|
||||
dst = append(dst, escapeChar, '1')
|
||||
case kvSeparatorChar:
|
||||
dst = append(dst, escapeChar, '2')
|
||||
default:
|
||||
dst = append(dst, ch)
|
||||
}
|
||||
}
|
||||
|
||||
dst = append(dst, tagSeparatorChar)
|
||||
return dst
|
||||
}
|
||||
|
||||
func unmarshalTagValue(dst, src []byte) ([]byte, []byte, error) {
|
||||
n := bytes.IndexByte(src, tagSeparatorChar)
|
||||
if n < 0 {
|
||||
return src, dst, fmt.Errorf("cannot find the end of tag value")
|
||||
}
|
||||
b := src[:n]
|
||||
src = src[n+1:]
|
||||
for {
|
||||
n := bytes.IndexByte(b, escapeChar)
|
||||
if n < 0 {
|
||||
dst = append(dst, b...)
|
||||
return src, dst, nil
|
||||
}
|
||||
dst = append(dst, b[:n]...)
|
||||
b = b[n+1:]
|
||||
if len(b) == 0 {
|
||||
return src, dst, fmt.Errorf("missing escaped char")
|
||||
}
|
||||
switch b[0] {
|
||||
case '0':
|
||||
dst = append(dst, escapeChar)
|
||||
case '1':
|
||||
dst = append(dst, tagSeparatorChar)
|
||||
case '2':
|
||||
dst = append(dst, kvSeparatorChar)
|
||||
default:
|
||||
return src, dst, fmt.Errorf("unsupported escaped char: %c", b[0])
|
||||
}
|
||||
b = b[1:]
|
||||
}
|
||||
}
|
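MarshalCanonical sorts the tags before marshaling, so the resulting bytes act as a stable identity for a stream's label set regardless of insertion order. A round-trip sketch (hypothetical, not part of this commit; assumes fmt is imported):

// streamTagsRoundTrip is an illustrative sketch, not used anywhere in this commit.
func streamTagsRoundTrip() {
	st := GetStreamTags()
	st.Add("job", "foobar")
	st.Add("instance", "host-1:234")
	data := st.MarshalCanonical(nil)
	PutStreamTags(st)

	st2 := GetStreamTags()
	if _, err := st2.UnmarshalCanonical(data); err != nil {
		panic(err)
	}
	fmt.Println(st2.String()) // {instance="host-1:234",job="foobar"}
	PutStreamTags(st2)
}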
lib/logstorage/tenant_id.go (new file, 91 lines)
@@ -0,0 +1,91 @@
package logstorage

import (
	"fmt"
	"net/http"
	"strconv"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
)

// TenantID is an id of a tenant for log streams.
//
// Each log stream is associated with a single TenantID.
type TenantID struct {
	// AccountID is the id of the account for the log stream.
	AccountID uint32

	// ProjectID is the id of the project for the log stream.
	ProjectID uint32
}

// Reset resets tid.
func (tid *TenantID) Reset() {
	tid.AccountID = 0
	tid.ProjectID = 0
}

// String returns human-readable representation of tid
func (tid *TenantID) String() string {
	return fmt.Sprintf("{accountID=%d,projectID=%d}", tid.AccountID, tid.ProjectID)
}

// equal returns true if tid equals a.
func (tid *TenantID) equal(a *TenantID) bool {
	return tid.AccountID == a.AccountID && tid.ProjectID == a.ProjectID
}

// less returns true if tid is less than a.
func (tid *TenantID) less(a *TenantID) bool {
	if tid.AccountID != a.AccountID {
		return tid.AccountID < a.AccountID
	}
	return tid.ProjectID < a.ProjectID
}

// marshal appends the marshaled tid to dst and returns the result
func (tid *TenantID) marshal(dst []byte) []byte {
	dst = encoding.MarshalUint32(dst, tid.AccountID)
	dst = encoding.MarshalUint32(dst, tid.ProjectID)
	return dst
}

// unmarshal unmarshals tid from src and returns the remaining tail.
func (tid *TenantID) unmarshal(src []byte) ([]byte, error) {
	if len(src) < 8 {
		return src, fmt.Errorf("cannot unmarshal tenantID from %d bytes; need at least 8 bytes", len(src))
	}
	tid.AccountID = encoding.UnmarshalUint32(src[:4])
	tid.ProjectID = encoding.UnmarshalUint32(src[4:])
	return src[8:], nil
}

// GetTenantIDFromRequest returns tenantID from r.
func GetTenantIDFromRequest(r *http.Request) (TenantID, error) {
	var tenantID TenantID

	accountID, err := getUint32FromHeader(r, "AccountID")
	if err != nil {
		return tenantID, err
	}
	projectID, err := getUint32FromHeader(r, "ProjectID")
	if err != nil {
		return tenantID, err
	}

	tenantID.AccountID = accountID
	tenantID.ProjectID = projectID
	return tenantID, nil
}

func getUint32FromHeader(r *http.Request, headerName string) (uint32, error) {
	s := r.Header.Get(headerName)
	if len(s) == 0 {
		return 0, nil
	}
	n, err := strconv.ParseUint(s, 10, 32)
	if err != nil {
		return 0, fmt.Errorf("cannot parse %s header %q: %w", headerName, s, err)
	}
	return uint32(n), nil
}
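GetTenantIDFromRequest reads the AccountID and ProjectID HTTP headers and falls back to 0 when a header is missing. A usage sketch (hypothetical, not part of this commit; the URL is arbitrary and fmt/net/http are assumed to be imported):

// exampleTenantIDFromRequest is an illustrative sketch, not used anywhere in this commit.
func exampleTenantIDFromRequest() {
	req, err := http.NewRequest("GET", "http://localhost/query", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("AccountID", "12")
	req.Header.Set("ProjectID", "34")

	tenantID, err := GetTenantIDFromRequest(req)
	if err != nil {
		panic(err)
	}
	fmt.Println(tenantID.String()) // {accountID=12,projectID=34}
}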
lib/logstorage/tenant_id_test.go (new file, 124 lines)
@@ -0,0 +1,124 @@
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTenantIDMarshalUnmarshal(t *testing.T) {
|
||||
f := func(tid *TenantID) {
|
||||
t.Helper()
|
||||
data := tid.marshal(nil)
|
||||
var tid2 TenantID
|
||||
tail, err := tid2.unmarshal(data)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error at unmarshal(%s): %s", tid, err)
|
||||
}
|
||||
if len(tail) != 0 {
|
||||
t.Fatalf("unexpected non-empty tail after unmarshal(%s): %X", tid, tail)
|
||||
}
|
||||
if !reflect.DeepEqual(tid, &tid2) {
|
||||
t.Fatalf("unexpected value after unmarshal; got %s; want %s", &tid2, tid)
|
||||
}
|
||||
s1 := tid.String()
|
||||
s2 := tid2.String()
|
||||
if s1 != s2 {
|
||||
t.Fatalf("unexpected string value after unmarshal; got %s; want %s", s2, s1)
|
||||
}
|
||||
}
|
||||
f(&TenantID{})
|
||||
f(&TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
})
|
||||
}
|
||||
|
||||
func TestTenantIDUnmarshalFailure(t *testing.T) {
|
||||
f := func(data []byte) {
|
||||
t.Helper()
|
||||
dataOrig := append([]byte{}, data...)
|
||||
var tid TenantID
|
||||
tail, err := tid.unmarshal(data)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if string(tail) != string(dataOrig) {
|
||||
t.Fatalf("unexpected tail; got %q; want %q", tail, dataOrig)
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
f([]byte("abc"))
|
||||
}
|
||||
|
||||
func TestTenantIDLessEqual(t *testing.T) {
|
||||
// compare equal values
|
||||
tid1 := &TenantID{}
|
||||
tid2 := &TenantID{}
|
||||
if tid1.less(tid2) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if tid2.less(tid1) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if !tid1.equal(tid2) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", tid1, tid2)
|
||||
}
|
||||
if !tid2.equal(tid1) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", tid2, tid1)
|
||||
}
|
||||
|
||||
tid1 = &TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
}
|
||||
tid2 = &TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
}
|
||||
if tid1.less(tid2) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if tid2.less(tid1) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if !tid1.equal(tid2) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", tid1, tid2)
|
||||
}
|
||||
if !tid2.equal(tid1) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", tid2, tid1)
|
||||
}
|
||||
|
||||
// compare unequal values
|
||||
tid1 = &TenantID{
|
||||
ProjectID: 456,
|
||||
}
|
||||
tid2 = &TenantID{
|
||||
AccountID: 123,
|
||||
}
|
||||
if !tid1.less(tid2) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got false; want true", tid1, tid2)
|
||||
}
|
||||
if tid2.less(tid1) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got true; want false", tid2, tid1)
|
||||
}
|
||||
if tid1.equal(tid2) {
|
||||
t.Fatalf("unexpected result for equal(%s, %s); got true; want false", tid1, tid2)
|
||||
}
|
||||
|
||||
tid1 = &TenantID{
|
||||
AccountID: 123,
|
||||
}
|
||||
tid2 = &TenantID{
|
||||
AccountID: 123,
|
||||
ProjectID: 456,
|
||||
}
|
||||
if !tid1.less(tid2) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got false; want true", tid1, tid2)
|
||||
}
|
||||
if tid2.less(tid1) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got true; want false", tid2, tid1)
|
||||
}
|
||||
if tid1.equal(tid2) {
|
||||
t.Fatalf("unexpected result for equal(%s, %s); got true; want false", tid1, tid2)
|
||||
}
|
||||
}
|
lib/logstorage/tokenizer.go (new file, 153 lines)
@@ -0,0 +1,153 @@
package logstorage
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"sync"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// tokenizeStrings extracts word tokens from a, appends them to dst and returns the result.
|
||||
func tokenizeStrings(dst, a []string) []string {
|
||||
t := getTokenizer()
|
||||
m := t.m
|
||||
for i, s := range a {
|
||||
if i > 0 && s == a[i-1] {
|
||||
// This string has already been tokenized
|
||||
continue
|
||||
}
|
||||
tokenizeString(m, s)
|
||||
}
|
||||
dstLen := len(dst)
|
||||
for k := range t.m {
|
||||
dst = append(dst, k)
|
||||
}
|
||||
putTokenizer(t)
|
||||
|
||||
// Sort tokens with zero memory allocations
|
||||
ss := getStringsSorter(dst[dstLen:])
|
||||
sort.Sort(ss)
|
||||
putStringsSorter(ss)
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
type tokenizer struct {
|
||||
m map[string]struct{}
|
||||
}
|
||||
|
||||
func (t *tokenizer) reset() {
|
||||
m := t.m
|
||||
for k := range m {
|
||||
delete(m, k)
|
||||
}
|
||||
}
|
||||
|
||||
func tokenizeString(dst map[string]struct{}, s string) {
|
||||
for len(s) > 0 {
|
||||
// Search for the next token.
|
||||
nextIdx := len(s)
|
||||
for i, c := range s {
|
||||
if isTokenRune(c) {
|
||||
nextIdx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
s = s[nextIdx:]
|
||||
// Search for the end of the token
|
||||
nextIdx = len(s)
|
||||
for i, c := range s {
|
||||
if !isTokenRune(c) {
|
||||
nextIdx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
token := s[:nextIdx]
|
||||
if len(token) > 0 {
|
||||
dst[token] = struct{}{}
|
||||
}
|
||||
s = s[nextIdx:]
|
||||
}
|
||||
}
|
||||
|
||||
func isTokenRune(c rune) bool {
|
||||
return unicode.IsLetter(c) || unicode.IsDigit(c) || c == '_'
|
||||
}
|
||||
|
||||
func getTokenizer() *tokenizer {
|
||||
v := tokenizerPool.Get()
|
||||
if v == nil {
|
||||
return &tokenizer{
|
||||
m: make(map[string]struct{}),
|
||||
}
|
||||
}
|
||||
return v.(*tokenizer)
|
||||
}
|
||||
|
||||
func putTokenizer(t *tokenizer) {
|
||||
t.reset()
|
||||
tokenizerPool.Put(t)
|
||||
}
|
||||
|
||||
var tokenizerPool sync.Pool
|
||||
|
||||
type stringsSorter struct {
|
||||
a []string
|
||||
}
|
||||
|
||||
func (ss *stringsSorter) Len() int {
|
||||
return len(ss.a)
|
||||
}
|
||||
func (ss *stringsSorter) Swap(i, j int) {
|
||||
a := ss.a
|
||||
a[i], a[j] = a[j], a[i]
|
||||
}
|
||||
func (ss *stringsSorter) Less(i, j int) bool {
|
||||
a := ss.a
|
||||
return a[i] < a[j]
|
||||
}
|
||||
|
||||
func getStringsSorter(a []string) *stringsSorter {
|
||||
v := stringsSorterPool.Get()
|
||||
if v == nil {
|
||||
return &stringsSorter{
|
||||
a: a,
|
||||
}
|
||||
}
|
||||
ss := v.(*stringsSorter)
|
||||
ss.a = a
|
||||
return ss
|
||||
}
|
||||
|
||||
func putStringsSorter(ss *stringsSorter) {
|
||||
ss.a = nil
|
||||
stringsSorterPool.Put(ss)
|
||||
}
|
||||
|
||||
var stringsSorterPool sync.Pool
|
||||
|
||||
type tokensBuf struct {
|
||||
A []string
|
||||
}
|
||||
|
||||
func (tb *tokensBuf) reset() {
|
||||
a := tb.A
|
||||
for i := range a {
|
||||
a[i] = ""
|
||||
}
|
||||
tb.A = a[:0]
|
||||
}
|
||||
|
||||
func getTokensBuf() *tokensBuf {
|
||||
v := tokensBufPool.Get()
|
||||
if v == nil {
|
||||
return &tokensBuf{}
|
||||
}
|
||||
return v.(*tokensBuf)
|
||||
}
|
||||
|
||||
func putTokensBuf(tb *tokensBuf) {
|
||||
tb.reset()
|
||||
tokensBufPool.Put(tb)
|
||||
}
|
||||
|
||||
var tokensBufPool sync.Pool
|
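tokenizeStrings splits values into letter/digit/underscore tokens, drops duplicates via the pooled tokenizer map and returns the tokens sorted. A tiny usage sketch (hypothetical, not part of this commit; assumes fmt is imported):

// exampleTokenize is an illustrative sketch, not used anywhere in this commit.
func exampleTokenize() {
	tokens := tokenizeStrings(nil, []string{"GET /api/v1/query 200"})
	fmt.Println(tokens) // [200 GET api query v1]
}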
lib/logstorage/tokenizer_test.go (new file, 29 lines)
@@ -0,0 +1,29 @@
package logstorage

import (
	"reflect"
	"strings"
	"testing"
)

func TestTokenizeStrings(t *testing.T) {
	f := func(a, tokensExpected []string) {
		t.Helper()
		tokens := tokenizeStrings(nil, a)
		if !reflect.DeepEqual(tokens, tokensExpected) {
			t.Fatalf("unexpected tokens;\ngot\n%q\nwant\n%q", tokens, tokensExpected)
		}
	}
	f(nil, nil)
	f([]string{""}, nil)
	f([]string{"foo"}, []string{"foo"})
	f([]string{"foo bar---.!!([baz]!!! %$# TaSte"}, []string{"TaSte", "bar", "baz", "foo"})
	f([]string{"теСТ 1234 f12.34", "34 f12 AS"}, []string{"1234", "34", "AS", "f12", "теСТ"})
	f(strings.Split(`
Apr 28 13:43:38 localhost whoopsie[2812]: [13:43:38] online
Apr 28 13:45:01 localhost CRON[12181]: (root) CMD (command -v debian-sa1 > /dev/null && debian-sa1 1 1)
Apr 28 13:48:01 localhost kernel: [36020.497806] CPU0: Core temperature above threshold, cpu clock throttled (total events = 22034)
`, "\n"), []string{"01", "1", "12181", "13", "22034", "28", "2812", "36020", "38", "43", "45", "48", "497806", "Apr", "CMD", "CPU0", "CRON",
		"Core", "above", "clock", "command", "cpu", "debian", "dev", "events", "kernel", "localhost", "null", "online", "root",
		"sa1", "temperature", "threshold", "throttled", "total", "v", "whoopsie"})
}
lib/logstorage/tokenizer_timing_test.go (new file, 19 lines)
@@ -0,0 +1,19 @@
package logstorage

import (
	"strings"
	"testing"
)

func BenchmarkTokenizeStrings(b *testing.B) {
	a := strings.Split(benchLogs, "\n")

	b.ReportAllocs()
	b.SetBytes(int64(len(benchLogs)))
	b.RunParallel(func(pb *testing.PB) {
		var tokens []string
		for pb.Next() {
			tokens = tokenizeStrings(tokens[:0], a)
		}
	})
}
lib/logstorage/u128.go (new file, 50 lines)
@@ -0,0 +1,50 @@
package logstorage

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
)

// u128 is a 128-bit uint number.
//
// It is used as a unique id of a stream.
type u128 struct {
	hi uint64
	lo uint64
}

// String returns human-readable representation of u.
func (u *u128) String() string {
	return fmt.Sprintf("{hi=%d,lo=%d}", u.hi, u.lo)
}

// less returns true if u is less than a.
func (u *u128) less(a *u128) bool {
	if u.hi != a.hi {
		return u.hi < a.hi
	}
	return u.lo < a.lo
}

// equal returns true if u equals a.
func (u *u128) equal(a *u128) bool {
	return u.hi == a.hi && u.lo == a.lo
}

// marshal appends the marshaled u to dst and returns the result.
func (u *u128) marshal(dst []byte) []byte {
	dst = encoding.MarshalUint64(dst, u.hi)
	dst = encoding.MarshalUint64(dst, u.lo)
	return dst
}

// unmarshal unmarshals u from src and returns the tail.
func (u *u128) unmarshal(src []byte) ([]byte, error) {
	if len(src) < 16 {
		return src, fmt.Errorf("cannot unmarshal u128 from %d bytes; need at least 16 bytes", len(src))
	}
	u.hi = encoding.UnmarshalUint64(src[:8])
	u.lo = encoding.UnmarshalUint64(src[8:])
	return src[16:], nil
}
lib/logstorage/u128_test.go (new file, 127 lines)
@@ -0,0 +1,127 @@
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestU128MarshalUnmarshal(t *testing.T) {
|
||||
f := func(u *u128, marshaledLen int) {
|
||||
t.Helper()
|
||||
data := u.marshal(nil)
|
||||
if len(data) != marshaledLen {
|
||||
t.Fatalf("unexpected length of marshaled u128; got %d; want %d", len(data), marshaledLen)
|
||||
}
|
||||
var u2 u128
|
||||
tail, err := u2.unmarshal(data)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error at unmarshal(%s): %s", u, err)
|
||||
}
|
||||
if len(tail) != 0 {
|
||||
t.Fatalf("unexpected non-empty tail after unmarshal(%s): %X", u, tail)
|
||||
}
|
||||
if !reflect.DeepEqual(u, &u2) {
|
||||
t.Fatalf("unexpected value obtained from unmarshal(%s); got %s; want %s", u, &u2, u)
|
||||
}
|
||||
s1 := u.String()
|
||||
s2 := u2.String()
|
||||
if s1 != s2 {
|
||||
t.Fatalf("unexpected string representation after unmarshal; got %s; want %s", s2, s1)
|
||||
}
|
||||
}
|
||||
f(&u128{}, 16)
|
||||
f(&u128{
|
||||
hi: 123,
|
||||
lo: 456,
|
||||
}, 16)
|
||||
}
|
||||
|
||||
func TestU128UnmarshalFailure(t *testing.T) {
|
||||
f := func(data []byte) {
|
||||
t.Helper()
|
||||
dataOrig := append([]byte{}, data...)
|
||||
var u u128
|
||||
tail, err := u.unmarshal(data)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if string(tail) != string(dataOrig) {
|
||||
t.Fatalf("unexpected tail; got %q; want %q", tail, dataOrig)
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
f([]byte("foo"))
|
||||
}
|
||||
|
||||
func TestU128LessEqual(t *testing.T) {
|
||||
// compare equal values
|
||||
u1 := &u128{}
|
||||
u2 := &u128{}
|
||||
if u1.less(u2) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if u2.less(u1) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if !u1.equal(u2) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", u1, u2)
|
||||
}
|
||||
if !u2.equal(u1) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", u2, u1)
|
||||
}
|
||||
|
||||
u1 = &u128{
|
||||
hi: 123,
|
||||
lo: 456,
|
||||
}
|
||||
u2 = &u128{
|
||||
hi: 123,
|
||||
lo: 456,
|
||||
}
|
||||
if u1.less(u2) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if u2.less(u1) {
|
||||
t.Fatalf("less for equal values must return false")
|
||||
}
|
||||
if !u1.equal(u2) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", u1, u2)
|
||||
}
|
||||
if !u2.equal(u1) {
|
||||
t.Fatalf("unexpected equal(%s, %s) result; got false; want true", u2, u1)
|
||||
}
|
||||
|
||||
// compare unequal values
|
||||
u1 = &u128{
|
||||
lo: 456,
|
||||
}
|
||||
u2 = &u128{
|
||||
hi: 123,
|
||||
}
|
||||
if !u1.less(u2) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got false; want true", u1, u2)
|
||||
}
|
||||
if u2.less(u1) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got true; want false", u2, u1)
|
||||
}
|
||||
if u1.equal(u2) {
|
||||
t.Fatalf("unexpected result for equal(%s, %s); got true; want false", u1, u2)
|
||||
}
|
||||
|
||||
u1 = &u128{
|
||||
hi: 123,
|
||||
}
|
||||
u2 = &u128{
|
||||
hi: 123,
|
||||
lo: 456,
|
||||
}
|
||||
if !u1.less(u2) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got false; want true", u1, u2)
|
||||
}
|
||||
if u2.less(u1) {
|
||||
t.Fatalf("unexpected result for less(%s, %s); got true; want false", u2, u1)
|
||||
}
|
||||
if u1.equal(u2) {
|
||||
t.Fatalf("unexpected result for equal(%s, %s); got true; want false", u1, u2)
|
||||
}
|
||||
}
|
lib/logstorage/values_encoder.go (new file, 742 lines)
@@ -0,0 +1,742 @@
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"math/bits"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// valueType is the type of values stored in every column block.
|
||||
type valueType byte
|
||||
|
||||
const (
|
||||
// valueTypeUnknown is used for determining whether the value type is unknown.
|
||||
valueTypeUnknown = valueType(0)
|
||||
|
||||
// default encoding for column blocks. Strings are stored as is.
|
||||
valueTypeString = valueType(1)
|
||||
|
||||
// column blocks with small number of unique values are encoded as dict.
|
||||
valueTypeDict = valueType(2)
|
||||
|
||||
// uint values up to 2^8-1 are encoded into valueTypeUint8.
|
||||
// Every value occupies a single byte.
|
||||
valueTypeUint8 = valueType(3)
|
||||
|
||||
// uint values up to 2^16-1 are encoded into valueTypeUint16.
|
||||
// Every value occupies 2 bytes.
|
||||
valueTypeUint16 = valueType(4)
|
||||
|
||||
// uint values up to 2^32-1 are encoded into valueTypeUint32.
|
||||
// Every value occupies 4 bytes.
|
||||
valueTypeUint32 = valueType(5)
|
||||
|
||||
// uint values up to 2^64-1 are encoded into valueTypeUint64.
|
||||
// Every value occupies 8 bytes.
|
||||
valueTypeUint64 = valueType(6)
|
||||
|
||||
// floating-point values are encoded into valueTypeFloat64.
|
||||
valueTypeFloat64 = valueType(7)
|
||||
|
||||
// column blocks with ipv4 addresses are encoded as 4-byte strings.
|
||||
valueTypeIPv4 = valueType(8)
|
||||
|
||||
// column blocks with ISO8601 timestamps are encoded into valueTypeTimestampISO8601.
|
||||
// These timestamps are commonly used by Logstash.
|
||||
valueTypeTimestampISO8601 = valueType(9)
|
||||
)
|
||||
|
||||
type valuesEncoder struct {
|
||||
// buf contains data for values.
|
||||
buf []byte
|
||||
|
||||
// values contains encoded values.
|
||||
values []string
|
||||
}
|
||||
|
||||
func (ve *valuesEncoder) reset() {
|
||||
ve.buf = ve.buf[:0]
|
||||
|
||||
vs := ve.values
|
||||
for i := range vs {
|
||||
vs[i] = ""
|
||||
}
|
||||
ve.values = vs[:0]
|
||||
}
|
||||
|
||||
// encode encodes values to ve.values and returns the encoded value type with min/max encoded values.
|
||||
func (ve *valuesEncoder) encode(values []string, dict *valuesDict) (valueType, uint64, uint64) {
|
||||
ve.reset()
|
||||
|
||||
if len(values) == 0 {
|
||||
return valueTypeString, 0, 0
|
||||
}
|
||||
|
||||
var vt valueType
|
||||
var minValue, maxValue uint64
|
||||
|
||||
// Try dict encoding at first, since it gives the highest speedup during querying.
|
||||
// It also usually gives the best compression, since every value is encoded as a single byte.
|
||||
ve.buf, ve.values, vt = tryDictEncoding(ve.buf[:0], ve.values[:0], values, dict)
|
||||
if vt != valueTypeUnknown {
|
||||
return vt, 0, 0
|
||||
}
|
||||
|
||||
ve.buf, ve.values, vt, minValue, maxValue = tryUintEncoding(ve.buf[:0], ve.values[:0], values)
|
||||
if vt != valueTypeUnknown {
|
||||
return vt, minValue, maxValue
|
||||
}
|
||||
|
||||
ve.buf, ve.values, vt, minValue, maxValue = tryFloat64Encoding(ve.buf[:0], ve.values[:0], values)
|
||||
if vt != valueTypeUnknown {
|
||||
return vt, minValue, maxValue
|
||||
}
|
||||
|
||||
ve.buf, ve.values, vt, minValue, maxValue = tryIPv4Encoding(ve.buf[:0], ve.values[:0], values)
|
||||
if vt != valueTypeUnknown {
|
||||
return vt, minValue, maxValue
|
||||
}
|
||||
|
||||
ve.buf, ve.values, vt, minValue, maxValue = tryTimestampISO8601Encoding(ve.buf[:0], ve.values[:0], values)
|
||||
if vt != valueTypeUnknown {
|
||||
return vt, minValue, maxValue
|
||||
}
|
||||
|
||||
// Fall back to default encoding, e.g. leave values as is.
|
||||
ve.values = append(ve.values[:0], values...)
|
||||
return valueTypeString, 0, 0
|
||||
}

func getValuesEncoder() *valuesEncoder {
	v := valuesEncoderPool.Get()
	if v == nil {
		return &valuesEncoder{}
	}
	return v.(*valuesEncoder)
}

func putValuesEncoder(ve *valuesEncoder) {
	ve.reset()
	valuesEncoderPool.Put(ve)
}

var valuesEncoderPool sync.Pool

type valuesDecoder struct {
	buf []byte
}

func (vd *valuesDecoder) reset() {
	vd.buf = vd.buf[:0]
}

// decodeInplace decodes values encoded with the given vt and the given dict inplace.
//
// The decoded values remain valid until vd.reset() is called.
func (vd *valuesDecoder) decodeInplace(values []string, vt valueType, dict *valuesDict) error {
	// Do not reset vd.buf, since it may contain previously decoded data,
	// which must be preserved until the reset() call.
	dstBuf := vd.buf

	switch vt {
	case valueTypeString:
		// Nothing to do - the values are already decoded.
	case valueTypeUint8:
		for i, v := range values {
			if len(v) != 1 {
				return fmt.Errorf("unexpected value length for uint8; got %d; want 1", len(v))
			}
			n := uint64(v[0])
			dstLen := len(dstBuf)
			dstBuf = strconv.AppendUint(dstBuf, n, 10)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeUint16:
		for i, v := range values {
			if len(v) != 2 {
				return fmt.Errorf("unexpected value length for uint16; got %d; want 2", len(v))
			}
			b := bytesutil.ToUnsafeBytes(v)
			n := uint64(encoding.UnmarshalUint16(b))
			dstLen := len(dstBuf)
			dstBuf = strconv.AppendUint(dstBuf, n, 10)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeUint32:
		for i, v := range values {
			if len(v) != 4 {
				return fmt.Errorf("unexpected value length for uint32; got %d; want 4", len(v))
			}
			b := bytesutil.ToUnsafeBytes(v)
			n := uint64(encoding.UnmarshalUint32(b))
			dstLen := len(dstBuf)
			dstBuf = strconv.AppendUint(dstBuf, n, 10)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeUint64:
		for i, v := range values {
			if len(v) != 8 {
				return fmt.Errorf("unexpected value length for uint64; got %d; want 8", len(v))
			}
			b := bytesutil.ToUnsafeBytes(v)
			n := encoding.UnmarshalUint64(b)
			dstLen := len(dstBuf)
			dstBuf = strconv.AppendUint(dstBuf, n, 10)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeDict:
		dictValues := dict.values
		for i, v := range values {
			id := int(v[0])
			if id >= len(dictValues) {
				return fmt.Errorf("unexpected dictionary id: %d; it must be smaller than %d", id, len(dictValues))
			}
			values[i] = dictValues[id]
		}
	case valueTypeIPv4:
		for i, v := range values {
			if len(v) != 4 {
				return fmt.Errorf("unexpected value length for ipv4; got %d; want 4", len(v))
			}
			dstLen := len(dstBuf)
			dstBuf = toIPv4String(dstBuf, v)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeTimestampISO8601:
		for i, v := range values {
			if len(v) != 8 {
				return fmt.Errorf("unexpected value length for timestampISO8601; got %d; want 8", len(v))
			}
			dstLen := len(dstBuf)
			dstBuf = toTimestampISO8601String(dstBuf, v)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeFloat64:
		for i, v := range values {
			if len(v) != 8 {
				return fmt.Errorf("unexpected value length for float64; got %d; want 8", len(v))
			}
			dstLen := len(dstBuf)
			dstBuf = toFloat64String(dstBuf, v)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	default:
		return fmt.Errorf("unknown valueType=%d", vt)
	}

	vd.buf = dstBuf
	return nil
}
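
// Note (added for clarity; illustrative, not part of the original file):
// decodeInplace rewrites the entries of values in place. For numeric types the
// decoded strings reference vd.buf - e.g. a one-byte valueTypeUint8 value 0x2a
// becomes the string "42" - so they stay valid only until vd.reset() is called.
// For valueTypeDict the entries point into dict.values instead. A minimal
// sketch for a previously dict-encoded block:
//
//	vd := getValuesDecoder()
//	if err := vd.decodeInplace(encodedValues, valueTypeDict, &dict); err != nil {
//		// handle the error
//	}
//	// Use the decoded strings before calling putValuesDecoder(vd).
//	putValuesDecoder(vd)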

func toTimestampISO8601String(dst []byte, v string) []byte {
	b := bytesutil.ToUnsafeBytes(v)
	n := encoding.UnmarshalUint64(b)
	t := time.Unix(0, int64(n)).UTC()
	dst = t.AppendFormat(dst, iso8601Timestamp)
	return dst
}

func toIPv4String(dst []byte, v string) []byte {
	dst = strconv.AppendUint(dst, uint64(v[0]), 10)
	dst = append(dst, '.')
	dst = strconv.AppendUint(dst, uint64(v[1]), 10)
	dst = append(dst, '.')
	dst = strconv.AppendUint(dst, uint64(v[2]), 10)
	dst = append(dst, '.')
	dst = strconv.AppendUint(dst, uint64(v[3]), 10)
	return dst
}

func toFloat64String(dst []byte, v string) []byte {
	b := bytesutil.ToUnsafeBytes(v)
	n := encoding.UnmarshalUint64(b)
	f := math.Float64frombits(n)
	dst = strconv.AppendFloat(dst, f, 'g', -1, 64)
	return dst
}

func getValuesDecoder() *valuesDecoder {
	v := valuesDecoderPool.Get()
	if v == nil {
		return &valuesDecoder{}
	}
	return v.(*valuesDecoder)
}

func putValuesDecoder(vd *valuesDecoder) {
	vd.reset()
	valuesDecoderPool.Put(vd)
}

var valuesDecoderPool sync.Pool

func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	u64s := encoding.GetUint64s(len(srcValues))
	defer encoding.PutUint64s(u64s)
	a := u64s.A
	var minValue, maxValue uint64
	for i, v := range srcValues {
		n, ok := tryParseTimestampISO8601(v)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		a[i] = n
		if i == 0 || n < minValue {
			minValue = n
		}
		if i == 0 || n > maxValue {
			maxValue = n
		}
	}
	for _, n := range a {
		dstLen := len(dstBuf)
		dstBuf = encoding.MarshalUint64(dstBuf, n)
		v := bytesutil.ToUnsafeString(dstBuf[dstLen:])
		dstValues = append(dstValues, v)
	}
	return dstBuf, dstValues, valueTypeTimestampISO8601, minValue, maxValue
}

func tryParseTimestampISO8601(s string) (uint64, bool) {
	// Do not parse timestamps with a timezone, since they cannot be converted back
	// to the same string representation in the general case.
	// This may break search.
	if len(s) != len("2006-01-02T15:04:05.000Z") {
		return 0, false
	}

	// Parse year
	if s[len("YYYY")] != '-' {
		return 0, false
	}
	yearStr := s[:len("YYYY")]
	n, ok := tryParseUint64(yearStr)
	if !ok || n > 3000 {
		return 0, false
	}
	year := int(n)
	s = s[len("YYYY")+1:]

	// Parse month
	if s[len("MM")] != '-' {
		return 0, false
	}
	monthStr := s[:len("MM")]
	n, ok = tryParseUint64(monthStr)
	if !ok || n < 1 || n > 12 {
		return 0, false
	}
	month := time.Month(n)
	s = s[len("MM")+1:]

	// Parse day
	if s[len("DD")] != 'T' {
		return 0, false
	}
	dayStr := s[:len("DD")]
	n, ok = tryParseUint64(dayStr)
	if !ok || n < 1 || n > 31 {
		return 0, false
	}
	day := int(n)
	s = s[len("DD")+1:]

	// Parse hour
	if s[len("HH")] != ':' {
		return 0, false
	}
	hourStr := s[:len("HH")]
	n, ok = tryParseUint64(hourStr)
	if !ok || n > 23 {
		return 0, false
	}
	hour := int(n)
	s = s[len("HH")+1:]

	// Parse minute
	if s[len("MM")] != ':' {
		return 0, false
	}
	minuteStr := s[:len("MM")]
	n, ok = tryParseUint64(minuteStr)
	if !ok || n > 59 {
		return 0, false
	}
	minute := int(n)
	s = s[len("MM")+1:]

	// Parse second
	if s[len("SS")] != '.' {
		return 0, false
	}
	secondStr := s[:len("SS")]
	n, ok = tryParseUint64(secondStr)
	if !ok || n > 59 {
		return 0, false
	}
	second := int(n)
	s = s[len("SS")+1:]

	// Parse millisecond
	tzDelimiter := s[len("000")]
	if tzDelimiter != 'Z' {
		return 0, false
	}
	millisecondStr := s[:len("000")]
	n, ok = tryParseUint64(millisecondStr)
	if !ok || n > 999 {
		return 0, false
	}
	millisecond := int(n)
	s = s[len("000")+1:]

	if len(s) != 0 {
		return 0, false
	}

	t := time.Date(year, month, day, hour, minute, second, millisecond*1e6, time.UTC)
	ts := t.UnixNano()
	return uint64(ts), true
}
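
// Examples (taken from the tests below; added here for illustration):
//
//	tryParseTimestampISO8601("2023-01-15T23:45:51.123Z")      // 1673826351123000000, true
//	tryParseTimestampISO8601("2023-01-16T00:45:51.123+01:00") // 0, false - timezone suffixes are rejected
//	tryParseTimestampISO8601("2023-01-15T22:15:51.12Z")       // 0, false - milliseconds must have exactly 3 digits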

func tryParseUint64(s string) (uint64, bool) {
	if len(s) == 0 || len(s) > 18 {
		return 0, false
	}
	n := uint64(0)
	for i := 0; i < len(s); i++ {
		ch := s[i]
		if ch < '0' || ch > '9' {
			return 0, false
		}
		n *= 10
		n += uint64(ch - '0')
	}
	return n, true
}
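
// Note (added for clarity): the len(s) > 18 check keeps accepted inputs below 10^18,
// so the accumulation above cannot overflow uint64. For example (from the tests
// below), "123456789012345678" parses fine, while the 19-digit
// "1234567890123456789" is rejected.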

const iso8601Timestamp = "2006-01-02T15:04:05.000Z"

func tryIPv4Encoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	u32s := encoding.GetUint32s(len(srcValues))
	defer encoding.PutUint32s(u32s)
	a := u32s.A
	var minValue, maxValue uint32
	for i, v := range srcValues {
		n, ok := tryParseIPv4(v)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		a[i] = n
		if i == 0 || n < minValue {
			minValue = n
		}
		if i == 0 || n > maxValue {
			maxValue = n
		}
	}
	for _, n := range a {
		dstLen := len(dstBuf)
		dstBuf = encoding.MarshalUint32(dstBuf, n)
		v := bytesutil.ToUnsafeString(dstBuf[dstLen:])
		dstValues = append(dstValues, v)
	}
	return dstBuf, dstValues, valueTypeIPv4, uint64(minValue), uint64(maxValue)
}

func tryParseIPv4(s string) (uint32, bool) {
	if len(s) < len("1.1.1.1") || len(s) > len("255.255.255.255") || strings.Count(s, ".") != 3 {
		// Fast path - the entry isn't IPv4
		return 0, false
	}

	var octets [4]byte
	var v uint64
	var ok bool

	// Parse octet 1
	n := strings.IndexByte(s, '.')
	if n <= 0 || n > 3 {
		return 0, false
	}
	v, ok = tryParseUint64(s[:n])
	if !ok || v > 255 {
		return 0, false
	}
	octets[0] = byte(v)
	s = s[n+1:]

	// Parse octet 2
	n = strings.IndexByte(s, '.')
	if n <= 0 || n > 3 {
		return 0, false
	}
	v, ok = tryParseUint64(s[:n])
	if !ok || v > 255 {
		return 0, false
	}
	octets[1] = byte(v)
	s = s[n+1:]

	// Parse octet 3
	n = strings.IndexByte(s, '.')
	if n <= 0 || n > 3 {
		return 0, false
	}
	v, ok = tryParseUint64(s[:n])
	if !ok || v > 255 {
		return 0, false
	}
	octets[2] = byte(v)
	s = s[n+1:]

	// Parse octet 4
	v, ok = tryParseUint64(s)
	if !ok || v > 255 {
		return 0, false
	}
	octets[3] = byte(v)

	ipv4 := encoding.UnmarshalUint32(octets[:])
	return ipv4, true
}
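
// Examples (taken from the tests below; added here for illustration):
//
//	tryParseIPv4("1.2.3.4")         // 0x01020304, true - octets are packed big-endian
//	tryParseIPv4("127.0.0.1")       // 0x7f000001, true
//	tryParseIPv4("256.127.127.127") // 0, false - each octet must fit into a byte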

func tryFloat64Encoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	u64s := encoding.GetUint64s(len(srcValues))
	defer encoding.PutUint64s(u64s)
	a := u64s.A
	var minValue, maxValue float64
	for i, v := range srcValues {
		f, ok := tryParseFloat64(v)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		a[i] = math.Float64bits(f)
		if i == 0 || f < minValue {
			minValue = f
		}
		if i == 0 || f > maxValue {
			maxValue = f
		}
	}
	for _, n := range a {
		dstLen := len(dstBuf)
		dstBuf = encoding.MarshalUint64(dstBuf, n)
		v := bytesutil.ToUnsafeString(dstBuf[dstLen:])
		dstValues = append(dstValues, v)
	}
	minValueU64 := math.Float64bits(minValue)
	maxValueU64 := math.Float64bits(maxValue)
	return dstBuf, dstValues, valueTypeFloat64, minValueU64, maxValueU64
}
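
// Note (added for clarity): the min/max values are returned as math.Float64bits
// so that they fit the uint64 min/max slots shared by all the encoders; e.g. in
// the tests below 4607182418800017408 is the bit pattern of 1.0 and
// 4613937818241073152 is the bit pattern of 3.0.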

func tryParseFloat64(s string) (float64, bool) {
	if len(s) == 0 || len(s) > 20 {
		return 0, false
	}
	// Allow only decimal digits, minus and a dot.
	// Do not allow scientific notation (for example 1.23E+05),
	// since it cannot be converted back to the same string form.

	minus := s[0] == '-'
	if minus {
		s = s[1:]
	}
	n := strings.IndexByte(s, '.')
	if n < 0 {
		// Fast path - there are no dots.
		n, ok := tryParseUint64(s)
		if !ok {
			return 0, false
		}
		f := float64(n)
		if minus {
			f = -f
		}
		return f, true
	}
	if n == 0 || n == len(s)-1 {
		// Do not allow dots at the beginning and at the end of s,
		// since they cannot be converted back to the same string form.
		return 0, false
	}
	sInt := s[:n]
	sFrac := s[n+1:]
	nInt, ok := tryParseUint64(sInt)
	if !ok {
		return 0, false
	}
	nFrac, ok := tryParseUint64(sFrac)
	if !ok {
		return 0, false
	}
	f := math.FMA(float64(nFrac), math.Pow10(-len(sFrac)), float64(nInt))
	if minus {
		f = -f
	}
	return f, true
}
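
// Examples (taken from the tests below; added here for illustration):
//
//	tryParseFloat64("-1.234567") // -1.234567, true
//	tryParseFloat64("+123")      // 0, false - a leading plus cannot be round-tripped
//	tryParseFloat64("12e5")      // 0, false - scientific notation is rejected
//	tryParseFloat64(".123")      // 0, false - no leading or trailing dots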

func tryUintEncoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	u64s := encoding.GetUint64s(len(srcValues))
	defer encoding.PutUint64s(u64s)
	a := u64s.A
	var minValue, maxValue uint64
	for i, v := range srcValues {
		n, ok := tryParseUint64(v)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		a[i] = n
		if i == 0 || n < minValue {
			minValue = n
		}
		if i == 0 || n > maxValue {
			maxValue = n
		}
	}

	minBitSize := bits.Len64(maxValue)
	if minBitSize <= 8 {
		for _, n := range a {
			dstLen := len(dstBuf)
			dstBuf = append(dstBuf, byte(n))
			v := bytesutil.ToUnsafeString(dstBuf[dstLen:])
			dstValues = append(dstValues, v)
		}
		return dstBuf, dstValues, valueTypeUint8, minValue, maxValue
	}
	if minBitSize <= 16 {
		for _, n := range a {
			dstLen := len(dstBuf)
			dstBuf = encoding.MarshalUint16(dstBuf, uint16(n))
			v := bytesutil.ToUnsafeString(dstBuf[dstLen:])
			dstValues = append(dstValues, v)
		}
		return dstBuf, dstValues, valueTypeUint16, minValue, maxValue
	}
	if minBitSize <= 32 {
		for _, n := range a {
			dstLen := len(dstBuf)
			dstBuf = encoding.MarshalUint32(dstBuf, uint32(n))
			v := bytesutil.ToUnsafeString(dstBuf[dstLen:])
			dstValues = append(dstValues, v)
		}
		return dstBuf, dstValues, valueTypeUint32, minValue, maxValue
	}
	for _, n := range a {
		dstLen := len(dstBuf)
		dstBuf = encoding.MarshalUint64(dstBuf, n)
		v := bytesutil.ToUnsafeString(dstBuf[dstLen:])
		dstValues = append(dstValues, v)
	}
	return dstBuf, dstValues, valueTypeUint64, minValue, maxValue
}
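
// Note (added for clarity): the width is chosen from bits.Len64 of the largest
// value in the block, so every value in the block is stored with the same fixed
// size. For example, a block whose values all fit into 0..255 is encoded as
// valueTypeUint8 with one byte per value, while a single value of 2^32 or more
// pushes the whole block to valueTypeUint64 with eight bytes per value.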

func tryDictEncoding(dstBuf []byte, dstValues, srcValues []string, dict *valuesDict) ([]byte, []string, valueType) {
	dict.reset()
	dstBufOrig := dstBuf
	dstValuesOrig := dstValues

	for _, v := range srcValues {
		id, ok := dict.getOrAdd(v)
		if !ok {
			dict.reset()
			return dstBufOrig, dstValuesOrig, valueTypeUnknown
		}
		dstLen := len(dstBuf)
		dstBuf = append(dstBuf, id)
		v := bytesutil.ToUnsafeString(dstBuf[dstLen:])
		dstValues = append(dstValues, v)
	}
	return dstBuf, dstValues, valueTypeDict
}
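
// Note (added for clarity): dict encoding succeeds only while every distinct
// value in the block fits into the dict (at most maxDictLen entries with at
// most maxDictSizeBytes total bytes); otherwise it bails out with
// valueTypeUnknown and the caller falls through to the other encoders. This is
// why the test below with maxDictLen+1 distinct strings ends up with
// valueTypeString.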

type valuesDict struct {
	values []string
}

func (vd *valuesDict) reset() {
	vs := vd.values
	for i := range vs {
		vs[i] = ""
	}
	vd.values = vs[:0]
}

func (vd *valuesDict) copyFrom(src *valuesDict) {
	vd.reset()

	vd.values = append(vd.values[:0], src.values...)
}

func (vd *valuesDict) getOrAdd(k string) (byte, bool) {
	if len(k) > maxDictSizeBytes {
		return 0, false
	}
	vs := vd.values
	dictSizeBytes := 0
	for i, v := range vs {
		if k == v {
			return byte(i), true
		}
		dictSizeBytes += len(v)
	}
	if len(vs) >= maxDictLen || dictSizeBytes+len(k) > maxDictSizeBytes {
		return 0, false
	}
	vs = append(vs, k)
	vd.values = vs

	return byte(len(vs) - 1), true
}

func (vd *valuesDict) marshal(dst []byte) []byte {
	values := vd.values
	if len(values) > maxDictLen {
		logger.Panicf("BUG: valuesDict may contain max %d items; got %d items", maxDictLen, len(values))
	}
	dst = append(dst, byte(len(values)))
	for _, v := range values {
		dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(v))
	}
	return dst
}
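
// Note (added for clarity): the serialized dict starts with a single byte
// holding the number of entries, followed by each value marshaled via
// encoding.MarshalBytes (a length prefix plus the raw bytes), which is what
// unmarshal below reads back.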

func (vd *valuesDict) unmarshal(src []byte) ([]byte, error) {
	vd.reset()

	srcOrig := src
	if len(src) < 1 {
		return srcOrig, fmt.Errorf("cannot unmarshal dict len from 0 bytes; need at least 1 byte")
	}
	dictLen := int(src[0])
	src = src[1:]
	for i := 0; i < dictLen; i++ {
		tail, data, err := encoding.UnmarshalBytes(src)
		if err != nil {
			return srcOrig, fmt.Errorf("cannot unmarshal value %d out of %d from dict: %w", i, dictLen, err)
		}
		src = tail
		// Do not use bytesutil.InternBytes(data) here, since it works slower than string(data) in prod
		v := string(data)
		vd.values = append(vd.values, v)
	}
	return src, nil
}

// maxDictSizeBytes is the maximum total length of all the keys in the valuesDict.
const maxDictSizeBytes = 256

// maxDictLen is the maximum number of entries in the valuesDict.
//
// It must not exceed 255, since the dict len is marshaled into a single byte.
const maxDictLen = 8
228
lib/logstorage/values_encoder_test.go
Normal file

@@ -0,0 +1,228 @@
package logstorage

import (
	"fmt"
	"math"
	"reflect"
	"testing"
)

func TestValuesEncoder(t *testing.T) {
	f := func(values []string, expectedValueType valueType, expectedMinValue, expectedMaxValue uint64) {
		t.Helper()
		ve := getValuesEncoder()
		var dict valuesDict
		vt, minValue, maxValue := ve.encode(values, &dict)
		if vt != expectedValueType {
			t.Fatalf("unexpected value type; got %d; want %d", vt, expectedValueType)
		}
		if minValue != expectedMinValue {
			t.Fatalf("unexpected minValue; got %d; want %d", minValue, expectedMinValue)
		}
		if maxValue != expectedMaxValue {
			t.Fatalf("unexpected maxValue; got %d; want %d", maxValue, expectedMaxValue)
		}
		encodedValues := append([]string{}, ve.values...)
		putValuesEncoder(ve)

		vd := getValuesDecoder()
		if err := vd.decodeInplace(encodedValues, vt, &dict); err != nil {
			t.Fatalf("unexpected error in decodeInplace(): %s", err)
		}
		if len(values) == 0 {
			values = []string{}
		}
		if !reflect.DeepEqual(values, encodedValues) {
			t.Fatalf("unexpected values decoded\ngot\n%q\nwant\n%q", encodedValues, values)
		}
		putValuesDecoder(vd)
	}

	// An empty values list
	f(nil, valueTypeString, 0, 0)

	// string values
	values := make([]string, maxDictLen+1)
	for i := range values {
		values[i] = fmt.Sprintf("value_%d", i)
	}
	f(values, valueTypeString, 0, 0)

	// dict values
	f([]string{"foobar"}, valueTypeDict, 0, 0)
	f([]string{"foo", "bar"}, valueTypeDict, 0, 0)
	f([]string{"1", "2foo"}, valueTypeDict, 0, 0)

	// uint8 values
	for i := range values {
		values[i] = fmt.Sprintf("%d", i+1)
	}
	f(values, valueTypeUint8, 1, uint64(len(values)))

	// uint16 values
	for i := range values {
		values[i] = fmt.Sprintf("%d", (i+1)<<8)
	}
	f(values, valueTypeUint16, 1<<8, uint64(len(values)<<8))

	// uint32 values
	for i := range values {
		values[i] = fmt.Sprintf("%d", (i+1)<<16)
	}
	f(values, valueTypeUint32, 1<<16, uint64(len(values)<<16))

	// uint64 values
	for i := range values {
		values[i] = fmt.Sprintf("%d", (i+1)<<32)
	}
	f(values, valueTypeUint64, 1<<32, uint64(len(values)<<32))

	// ipv4 values
	for i := range values {
		values[i] = fmt.Sprintf("1.2.3.%d", i)
	}
	f(values, valueTypeIPv4, 16909056, 16909064)

	// iso8601 timestamps
	for i := range values {
		values[i] = fmt.Sprintf("2011-04-19T03:44:01.%03dZ", i)
	}
	f(values, valueTypeTimestampISO8601, 1303184641000000000, 1303184641008000000)

	// float64 values
	for i := range values {
		values[i] = fmt.Sprintf("%g", math.Sqrt(float64(i+1)))
	}
	f(values, valueTypeFloat64, 4607182418800017408, 4613937818241073152)
}

func TestTryParseIPv4(t *testing.T) {
	f := func(s string, nExpected uint32, okExpected bool) {
		t.Helper()
		n, ok := tryParseIPv4(s)
		if n != nExpected {
			t.Fatalf("unexpected n; got %d; want %d", n, nExpected)
		}
		if ok != okExpected {
			t.Fatalf("unexpected ok; got %v; want %v", ok, okExpected)
		}
	}

	f("", 0, false)
	f("foo", 0, false)
	f("a.b.c.d", 0, false)
	f("1.2.3.4", 0x01020304, true)
	f("255.255.255.255", 0xffffffff, true)
	f("0.0.0.0", 0, true)
	f("127.0.0.1", 0x7f000001, true)
	f("127.0.0.x", 0, false)
	f("127.0.x.0", 0, false)
	f("127.x.0.0", 0, false)
	f("x.0.0.0", 0, false)
	f("127.127.127.256", 0, false)
	f("127.127.256.127", 0, false)
	f("127.256.127.127", 0, false)
	f("256.127.127.127", 0, false)
	f("-1.127.127.127", 0, false)
	f("127.-1.127.127", 0, false)
	f("127.127.-1.127", 0, false)
	f("127.127.127.-1", 0, false)
}

func TestTryParseTimestampISO8601(t *testing.T) {
	f := func(s string, timestampExpected uint64, okExpected bool) {
		t.Helper()
		timestamp, ok := tryParseTimestampISO8601(s)
		if timestamp != timestampExpected {
			t.Fatalf("unexpected timestamp; got %d; want %d", timestamp, timestampExpected)
		}
		if ok != okExpected {
			t.Fatalf("unexpected ok; got %v; want %v", ok, okExpected)
		}
	}

	f("2023-01-15T23:45:51.123Z", 1673826351123000000, true)

	// Invalid milliseconds
	f("2023-01-15T22:15:51.12345Z", 0, false)
	f("2023-01-15T22:15:51.12Z", 0, false)
	f("2023-01-15T22:15:51Z", 0, false)

	// Missing Z
	f("2023-01-15T23:45:51.123", 0, false)

	// Invalid timestamp
	f("foo", 0, false)
	f("2023-01-15T23:45:51.123Zxyabcd", 0, false)
	f("2023-01-15T23:45:51.123Z01:00", 0, false)

	// timestamp with timezone
	f("2023-01-16T00:45:51.123+01:00", 0, false)
}

func TestTryParseFloat64(t *testing.T) {
	f := func(s string, valueExpected float64, okExpected bool) {
		t.Helper()

		value, ok := tryParseFloat64(s)
		if value != valueExpected {
			t.Fatalf("unexpected value; got %v; want %v", value, valueExpected)
		}
		if ok != okExpected {
			t.Fatalf("unexpected ok; got %v; want %v", ok, okExpected)
		}
	}

	f("0", 0, true)
	f("1234567890", 1234567890, true)
	f("-1.234567", -1.234567, true)

	// Empty value
	f("", 0, false)

	// A plus in the value isn't allowed, since it cannot be converted back to the same string representation
	f("+123", 0, false)

	// A dot at the beginning or the end of the value isn't allowed, since it cannot be converted back to the same string representation
	f(".123", 0, false)
	f("123.", 0, false)

	// Multiple dots aren't allowed
	f("123.434.55", 0, false)

	// Invalid dots
	f("-.123", 0, false)
	f(".", 0, false)

	// Scientific notation isn't allowed, since it cannot be converted back to the same string representation
	f("12e5", 0, false)

	// A minus in the middle of the string isn't allowed
	f("12-5", 0, false)
}

func TestTryParseUint64(t *testing.T) {
	f := func(s string, valueExpected uint64, okExpected bool) {
		t.Helper()

		value, ok := tryParseUint64(s)
		if value != valueExpected {
			t.Fatalf("unexpected value; got %d; want %d", value, valueExpected)
		}
		if ok != okExpected {
			t.Fatalf("unexpected ok; got %v; want %v", ok, okExpected)
		}
	}

	f("0", 0, true)
	f("123456789012345678", 123456789012345678, true)

	// empty value
	f("", 0, false)

	// too big value
	f("1234567890123456789", 0, false)

	// invalid value
	f("foo", 0, false)
}
98
lib/logstorage/values_encoder_timing_test.go
Normal file

@@ -0,0 +1,98 @@
package logstorage

import (
	"fmt"
	"testing"
)

func BenchmarkTryParseTimestampISO8601(b *testing.B) {
	a := []string{
		"2023-01-15T23:45:51.123Z",
		"2023-02-15T23:45:51.123Z",
		"2023-03-15T23:45:51.123Z",
		"2023-02-15T22:45:51.123Z",
		"2023-02-15T22:45:51.000Z",
	}

	b.SetBytes(int64(len(a)))
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			for _, s := range a {
				_, ok := tryParseTimestampISO8601(s)
				if !ok {
					panic(fmt.Errorf("cannot parse timestamp %q", s))
				}
			}
		}
	})
}

func BenchmarkTryParseIPv4(b *testing.B) {
	a := []string{
		"1.2.3.4",
		"127.0.0.1",
		"255.255.255.255",
		"192.43.234.22",
		"32.34.54.198",
	}

	b.SetBytes(int64(len(a)))
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			for _, s := range a {
				_, ok := tryParseIPv4(s)
				if !ok {
					panic(fmt.Errorf("cannot parse ipv4 %q", s))
				}
			}
		}
	})
}

func BenchmarkTryParseUint64(b *testing.B) {
	a := []string{
		"1234",
		"483932",
		"28494",
		"90012",
		"889111",
	}

	b.SetBytes(int64(len(a)))
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			for _, s := range a {
				_, ok := tryParseUint64(s)
				if !ok {
					panic(fmt.Errorf("cannot parse uint %q", s))
				}
			}
		}
	})
}

func BenchmarkTryParseFloat64(b *testing.B) {
	a := []string{
		"1.234",
		"4.545",
		"456.5645",
		"-123.434",
		"434.322",
	}

	b.SetBytes(int64(len(a)))
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			for _, s := range a {
				_, ok := tryParseFloat64(s)
				if !ok {
					panic(fmt.Errorf("cannot parse float64 %q", s))
				}
			}
		}
	})
}