Merge branch 'public-single-node' into pmm-6401-read-prometheus-data-files

Aliaksandr Valialkin, 2020-12-19 16:43:03 +02:00
commit 8593358965
38 changed files with 992 additions and 297 deletions

@@ -10,6 +10,8 @@ endif
 GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(shell date -u +'%Y%m%d-%H%M%S')-$(BUILDINFO_TAG)'
+
+.PHONY: $(MAKECMDGOALS)
 all: \
 	victoria-metrics-prod \
 	vmagent-prod \
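A note on the added line: declaring `$(MAKECMDGOALS)` as `.PHONY` tells make to treat every goal passed on the command line as phony, so goals whose names happen to match files or directories in the repo are always rebuilt rather than considered up to date. The exact motivation is not stated in the commit; this is the usual reading of the idiom.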


@@ -28,22 +28,22 @@ See [features available for enterprise customers](https://victoriametrics.com/en

 ## Case studies and talks

-Click on a link in order to read the corresponding case study
+Alphabetically sorted links to case studies:

-* [zhihu](https://victoriametrics.github.io/CaseStudies.html#zhihu)
 * [adidas](https://victoriametrics.github.io/CaseStudies.html#adidas)
-* [CERN](https://victoriametrics.github.io/CaseStudies.html#cern)
-* [COLOPL](https://victoriametrics.github.io/CaseStudies.html#colopl)
-* [Zerodha](https://victoriametrics.github.io/CaseStudies.html#zerodha)
-* [Wix.com](https://victoriametrics.github.io/CaseStudies.html#wixcom)
-* [Wedos.com](https://victoriametrics.github.io/CaseStudies.html#wedoscom)
-* [Synthesio](https://victoriametrics.github.io/CaseStudies.html#synthesio)
-* [MHI Vestas Offshore Wind](https://victoriametrics.github.io/CaseStudies.html#mhi-vestas-offshore-wind)
-* [Dreamteam](https://victoriametrics.github.io/CaseStudies.html#dreamteam)
-* [Brandwatch](https://victoriametrics.github.io/CaseStudies.html#brandwatch)
 * [Adsterra](https://victoriametrics.github.io/CaseStudies.html#adsterra)
 * [ARNES](https://victoriametrics.github.io/CaseStudies.html#arnes)
+* [Brandwatch](https://victoriametrics.github.io/CaseStudies.html#brandwatch)
+* [CERN](https://victoriametrics.github.io/CaseStudies.html#cern)
+* [COLOPL](https://victoriametrics.github.io/CaseStudies.html#colopl)
+* [Dreamteam](https://victoriametrics.github.io/CaseStudies.html#dreamteam)
 * [Idealo.de](https://victoriametrics.github.io/CaseStudies.html#idealode)
+* [MHI Vestas Offshore Wind](https://victoriametrics.github.io/CaseStudies.html#mhi-vestas-offshore-wind)
+* [Synthesio](https://victoriametrics.github.io/CaseStudies.html#synthesio)
+* [Wedos.com](https://victoriametrics.github.io/CaseStudies.html#wedoscom)
+* [Wix.com](https://victoriametrics.github.io/CaseStudies.html#wixcom)
+* [Zerodha](https://victoriametrics.github.io/CaseStudies.html#zerodha)
+* [zhihu](https://victoriametrics.github.io/CaseStudies.html#zhihu)

 ## Prominent features


@@ -141,11 +141,16 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
 	updated := make(map[uint64]struct{})
 	// update list of active alerts
 	for _, m := range qMetrics {
-		for k, v := range ar.Labels {
-			// apply extra labels
+		// extra labels could contain templates, so we expand them first
+		labels, err := expandLabels(m, qFn, ar)
+		if err != nil {
+			return nil, fmt.Errorf("failed to expand labels: %s", err)
+		}
+		for k, v := range labels {
+			// apply extra labels to datasource
+			// so the hash key will be consistent on restore
 			m.SetLabel(k, v)
 		}
 		h := hash(m)
 		if _, ok := updated[h]; ok {
 			// duplicate may be caused by extra labels
@@ -158,8 +163,8 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
 			// update Value field with latest value
 			a.Value = m.Value
 			// and re-exec template since Value can be used
-			// in templates
-			err = ar.template(a, qFn)
+			// in annotations
+			a.Annotations, err = a.ExecTemplate(qFn, ar.Annotations)
 			if err != nil {
 				return nil, err
 			}
@@ -200,6 +205,19 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
 	return nil, nil
 }

+func expandLabels(m datasource.Metric, q notifier.QueryFn, ar *AlertingRule) (map[string]string, error) {
+	metricLabels := make(map[string]string)
+	for _, l := range m.Labels {
+		metricLabels[l.Name] = l.Value
+	}
+	tpl := notifier.AlertTplData{
+		Labels: metricLabels,
+		Value:  m.Value,
+		Expr:   ar.Expr,
+	}
+	return notifier.ExecTemplate(q, ar.Labels, tpl)
+}
+
 func (ar *AlertingRule) toTimeSeries(timestamp time.Time) []prompbmarshal.TimeSeries {
 	var tss []prompbmarshal.TimeSeries
 	for _, a := range ar.alerts {
@@ -265,17 +283,9 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time, qFn notif
 		}
 		a.Labels[l.Name] = l.Value
 	}
-	return a, ar.template(a, qFn)
-}
-
-func (ar *AlertingRule) template(a *notifier.Alert, qFn notifier.QueryFn) error {
 	var err error
-	a.Labels, err = a.ExecTemplate(qFn, a.Labels)
-	if err != nil {
-		return err
-	}
 	a.Annotations, err = a.ExecTemplate(qFn, ar.Annotations)
-	return err
+	return a, err
 }

 // AlertAPI generates APIAlert object from alert by its id(hash)
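For readers tracing this change: extra labels configured on a rule may themselves be Go templates referencing the matched metric, and they are now expanded before the alert hash is computed. Below is a minimal, self-contained sketch of the expansion semantics, not the actual vmalert code; the hypothetical `expandLabelTemplates` helper mirrors the `$labels`/`$value` header used by the notifier package's `tplHeader`.

```go
package main

import (
	"bytes"
	"fmt"
	"text/template"
)

// expandLabelTemplates sketches what expandLabels above does: every
// configured label value may be a Go template referencing the labels
// and value of the matched metric.
func expandLabelTemplates(cfg, labels map[string]string, value float64) (map[string]string, error) {
	data := struct {
		Labels map[string]string
		Value  float64
	}{labels, value}
	const header = `{{ $value := .Value }}{{ $labels := .Labels }}`
	result := make(map[string]string, len(cfg))
	for name, tplText := range cfg {
		tpl, err := template.New(name).Option("missingkey=zero").Parse(header + tplText)
		if err != nil {
			return nil, fmt.Errorf("cannot parse template for label %q: %w", name, err)
		}
		var buf bytes.Buffer
		if err := tpl.Execute(&buf, data); err != nil {
			return nil, fmt.Errorf("cannot execute template for label %q: %w", name, err)
		}
		result[name] = buf.String()
	}
	return result, nil
}

func main() {
	labels, err := expandLabelTemplates(
		map[string]string{"instance": "{{ $labels.instance }}", "region": "east"},
		map[string]string{"instance": "foo"},
		2,
	)
	fmt.Println(labels, err) // map[instance:foo region:east] <nil>
}
```

Because the expanded labels are applied via `m.SetLabel` before `hash(m)`, two metrics differing only in `instance` keep distinct hash keys even when the rule overrides `instance` with a template, which is what keeps the key consistent on restore.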


@@ -3,6 +3,7 @@ package main
 import (
 	"context"
 	"errors"
+	"reflect"
 	"strings"
 	"testing"
 	"time"
@@ -464,6 +465,106 @@ func TestAlertingRule_Exec_Negative(t *testing.T) {
 	}
 }

+func TestAlertingRule_Template(t *testing.T) {
+	testCases := []struct {
+		rule      *AlertingRule
+		metrics   []datasource.Metric
+		expAlerts map[uint64]*notifier.Alert
+	}{
+		{
+			newTestRuleWithLabels("common", "region", "east"),
+			[]datasource.Metric{
+				metricWithValueAndLabels(t, 1, "instance", "foo"),
+				metricWithValueAndLabels(t, 1, "instance", "bar"),
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "region", "east", "instance", "foo")): {
+					Annotations: map[string]string{},
+					Labels: map[string]string{
+						alertGroupNameLabel: "",
+						"region":            "east",
+						"instance":          "foo",
+					},
+				},
+				hash(metricWithLabels(t, "region", "east", "instance", "bar")): {
+					Annotations: map[string]string{},
+					Labels: map[string]string{
+						alertGroupNameLabel: "",
+						"region":            "east",
+						"instance":          "bar",
+					},
+				},
+			},
+		},
+		{
+			&AlertingRule{
+				Name: "override label",
+				Labels: map[string]string{
+					"instance": "{{ $labels.instance }}",
+					"region":   "east",
+				},
+				Annotations: map[string]string{
+					"summary":     `Too high connection number for "{{ $labels.instance }}" for region {{ $labels.region }}`,
+					"description": `It is {{ $value }} connections for "{{ $labels.instance }}"`,
+				},
+				alerts: make(map[uint64]*notifier.Alert),
+			},
+			[]datasource.Metric{
+				metricWithValueAndLabels(t, 2, "instance", "foo"),
+				metricWithValueAndLabels(t, 10, "instance", "bar"),
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "region", "east", "instance", "foo")): {
+					Labels: map[string]string{
+						alertGroupNameLabel: "",
+						"instance":          "foo",
+						"region":            "east",
+					},
+					Annotations: map[string]string{
+						"summary":     `Too high connection number for "foo" for region east`,
+						"description": `It is 2 connections for "foo"`,
+					},
+				},
+				hash(metricWithLabels(t, "region", "east", "instance", "bar")): {
+					Labels: map[string]string{
+						alertGroupNameLabel: "",
+						"instance":          "bar",
+						"region":            "east",
+					},
+					Annotations: map[string]string{
+						"summary":     `Too high connection number for "bar" for region east`,
+						"description": `It is 10 connections for "bar"`,
+					},
+				},
+			},
+		},
+	}
+	fakeGroup := Group{Name: "TestRule_Exec"}
+	for _, tc := range testCases {
+		t.Run(tc.rule.Name, func(t *testing.T) {
+			fq := &fakeQuerier{}
+			tc.rule.GroupID = fakeGroup.ID()
+			fq.add(tc.metrics...)
+			if _, err := tc.rule.Exec(context.TODO(), fq, false); err != nil {
+				t.Fatalf("unexpected err: %s", err)
+			}
+			for hash, expAlert := range tc.expAlerts {
+				gotAlert := tc.rule.alerts[hash]
+				if gotAlert == nil {
+					t.Fatalf("alert %d is missing; labels: %v; annotations: %v",
+						hash, expAlert.Labels, expAlert.Annotations)
+				}
+				if !reflect.DeepEqual(expAlert.Annotations, gotAlert.Annotations) {
+					t.Fatalf("expected to have annotations %#v; got %#v", expAlert.Annotations, gotAlert.Annotations)
+				}
+				if !reflect.DeepEqual(expAlert.Labels, gotAlert.Labels) {
+					t.Fatalf("expected to have labels %#v; got %#v", expAlert.Labels, gotAlert.Labels)
+				}
+			}
+		})
+	}
+}
+
 func newTestRuleWithLabels(name string, labels ...string) *AlertingRule {
 	r := newTestAlertingRule(name, 0)
 	r.Labels = make(map[string]string)


@@ -15,8 +15,11 @@ groups:
   - alert: ExampleAlertAlwaysFiring
     expr: sum by(job)
       (up == 1)
+    labels:
+      job: '{{ $labels.job }}'
     annotations:
-      summary: Instances up {{ range query "up" }}
+      description: Job {{ $labels.job }} is up!
+      summary: All instances up {{ range query "up" }}
          {{ . | label "instance" }}
        {{ end }}
   - record: handler:requests:rate5m
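To illustrate the new `labels` section above: for a result series such as `up{job="node"} == 1` (the `node` job name is purely illustrative), the alert would carry the label `job="node"` and the annotation would render as `description: Job node is up!`, since `{{ $labels.job }}` is substituted with the matched series' `job` label.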


@@ -52,7 +52,8 @@ func (as AlertState) String() string {
 	return "inactive"
 }

-type alertTplData struct {
+// AlertTplData is used to execute templating
+type AlertTplData struct {
 	Labels map[string]string
 	Value  float64
 	Expr   string
@@ -60,25 +61,30 @@ type alertTplData struct {
 const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}{{ $expr := .Expr }}`

-// ExecTemplate executes the Alert template for give
+// ExecTemplate executes the Alert template for given
 // map of annotations.
 // Every alert could have a different datasource, so function
 // requires a queryFunction as an argument.
 func (a *Alert) ExecTemplate(q QueryFn, annotations map[string]string) (map[string]string, error) {
-	tplData := alertTplData{Value: a.Value, Labels: a.Labels, Expr: a.Expr}
+	tplData := AlertTplData{Value: a.Value, Labels: a.Labels, Expr: a.Expr}
 	return templateAnnotations(annotations, tplData, funcsWithQuery(q))
 }

+// ExecTemplate executes the given template for given annotations map.
+func ExecTemplate(q QueryFn, annotations map[string]string, tpl AlertTplData) (map[string]string, error) {
+	return templateAnnotations(annotations, tpl, funcsWithQuery(q))
+}
+
 // ValidateTemplates validate annotations for possible template error, uses empty data for template population
 func ValidateTemplates(annotations map[string]string) error {
-	_, err := templateAnnotations(annotations, alertTplData{
+	_, err := templateAnnotations(annotations, AlertTplData{
 		Labels: map[string]string{},
 		Value:  0,
 	}, tmplFunc)
 	return err
 }

-func templateAnnotations(annotations map[string]string, data alertTplData, funcs template.FuncMap) (map[string]string, error) {
+func templateAnnotations(annotations map[string]string, data AlertTplData, funcs template.FuncMap) (map[string]string, error) {
 	var builder strings.Builder
 	var buf bytes.Buffer
 	eg := new(utils.ErrGroup)
@@ -99,7 +105,7 @@ func templateAnnotations(annotations map[string]string, data alertTplData, funcs
 	return r, eg.Err()
 }

-func templateAnnotation(dst io.Writer, text string, data alertTplData, funcs template.FuncMap) error {
+func templateAnnotation(dst io.Writer, text string, data AlertTplData, funcs template.FuncMap) error {
 	t := template.New("").Funcs(funcs).Option("missingkey=zero")
 	tpl, err := t.Parse(text)
 	if err != nil {


@@ -188,12 +188,23 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
-	t.Run("time() offset 1m40s0ms", func(t *testing.T) {
+	t.Run("time() offset 1h40s0ms", func(t *testing.T) {
 		t.Parallel()
-		q := `time() offset 100s`
+		q := `time() offset 1h40s0ms`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{800, 1000, 1200, 1400, 1600, 1800},
+			Values:     []float64{-2800, -2600, -2400, -2200, -2000, -1800},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run("time() offset -1h40s0ms", func(t *testing.T) {
+		t.Parallel()
+		q := `time() offset -1h40s0ms`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{4600, 4800, 5000, 5200, 5400, 5600},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}


@@ -4,6 +4,7 @@
 * [Foiled by the Firewall: A Tale of Transition From Prometheus to VictoriaMetrics](https://www.percona.com/blog/2020/12/01/foiled-by-the-firewall-a-tale-of-transition-from-prometheus-to-victoriametrics/)
 * [Better Prometheus rate() function with VictoriaMetrics](https://www.percona.com/blog/2020/02/28/better-prometheus-rate-function-with-victoriametrics/)
+* [Percona monitoring and management migration from Prometheus to VictoriaMetrics FAQ](https://www.percona.com/blog/2020/12/16/percona-monitoring-and-management-migration-from-prometheus-to-victoriametrics-faq/)
 * [Making peace with Prometheus rate()](https://blog.doit-intl.com/making-peace-with-prometheus-rate-43a3ea75c4cf)
 * [Infrastructure monitoring with Prometheus at Zerodha](https://zerodha.tech/blog/infra-monitoring-at-zerodha/)
 * [Sismology: Iguana Solutions Monitoring System](https://medium.com/@IG1.com/sismology-iguana-solutions-monitoring-system-f46e4170447f)
@@ -25,31 +26,50 @@
 ## Our articles

+### Announcements
+
 * [Open-sourcing VictoriaMetrics](https://medium.com/@valyala/open-sourcing-victoriametrics-f31e34485c2b)
 * [How we created VictoriaMetrics](https://medium.com/devopslinks/victoriametrics-creating-the-best-remote-storage-for-prometheus-5d92d66787ac)
+* [Anomaly Detection in VictoriaMetrics](https://medium.com/@VictoriaMetrics/anomaly-detection-in-victoriametrics-9528538786a7)
+
+### Benchmarks
+
 * [VictoriaMetrics vs TimescaleDB vs InfluxDB benchmarks on 40K unique time series](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
 * [VictoriaMetrics vs TimescaleDB vs InfluxDB benchmarks on 400K, 4M and 40M unique time series](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
 * [Insert benchmarks for VictoriaMetrics vs InfluxDB on high-cardinality data](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893)
 * [Measuring vertical scalability for time series databases in Google Cloud](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
-* [How VictoriaMetrics creates instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
-* [Prometheus Subqueries in VictoriaMetrics](https://medium.com/@valyala/prometheus-subqueries-in-victoriametrics-9b1492b720b3)
-* [Why irate from Prometheus doesn't capture spikes](https://medium.com/@valyala/why-irate-from-prometheus-doesnt-capture-spikes-45f9896d7832)
-* [Why mmap'ed files in Go may hurt performance](https://medium.com/@valyala/mmap-in-go-considered-harmful-d92a25cb161d)
-* [WAL Usage Looks Broken in Modern TSDBs](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704)
-* [Analyzing Prometheus data with external tools](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639)
-* [Stripping dependency bloat in VictoriaMetrics Docker image](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d)
-* [PromQL tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085)
-* [Achieving better compression for time series data than Gorilla](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
-* [Comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
-* [Speeding up backups for big time series databases](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883)
-* [Evaluation performance and correctness: VictoriaMetrics response](https://medium.com/@valyala/evaluating-performance-and-correctness-victoriametrics-response-e27315627e87)
-* [Improving histogram usability for Prometheus and Grafana](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350)
-* [Prometheus storage: tech terms for humans](https://medium.com/@valyala/prometheus-storage-technical-terms-for-humans-4ab4de6c3d48)
 * [Billy: how VictoriaMetrics deals with more than 500 billion rows](https://medium.com/@valyala/billy-how-victoriametrics-deals-with-more-than-500-billion-rows-e82ff8f725da)
-* [How to migrate data from Prometheus to VictoriaMetrics](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-d44a6728f043)
-* [Filtering and modifying time series during import to VictoriaMetrics](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-filtering-and-modifying-time-series-6d40cea4bf21)
-* [Anomaly Detection in VictoriaMetrics](https://medium.com/@VictoriaMetrics/anomaly-detection-in-victoriametrics-9528538786a7)
-* [How to use relabeling in Prometheus and VictoriaMetrics](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2)
 * [First look at performance comparison between InfluxDB IOx and VictoriaMetrics](https://medium.com/@VictoriaMetrics/first-look-at-perfomance-comparassion-between-influxdb-iox-and-victoriametrics-e590f847935b)
 * [Prometheus vs VictoriaMetrics benchmark on node-exporter metrics](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f)
+* [Promscale vs VictoriaMetrics: resource usage on production workload](https://valyala.medium.com/promscale-vs-victoriametrics-resource-usage-on-production-workload-91c8e3786c03)
+
+### Technical articles
+
+* [How VictoriaMetrics creates instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
+* [WAL Usage Looks Broken in Modern TSDBs](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704)
+* [Why mmap'ed files in Go may hurt performance](https://medium.com/@valyala/mmap-in-go-considered-harmful-d92a25cb161d)
+* [Achieving better compression for time series data than Gorilla](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
+* [Stripping dependency bloat in VictoriaMetrics Docker image](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d)
+* [Speeding up backups for big time series databases](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883)
+* [Improving histogram usability for Prometheus and Grafana](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350)
+* [Why irate from Prometheus doesn't capture spikes](https://medium.com/@valyala/why-irate-from-prometheus-doesnt-capture-spikes-45f9896d7832)
+
+### Tutorials, guides and how-to articles
+
+* [PromQL tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085)
+* [Analyzing Prometheus data with external tools](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639)
+* [Prometheus Subqueries in VictoriaMetrics](https://medium.com/@valyala/prometheus-subqueries-in-victoriametrics-9b1492b720b3)
+* [How to migrate data from Prometheus to VictoriaMetrics](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-d44a6728f043)
+* [Filtering and modifying time series during import to VictoriaMetrics](https://medium.com/@romanhavronenko/victoriametrics-how-to-migrate-data-from-prometheus-filtering-and-modifying-time-series-6d40cea4bf21)
+* [How to use relabeling in Prometheus and VictoriaMetrics](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2)
 * [How to monitor Go applications with VictoriaMetrics](https://victoriametrics.medium.com/how-to-monitor-go-applications-with-victoriametrics-c04703110870)
+* [Prometheus storage: tech terms for humans](https://medium.com/@valyala/prometheus-storage-technical-terms-for-humans-4ab4de6c3d48)
+
+### Other articles
+
+* [Comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
+* [Evaluation performance and correctness: VictoriaMetrics response](https://medium.com/@valyala/evaluating-performance-and-correctness-victoriametrics-response-e27315627e87)


@@ -3,6 +3,15 @@

 # tip

+# [v1.50.2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.50.2)
+
+* FEATURE: do not publish duplicate Docker images with `-cluster` tag suffix for [vmagent](https://victoriametrics.github.io/vmagent.html), [vmalert](https://victoriametrics.github.io/vmalert.html), [vmauth](https://victoriametrics.github.io/vmauth.html), [vmbackup](https://victoriametrics.github.io/vmbackup.html) and [vmrestore](https://victoriametrics.github.io/vmrestore.html), since they are identical to images without `-cluster` tag suffix.
+
+* BUGFIX: vmalert: properly populate template variables. This has been broken in v1.50.0. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/974
+* BUGFIX: properly parse negative combined duration in MetricsQL such as `-1h3m4s`. It must be parsed as `-(1h + 3m + 4s)`. Previously it was parsed as `-1h + 3m + 4s`.
+* BUGFIX: properly parse lines in [Prometheus exposition format](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md) and in [OpenMetrics format](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md) with whitespace after the timestamp. For example, `foo 123 456 # some comment here`. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/970
+
 # [v1.50.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.50.1)

 * FEATURE: vmagent: export `vmagent_remotewrite_blocks_sent_total` and `vmagent_remotewrite_bytes_sent_total` metrics for each `-remoteWrite.url`.
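The negative combined duration fix can be observed through the metricsql parser, whose `Parse` function and `Expr.AppendString` method are part of the package's public API. A small sketch (assuming metricsql v0.9.1 from this commit's go.mod; the query itself is illustrative):

```go
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/metricsql"
)

func main() {
	// With the fix, the combined duration in `offset -1h3m4s` is treated as
	// -(1h + 3m + 4s) = -3784s rather than -1h + 3m + 4s.
	expr, err := metricsql.Parse(`foo offset -1h3m4s`)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", expr.AppendString(nil))
}
```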


@@ -5,20 +5,23 @@ and feel free asking for references, reviews and additional case studies from re

 See also [articles about VictoriaMetrics from our users](https://victoriametrics.github.io/Articles.html#third-party-articles-and-slides).

+Alphabetically sorted links to case studies:
+
-* [zhihu](#zhihu)
 * [adidas](#adidas)
-* [CERN](#cern)
-* [COLOPL](#colopl)
-* [Zerodha](#zerodha)
-* [Wix.com](#wixcom)
-* [Wedos.com](#wedoscom)
-* [Synthesio](#synthesio)
-* [Dreamteam](#dreamteam)
-* [Brandwatch](#brandwatch)
 * [Adsterra](#adsterra)
 * [ARNES](#arnes)
+* [Brandwatch](#brandwatch)
+* [CERN](#cern)
+* [COLOPL](#colopl)
+* [Dreamteam](#dreamteam)
 * [Idealo.de](#idealode)
+* [MHI Vestas Offshore Wind](#mhi-vestas-offshore-wind)
+* [Synthesio](#synthesio)
+* [Wedos.com](#wedoscom)
+* [Wix.com](#wixcom)
+* [Zerodha](#zerodha)
+* [zhihu](#zhihu)

 ## zhihu


@@ -122,7 +122,7 @@ ROOT_IMAGE=scratch make package

 ## Operation

-### Cluster setup
+## Cluster setup

 A minimal cluster must contain the following nodes:
@@ -141,7 +141,7 @@ Ports may be altered by setting `-httpListenAddr` on the corresponding nodes.

 It is recommended setting up [monitoring](#monitoring) for the cluster.

-#### Environment variables
+### Environment variables

 Each flag value can be set thru environment variables by following these rules:
@@ -151,7 +151,7 @@ Each flag value can be set thru environment variables by following these rules:
 - It is possible setting prefix for environment vars with `-envflag.prefix`. For instance, if `-envflag.prefix=VM_`, then env vars must be prepended with `VM_`

-### Monitoring
+## Monitoring

 All the cluster components expose various metrics in Prometheus-compatible format at `/metrics` page on the TCP port set in `-httpListenAddr` command-line flag.
 By default the following TCP ports are used:
@@ -165,7 +165,7 @@ with [the official Grafana dashboard for VictoriaMetrics cluster](https://grafan
 or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11831).

-### URL format
+## URL format

 * URLs for data ingestion: `http://<vminsert>:8480/insert/<accountID>/<suffix>`, where:
   - `<accountID>` is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant). It is possible to set it as `accountID:projectID`,
@@ -231,7 +231,7 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr
 across `vmstorage` nodes.

-### Cluster resizing and scalability
+## Cluster resizing and scalability

 Cluster performance and capacity scales with adding new nodes.
@@ -250,7 +250,7 @@ Steps to add `vmstorage` node:
 3. Gradually restart all the `vminsert` nodes with new `-storageNode` arg containing `<new_vmstorage_host>:8400`.

-### Updating / reconfiguring cluster nodes
+## Updating / reconfiguring cluster nodes

 All the node types - `vminsert`, `vmselect` and `vmstorage` - may be updated via graceful shutdown.
 Send `SIGINT` signal to the corresponding process, wait until it finishes and then start new version
@@ -260,7 +260,7 @@ Cluster should remain in working state if at least a single node of each type re
 the update process. See [cluster availability](#cluster-availability) section for details.

-### Cluster availability
+## Cluster availability

 * HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes.
 * The cluster remains available if at least a single `vmstorage` node exists:
@@ -271,11 +271,11 @@ the update process. See [cluster availability](#cluster-availability) section fo

 Data replication can be used for increasing storage durability. See [these docs](#replication-and-data-safety) for details.

-### Capacity planning
+## Capacity planning

 Each instance type - `vminsert`, `vmselect` and `vmstorage` - can run on the most suitable hardware.

-#### vminsert
+### vminsert

 * The recommended total number of vCPU cores for all the `vminsert` instances can be calculated from the ingestion rate: `vCPUs = ingestion_rate / 150K`.
 * The recommended number of vCPU cores per each `vminsert` instance should equal to the number of `vmstorage` instances in the cluster.
@@ -285,7 +285,7 @@ Each instance type - `vminsert`, `vmselect` and `vmstorage` - can run on the mos
 * Sometimes `-rpc.disableCompression` command-line flag on `vminsert` instances could increase ingestion capacity at the cost
   of higher network bandwidth usage between `vminsert` and `vmstorage`.

-#### vmstorage
+### vmstorage

 * The recommended total number of vCPU cores for all the `vmstorage` instances can be calculated from the ingestion rate: `vCPUs = ingestion_rate / 150K`.
 * The recommended total amount of RAM for all the `vmstorage` instances can be calculated from the number of active time series: `RAM = 2 * active_time_series * 1KB`.
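To make these rules of thumb concrete (including the `storage_space = ingestion_rate * retention_seconds` formula quoted just below), here is a worked example under assumed workload numbers; the figures are illustrative, not from the docs, and the result of the storage formula is treated as bytes under the docs' roughly byte-per-sample assumption:

```go
package main

import "fmt"

func main() {
	// Assumed workload - illustrative numbers only.
	ingestionRate := 1_000_000         // samples/s
	activeTimeSeries := 10_000_000     // active series
	retentionSeconds := 31 * 24 * 3600 // ~1 month

	// vCPUs = ingestion_rate / 150K (for vminsert and for vmstorage totals).
	vCPUs := ingestionRate / 150_000
	// RAM = 2 * active_time_series * 1KB across all vmstorage instances.
	ramBytes := 2 * activeTimeSeries * 1024
	// storage_space = ingestion_rate * retention_seconds (taken as bytes here).
	storageBytes := ingestionRate * retentionSeconds

	fmt.Printf("total vCPUs per tier: ~%d\n", vCPUs)                       // ~6
	fmt.Printf("vmstorage RAM: ~%.1f GiB\n", float64(ramBytes)/(1<<30))    // ~19.1 GiB
	fmt.Printf("storage space: ~%.2f TiB\n", float64(storageBytes)/(1<<40)) // ~2.44 TiB
}
```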
@@ -299,7 +299,7 @@ Each instance type - `vminsert`, `vmselect` and `vmstorage` - can run on the mos
 * The recommended total amount of storage space for all the `vmstorage` instances can be calculated
   from the ingestion rate and retention: `storage_space = ingestion_rate * retention_seconds`.

-#### vmselect
+### vmselect

 The recommended hardware for `vmselect` instances highly depends on the type of queries. Lightweight queries over small number of time series usually require
 small number of vCPU cores and small amount of RAM on `vmselect`, while heavy queries over big number of time series (>10K) usually require
@@ -309,7 +309,7 @@ In general it is recommended increasing the number of vCPU cores and RAM per `vm
 while adding new `vmselect` nodes only when old nodes are overloaded with incoming query stream.

-### High availability
+## High availability

 It is recommended to run all the components for a single cluster in the same subnetwork with high bandwidth, low latency and low error rates.
 This improves cluster performance and availability.
@@ -321,18 +321,18 @@ If you need multi-AZ setup, then it is recommended running independent clusters i
 into all the cluster. Then [promxy](https://github.com/jacksontj/promxy) could be used for querying the data from multiple clusters.

-### Helm
+## Helm

 Helm chart simplifies managing cluster version of VictoriaMetrics in Kubernetes.
 It is available in the [helm-charts](https://github.com/VictoriaMetrics/helm-charts) repository.

-### Kubernetes operator
+## Kubernetes operator

 [K8s operator](https://github.com/VictoriaMetrics/operator) simplifies managing VictoriaMetrics components in Kubernetes.

-### Replication and data safety
+## Replication and data safety

 In order to enable application-level replication, `-replicationFactor=N` command-line flag must be passed to `vminsert`.
 This guarantees that all the data remains available for querying if up to `N-1` `vmstorage` nodes are unavailable.
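For example, with `-replicationFactor=2` every ingested sample is written to two distinct `vmstorage` nodes, so the data stays fully queryable while any single `vmstorage` node is down.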
@@ -355,7 +355,7 @@ HDD-based persistent disks should be enough for the majority of use cases.
 It is recommended using durable replicated persistent volumes in Kubernetes.

-### Backups
+## Backups

 It is recommended performing periodical backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
 for protecting from user errors such as accidental data deletion.
@@ -376,6 +376,27 @@ Restoring from backup:
 3. Start `vmstorage` node.

+## Profiling
+
+All the cluster components provide the following handlers for [profiling](https://blog.golang.org/profiling-go-programs):
+
+* `http://vminsert:8480/debug/pprof/heap` for memory profile and `http://vminsert:8480/debug/pprof/profile` for CPU profile
+* `http://vmselect:8481/debug/pprof/heap` for memory profile and `http://vmselect:8481/debug/pprof/profile` for CPU profile
+* `http://vmstorage:8482/debug/pprof/heap` for memory profile and `http://vmstorage:8482/debug/pprof/profile` for CPU profile
+
+Example command for collecting cpu profile from `vmstorage`:
+
+```bash
+curl -s http://vmstorage:8482/debug/pprof/profile > cpu.pprof
+```
+
+Example command for collecting memory profile from `vminsert`:
+
+```bash
+curl -s http://vminsert:8480/debug/pprof/heap > mem.pprof
+```
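The collected profiles can then be analyzed with the standard Go tooling, e.g. `go tool pprof cpu.pprof`.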
 ## Community and contributions

 We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle):


@@ -143,6 +143,7 @@ The architecture is [optimized for storing and querying large amounts of time se
 Yes:

 * [Prometheus vs VictoriaMetrics benchmark on node-exporter metrics](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f)
+* [Promscale vs VictoriaMetrics: measuring resource usage in production](https://valyala.medium.com/promscale-vs-victoriametrics-resource-usage-on-production-workload-91c8e3786c03)
 * [Benchmarking time series workloads on Apache Kudu using TSBS](https://blog.cloudera.com/benchmarking-time-series-workloads-on-apache-kudu-using-tsbs/)
 * [Billy: how VictoriaMetrics deals with more than 500 billion rows](https://medium.com/@valyala/billy-how-victoriametrics-deals-with-more-than-500-billion-rows-e82ff8f725da)
 * [Measuring vertical scalability for time series databases: VictoriaMetrics vs InfluxDB vs TimescaleDB](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).


@@ -28,22 +28,22 @@ See [features available for enterprise customers](https://victoriametrics.com/en

 ## Case studies and talks

-Click on a link in order to read the corresponding case study
+Alphabetically sorted links to case studies:

-* [zhihu](https://victoriametrics.github.io/CaseStudies.html#zhihu)
 * [adidas](https://victoriametrics.github.io/CaseStudies.html#adidas)
-* [CERN](https://victoriametrics.github.io/CaseStudies.html#cern)
-* [COLOPL](https://victoriametrics.github.io/CaseStudies.html#colopl)
-* [Zerodha](https://victoriametrics.github.io/CaseStudies.html#zerodha)
-* [Wix.com](https://victoriametrics.github.io/CaseStudies.html#wixcom)
-* [Wedos.com](https://victoriametrics.github.io/CaseStudies.html#wedoscom)
-* [Synthesio](https://victoriametrics.github.io/CaseStudies.html#synthesio)
-* [MHI Vestas Offshore Wind](https://victoriametrics.github.io/CaseStudies.html#mhi-vestas-offshore-wind)
-* [Dreamteam](https://victoriametrics.github.io/CaseStudies.html#dreamteam)
-* [Brandwatch](https://victoriametrics.github.io/CaseStudies.html#brandwatch)
 * [Adsterra](https://victoriametrics.github.io/CaseStudies.html#adsterra)
 * [ARNES](https://victoriametrics.github.io/CaseStudies.html#arnes)
+* [Brandwatch](https://victoriametrics.github.io/CaseStudies.html#brandwatch)
+* [CERN](https://victoriametrics.github.io/CaseStudies.html#cern)
+* [COLOPL](https://victoriametrics.github.io/CaseStudies.html#colopl)
+* [Dreamteam](https://victoriametrics.github.io/CaseStudies.html#dreamteam)
 * [Idealo.de](https://victoriametrics.github.io/CaseStudies.html#idealode)
+* [MHI Vestas Offshore Wind](https://victoriametrics.github.io/CaseStudies.html#mhi-vestas-offshore-wind)
+* [Synthesio](https://victoriametrics.github.io/CaseStudies.html#synthesio)
+* [Wedos.com](https://victoriametrics.github.io/CaseStudies.html#wedoscom)
+* [Wix.com](https://victoriametrics.github.io/CaseStudies.html#wixcom)
+* [Zerodha](https://victoriametrics.github.io/CaseStudies.html#zerodha)
+* [zhihu](https://victoriametrics.github.io/CaseStudies.html#zhihu)

 ## Prominent features

go.mod

@@ -9,7 +9,7 @@ require (
 	// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
 	github.com/VictoriaMetrics/fasthttp v1.0.9
 	github.com/VictoriaMetrics/metrics v1.12.3
-	github.com/VictoriaMetrics/metricsql v0.9.0
+	github.com/VictoriaMetrics/metricsql v0.9.1
 	github.com/aws/aws-sdk-go v1.36.7
 	github.com/cespare/xxhash/v2 v2.1.1
 	github.com/go-kit/kit v0.10.0
@@ -19,7 +19,7 @@ require (
 	github.com/valyala/fastjson v1.6.3
 	github.com/valyala/fastrand v1.0.0
 	github.com/valyala/fasttemplate v1.2.1
-	github.com/valyala/gozstd v1.8.3
+	github.com/valyala/gozstd v1.9.0
 	github.com/valyala/histogram v1.1.2
 	github.com/valyala/quicktemplate v1.6.3
 	golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5

go.sum

@@ -86,8 +86,8 @@ github.com/VictoriaMetrics/fasthttp v1.0.9/go.mod h1:3SeUL4zwB/p/a9aEeRc6gdlbrtN
 github.com/VictoriaMetrics/metrics v1.12.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE=
 github.com/VictoriaMetrics/metrics v1.12.3 h1:Fe6JHC6MSEKa+BtLhPN8WIvS+HKPzMc2evEpNeCGy7I=
 github.com/VictoriaMetrics/metrics v1.12.3/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE=
-github.com/VictoriaMetrics/metricsql v0.9.0 h1:mO4YmVRVHQmipTHcSMlCJ7Rctsol7vlu1l2ifh+ibqI=
-github.com/VictoriaMetrics/metricsql v0.9.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8=
+github.com/VictoriaMetrics/metricsql v0.9.1 h1:CVl9fSW4pGhv7r9Q54zBPVVIGmwpAWvfo0QybVv+TV8=
+github.com/VictoriaMetrics/metricsql v0.9.1/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8=
 github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
 github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c=
 github.com/agnivade/levenshtein v1.0.1/go.mod h1:CURSv5d9Uaml+FovSIICkLbAUZ9S4RqaHDIsdSBg7lM=
@@ -728,8 +728,8 @@ github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2Obdk
 github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
 github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52WA1u4=
 github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
-github.com/valyala/gozstd v1.8.3 h1:nHlS+sCFoNLsZpRPKDviXkhHybaRSUjH2w0P/myYo0I=
-github.com/valyala/gozstd v1.8.3/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
+github.com/valyala/gozstd v1.9.0 h1:Mse1yBM5ZQpigO4KW1bVK8p8VSRYttRF+pV0QsMWZ1Y=
+github.com/valyala/gozstd v1.9.0/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
 github.com/valyala/histogram v1.1.2 h1:vOk5VrGjMBIoPR5k6wA8vBaC8toeJ8XO0yfRjFEc1h8=
 github.com/valyala/histogram v1.1.2/go.mod h1:CZAr6gK9dbD7hYx2s8WSPh0p5x5wETjC+2b3PJVtEdg=
 github.com/valyala/quicktemplate v1.6.3 h1:O7EuMwuH7Q94U2CXD6sOX8AYHqQqWtmIk690IhmpkKA=


@@ -1305,22 +1305,36 @@ func appendPartsToMerge(dst, src []*partWrapper, maxPartsToMerge int, maxItems u
 	// Sort src parts by itemsCount.
 	sort.Slice(src, func(i, j int) bool { return src[i].p.ph.itemsCount < src[j].p.ph.itemsCount })

-	n := maxPartsToMerge
-	if len(src) < n {
-		n = len(src)
+	minSrcParts := (maxPartsToMerge + 1) / 2
+	if minSrcParts < 2 {
+		minSrcParts = 2
+	}
+	maxSrcParts := maxPartsToMerge
+	if len(src) < maxSrcParts {
+		maxSrcParts = len(src)
 	}

-	// Exhaustive search for parts giving the lowest write amplification
-	// when merged.
+	// Exhaustive search for parts giving the lowest write amplification when merged.
 	var pws []*partWrapper
 	maxM := float64(0)
-	for i := 2; i <= n; i++ {
+	for i := minSrcParts; i <= maxSrcParts; i++ {
 		for j := 0; j <= len(src)-i; j++ {
-			itemsSum := uint64(0)
 			a := src[j : j+i]
+			if a[0].p.ph.itemsCount*uint64(len(a)) < a[len(a)-1].p.ph.itemsCount {
+				// Do not merge parts with too big difference in items count,
+				// since this results in unbalanced merges.
+				continue
+			}
+			itemsSum := uint64(0)
 			for _, pw := range a {
 				itemsSum += pw.p.ph.itemsCount
 			}
+			if itemsSum < 1e6 && len(a) < maxPartsToMerge {
+				// Do not merge parts with too small number of items if the number of source parts
+				// isn't equal to maxPartsToMerge. This should reduce CPU usage and disk IO usage
+				// for small parts merge.
+				continue
+			}
 			if itemsSum > maxItems {
 				// There is no sense in checking the remaining bigger parts.
 				break
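The new unbalanced-merge guard relies on `src` being sorted by `itemsCount`: a candidate window is rejected when its smallest part, multiplied by the window length, cannot reach its biggest part. A toy illustration of that predicate with plain numbers instead of the real `partWrapper` types:

```go
package main

import "fmt"

// balanced reports whether a sorted window of part sizes passes the
// unbalanced-merge check from appendPartsToMerge above: the smallest
// part times the window length must reach the biggest part.
func balanced(itemsCounts []uint64) bool {
	n := uint64(len(itemsCounts))
	return itemsCounts[0]*n >= itemsCounts[len(itemsCounts)-1]
}

func main() {
	fmt.Println(balanced([]uint64{40, 50, 60})) // true: 40*3 >= 60
	fmt.Println(balanced([]uint64{1, 2, 1000})) // false: 1*3 < 1000
}
```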


@@ -8,7 +8,6 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
@@ -744,7 +743,6 @@ func appendScrapeWork(dst []*ScrapeWork, swc *scrapeWorkConfig, target string, e
 	// Reduce memory usage by interning all the strings in labels.
 	internLabelStrings(labels)
 	dst = append(dst, &ScrapeWork{
-		ID:             atomic.AddUint64(&nextScrapeWorkID, 1),
 		ScrapeURL:      scrapeURL,
 		ScrapeInterval: swc.scrapeInterval,
 		ScrapeTimeout:  swc.scrapeTimeout,
@@ -764,9 +762,6 @@ func appendScrapeWork(dst []*ScrapeWork, swc *scrapeWorkConfig, target string, e
 	return dst, nil
 }

-// Each ScrapeWork has an ID, which is used for locating it when updating its status.
-var nextScrapeWorkID uint64
-
 func internLabelStrings(labels []prompbmarshal.Label) {
 	for i := range labels {
 		label := &labels[i]


@@ -442,7 +442,6 @@ scrape_configs:
 func resetNonEssentialFields(sws []*ScrapeWork) {
 	for i := range sws {
-		sws[i].ID = 0
 		sws[i].OriginalLabels = nil
 	}
 }


@@ -355,7 +355,7 @@ func newScraper(sw *ScrapeWork, group string, pushData func(wr *prompbmarshal.Wr
 		stopCh: make(chan struct{}),
 	}
 	c := newClient(sw)
-	sc.sw.Config = *sw
+	sc.sw.Config = sw
 	sc.sw.ScrapeGroup = group
 	sc.sw.ReadData = c.ReadData
 	sc.sw.GetStreamReader = c.GetStreamReader


@@ -27,10 +27,9 @@ var (
 )

 // ScrapeWork represents a unit of work for scraping Prometheus metrics.
+//
+// It must be immutable during its lifetime, since it is read from concurrently running goroutines.
 type ScrapeWork struct {
-	// Unique ID for the ScrapeWork.
-	ID uint64
-
 	// Full URL (including query args) for the scrape.
 	ScrapeURL string
@@ -144,7 +143,7 @@ func promLabelsString(labels []prompbmarshal.Label) string {
 type scrapeWork struct {
 	// Config for the scrape.
-	Config ScrapeWork
+	Config *ScrapeWork

 	// ReadData is called for reading the data.
 	ReadData func(dst []byte) ([]byte, error)
@@ -308,7 +307,7 @@ func (sw *scrapeWork) scrapeInternal(scrapeTimestamp, realTimestamp int64) error
 	// body must be released only after wc is released, since wc refers to body.
 	sw.prevBodyLen = len(body.B)
 	leveledbytebufferpool.Put(body)
-	tsmGlobal.Update(&sw.Config, sw.ScrapeGroup, up == 1, realTimestamp, int64(duration*1000), err)
+	tsmGlobal.Update(sw.Config, sw.ScrapeGroup, up == 1, realTimestamp, int64(duration*1000), err)
 	return err
 }
@@ -369,7 +368,7 @@ func (sw *scrapeWork) scrapeStream(scrapeTimestamp, realTimestamp int64) error {
 	sw.prevRowsLen = len(wc.rows.Rows)
 	wc.reset()
 	writeRequestCtxPool.Put(wc)
-	tsmGlobal.Update(&sw.Config, sw.ScrapeGroup, up == 1, realTimestamp, int64(duration*1000), err)
+	tsmGlobal.Update(sw.Config, sw.ScrapeGroup, up == 1, realTimestamp, int64(duration*1000), err)
 	return nil
 }


@@ -49,6 +49,7 @@ func TestScrapeWorkScrapeInternalFailure(t *testing.T) {
 	timeseriesExpected := parseData(dataExpected)

 	var sw scrapeWork
+	sw.Config = &ScrapeWork{}

 	readDataCalls := 0
 	sw.ReadData = func(dst []byte) ([]byte, error) {
@@ -87,7 +88,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
 	timeseriesExpected := parseData(dataExpected)

 	var sw scrapeWork
-	sw.Config = *cfg
+	sw.Config = cfg

 	readDataCalls := 0
 	sw.ReadData = func(dst []byte) ([]byte, error) {


@@ -37,6 +37,7 @@ vm_tcplistener_write_calls_total{name="https", addr=":443"} 132356
 	b.SetBytes(int64(len(data)))
 	b.RunParallel(func(pb *testing.PB) {
 		var sw scrapeWork
+		sw.Config = &ScrapeWork{}
 		sw.ReadData = readDataFunc
 		sw.PushData = func(wr *prompbmarshal.WriteRequest) {}
 		timestamp := int64(0)


@ -59,45 +59,49 @@ func WriteAPIV1Targets(w io.Writer, state string) {
type targetStatusMap struct { type targetStatusMap struct {
mu sync.Mutex mu sync.Mutex
m map[uint64]*targetStatus m map[*ScrapeWork]*targetStatus
} }
func newTargetStatusMap() *targetStatusMap { func newTargetStatusMap() *targetStatusMap {
return &targetStatusMap{ return &targetStatusMap{
m: make(map[uint64]*targetStatus), m: make(map[*ScrapeWork]*targetStatus),
} }
} }
func (tsm *targetStatusMap) Reset() { func (tsm *targetStatusMap) Reset() {
tsm.mu.Lock() tsm.mu.Lock()
tsm.m = make(map[uint64]*targetStatus) tsm.m = make(map[*ScrapeWork]*targetStatus)
tsm.mu.Unlock() tsm.mu.Unlock()
} }
func (tsm *targetStatusMap) Register(sw *ScrapeWork) { func (tsm *targetStatusMap) Register(sw *ScrapeWork) {
tsm.mu.Lock() tsm.mu.Lock()
tsm.m[sw.ID] = &targetStatus{ tsm.m[sw] = &targetStatus{
sw: *sw, sw: sw,
} }
tsm.mu.Unlock() tsm.mu.Unlock()
} }
func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) { func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) {
tsm.mu.Lock() tsm.mu.Lock()
delete(tsm.m, sw.ID) delete(tsm.m, sw)
tsm.mu.Unlock() tsm.mu.Unlock()
} }
func (tsm *targetStatusMap) Update(sw *ScrapeWork, group string, up bool, scrapeTime, scrapeDuration int64, err error) { func (tsm *targetStatusMap) Update(sw *ScrapeWork, group string, up bool, scrapeTime, scrapeDuration int64, err error) {
tsm.mu.Lock() tsm.mu.Lock()
tsm.m[sw.ID] = &targetStatus{ ts := tsm.m[sw]
sw: *sw, if ts == nil {
up: up, ts = &targetStatus{
scrapeGroup: group, sw: sw,
scrapeTime: scrapeTime,
scrapeDuration: scrapeDuration,
err: err,
} }
tsm.m[sw] = ts
}
ts.up = up
ts.scrapeGroup = group
ts.scrapeTime = scrapeTime
ts.scrapeDuration = scrapeDuration
ts.err = err
tsm.mu.Unlock() tsm.mu.Unlock()
} }
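The map is now keyed by *ScrapeWork pointer identity instead of a uint64 ID, and Update reuses the registered entry rather than replacing it. A minimal sketch of this pointer-keyed pattern, assuming one long-lived *ScrapeWork per target (names simplified from the code above):

```go
// Sketch only: pointer identity replaces the previous uint64 hash key,
// so ID collisions are impossible and Update can mutate the entry in place.
type statusMap struct {
	mu sync.Mutex
	m  map[*ScrapeWork]*targetStatus
}

func (sm *statusMap) update(sw *ScrapeWork, up bool) {
	sm.mu.Lock()
	ts := sm.m[sw]
	if ts == nil {
		ts = &targetStatus{sw: sw} // auto-register unknown targets
		sm.m[sw] = ts
	}
	ts.up = up
	sm.mu.Unlock()
}
```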
@ -123,8 +127,8 @@ func (tsm *targetStatusMap) WriteActiveTargetsJSON(w io.Writer) {
st targetStatus st targetStatus
} }
kss := make([]keyStatus, 0, len(tsm.m)) kss := make([]keyStatus, 0, len(tsm.m))
for _, st := range tsm.m { for sw, st := range tsm.m {
key := promLabelsString(st.sw.OriginalLabels) key := promLabelsString(sw.OriginalLabels)
kss = append(kss, keyStatus{ kss = append(kss, keyStatus{
key: key, key: key,
st: *st, st: *st,
@ -176,7 +180,7 @@ func writeLabelsJSON(w io.Writer, labels []prompbmarshal.Label) {
} }
type targetStatus struct { type targetStatus struct {
sw ScrapeWork sw *ScrapeWork
up bool up bool
scrapeGroup string scrapeGroup string
scrapeTime int64 scrapeTime int64


@ -166,6 +166,8 @@ func (r *Row) unmarshal(s string, tagsPool []Tag, noEscapes bool) ([]Tag, error)
// There is no timestamp - just a whitespace after the value. // There is no timestamp - just a whitespace after the value.
return tagsPool, nil return tagsPool, nil
} }
// There may be trailing whitespace after the timestamp
s = skipTrailingWhitespace(s)
ts, err := fastfloat.Parse(s) ts, err := fastfloat.Parse(s)
if err != nil { if err != nil {
return tagsPool, fmt.Errorf("cannot parse timestamp %q: %w", s, err) return tagsPool, fmt.Errorf("cannot parse timestamp %q: %w", s, err)
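The new skipTrailingWhitespace call lets lines with trailing whitespace after the timestamp (and, together with the comment handling tested below, lines like `abc 123 456 # foobar`) parse cleanly. The helper itself is not part of this hunk; a plausible sketch:

```go
// Plausible sketch of skipTrailingWhitespace (not shown in this hunk):
// trim trailing spaces and tabs so that fastfloat.Parse receives only
// the timestamp characters.
func skipTrailingWhitespace(s string) string {
	for len(s) > 0 && (s[len(s)-1] == ' ' || s[len(s)-1] == '\t') {
		s = s[:len(s)-1]
	}
	return s
}
```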


@ -205,7 +205,7 @@ cassandra_token_ownership_ratio 78.9`, &Rows{
// Exemplars - see https://github.com/OpenObservability/OpenMetrics/blob/master/OpenMetrics.md#exemplars-1 // Exemplars - see https://github.com/OpenObservability/OpenMetrics/blob/master/OpenMetrics.md#exemplars-1
f(`foo_bucket{le="10",a="#b"} 17 # {trace_id="oHg5SJ#YRHA0"} 9.8 1520879607.789 f(`foo_bucket{le="10",a="#b"} 17 # {trace_id="oHg5SJ#YRHA0"} 9.8 1520879607.789
abc 123 456#foobar abc 123 456 # foobar
foo 344#bar`, &Rows{ foo 344#bar`, &Rows{
Rows: []Row{ Rows: []Row{
{ {


@ -1025,22 +1025,45 @@ func (pt *partition) mergeBigParts(isFinal bool) error {
} }
func (pt *partition) mergeSmallParts(isFinal bool) error { func (pt *partition) mergeSmallParts(isFinal bool) error {
maxRows := maxRowsByPath(pt.smallPartsPath) // Try merging small parts to a big part at first.
if maxRows > maxRowsPerSmallPart() {
// The output part may go to big part,
// so make sure it has enough space.
maxBigPartRows := maxRowsByPath(pt.bigPartsPath) maxBigPartRows := maxRowsByPath(pt.bigPartsPath)
if maxRows > maxBigPartRows {
maxRows = maxBigPartRows
}
}
pt.partsLock.Lock() pt.partsLock.Lock()
pws, needFreeSpace := getPartsToMerge(pt.smallParts, maxRows, isFinal) pws, needFreeSpace := getPartsToMerge(pt.smallParts, maxBigPartRows, isFinal)
pt.partsLock.Unlock() pt.partsLock.Unlock()
atomicSetBool(&pt.bigMergeNeedFreeDiskSpace, needFreeSpace)
atomicSetBool(&pt.smallMergeNeedFreeDiskSpace, needFreeSpace) rowsCount := getRowsCount(pws)
if rowsCount > maxRowsPerSmallPart() {
// Merge small parts to a big part.
return pt.mergeParts(pws, pt.stopCh) return pt.mergeParts(pws, pt.stopCh)
}
// Make sure that the output small part fits small parts storage.
maxSmallPartRows := maxRowsByPath(pt.smallPartsPath)
if rowsCount <= maxSmallPartRows {
// Merge small parts to a small part.
return pt.mergeParts(pws, pt.stopCh)
}
// The output small part doesn't fit small parts storage. Try merging small parts according to maxSmallPartRows limit.
pt.releasePartsToMerge(pws)
pt.partsLock.Lock()
pws, needFreeSpace = getPartsToMerge(pt.smallParts, maxSmallPartRows, isFinal)
pt.partsLock.Unlock()
atomicSetBool(&pt.smallMergeNeedFreeDiskSpace, needFreeSpace)
return pt.mergeParts(pws, pt.stopCh)
}
func (pt *partition) releasePartsToMerge(pws []*partWrapper) {
pt.partsLock.Lock()
for _, pw := range pws {
if !pw.isInMerge {
logger.Panicf("BUG: missing isInMerge flag on the part %q", pw.p.path)
}
pw.isInMerge = false
}
pt.partsLock.Unlock()
} }
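mergeSmallParts now chooses the merge destination in three steps: parts are first selected under the big-part limit; if the merged row count exceeds maxRowsPerSmallPart the result goes to a big part; if it fits the small-parts storage it stays small; otherwise the parts are re-selected under the small-part limit. A condensed sketch of that decision, with the limits passed in explicitly for illustration:

```go
// Condensed sketch of the decision flow in mergeSmallParts above;
// in the real code the limits come from maxRowsPerSmallPart() and
// maxRowsByPath(pt.smallPartsPath).
func mergeDestination(rowsCount, maxRowsPerSmallPart, maxSmallPartRows uint64) string {
	if rowsCount > maxRowsPerSmallPart {
		return "big part" // output exceeds the small-part limit
	}
	if rowsCount <= maxSmallPartRows {
		return "small part" // output fits the small-parts storage
	}
	return "re-select parts under maxSmallPartRows" // then merge to a small part
}
```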
var errNothingToMerge = fmt.Errorf("nothing to merge") var errNothingToMerge = fmt.Errorf("nothing to merge")
@ -1063,18 +1086,7 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
// Nothing to merge. // Nothing to merge.
return errNothingToMerge return errNothingToMerge
} }
defer pt.releasePartsToMerge(pws)
defer func() {
// Remove isInMerge flag from pws.
pt.partsLock.Lock()
for _, pw := range pws {
if !pw.isInMerge {
logger.Panicf("BUG: missing isInMerge flag on the part %q", pw.p.path)
}
pw.isInMerge = false
}
pt.partsLock.Unlock()
}()
startTime := time.Now() startTime := time.Now()
@ -1279,7 +1291,7 @@ func removeParts(pws []*partWrapper, partsToRemove map[*partWrapper]bool, isBig
// getPartsToMerge returns optimal parts to merge from pws. // getPartsToMerge returns optimal parts to merge from pws.
// //
// The returned rows will contain less than maxRows rows. // The returned parts will contain less than maxRows rows.
// The function returns true if pws contains parts, which cannot be merged because of maxRows limit. // The function returns true if pws contains parts, which cannot be merged because of maxRows limit.
func getPartsToMerge(pws []*partWrapper, maxRows uint64, isFinal bool) ([]*partWrapper, bool) { func getPartsToMerge(pws []*partWrapper, maxRows uint64, isFinal bool) ([]*partWrapper, bool) {
pwsRemaining := make([]*partWrapper, 0, len(pws)) pwsRemaining := make([]*partWrapper, 0, len(pws))
@ -1345,28 +1357,39 @@ func appendPartsToMerge(dst, src []*partWrapper, maxPartsToMerge int, maxRows ui
return a.RowsCount < b.RowsCount return a.RowsCount < b.RowsCount
}) })
n := maxPartsToMerge minSrcParts := (maxPartsToMerge + 1) / 2
if len(src) < n { if minSrcParts < 2 {
n = len(src) minSrcParts = 2
}
maxSrcParts := maxPartsToMerge
if len(src) < maxSrcParts {
maxSrcParts = len(src)
} }
// Exhaustive search for parts giving the lowest write amplification // Exhaustive search for parts giving the lowest write amplification when merged.
// when merged.
var pws []*partWrapper var pws []*partWrapper
maxM := float64(0) maxM := float64(0)
for i := 2; i <= n; i++ { for i := minSrcParts; i <= maxSrcParts; i++ {
for j := 0; j <= len(src)-i; j++ { for j := 0; j <= len(src)-i; j++ {
a := src[j : j+i] a := src[j : j+i]
rowsSum := uint64(0) if a[0].p.ph.RowsCount*uint64(len(a)) < a[len(a)-1].p.ph.RowsCount {
for _, pw := range a { // Do not merge parts with too big difference in rows count,
rowsSum += pw.p.ph.RowsCount // since this results in unbalanced merges.
continue
} }
if rowsSum > maxRows { rowsCount := getRowsCount(a)
if rowsCount < 1e6 && len(a) < maxPartsToMerge {
// Do not merge parts with too small number of rows if the number of source parts
// isn't equal to maxPartsToMerge. This should reduce CPU usage and disk IO usage
// for small parts merge.
continue
}
if rowsCount > maxRows {
// There is no need in verifying remaining parts with higher number of rows // There is no need in verifying remaining parts with higher number of rows
needFreeSpace = true needFreeSpace = true
break break
} }
m := float64(rowsSum) / float64(a[len(a)-1].p.ph.RowsCount) m := float64(rowsCount) / float64(a[len(a)-1].p.ph.RowsCount)
if m < maxM { if m < maxM {
continue continue
} }
@ -1386,6 +1409,14 @@ func appendPartsToMerge(dst, src []*partWrapper, maxPartsToMerge int, maxRows ui
return append(dst, pws...), needFreeSpace return append(dst, pws...), needFreeSpace
} }
func getRowsCount(pws []*partWrapper) uint64 {
n := uint64(0)
for _, pw := range pws {
n += pw.p.ph.RowsCount
}
return n
}
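The selection metric m = rowsCount / largestPartRows rewards balanced candidates: the higher m, the more rows get merged per rewrite of the largest source part, i.e. the lower the write amplification. A toy illustration of the new balance filter and the metric (row counts are made up; the 1e6-rows filter is omitted for brevity):

```go
package main

import "fmt"

// Toy illustration of appendPartsToMerge's candidate filtering; each
// candidate is an ascending window of per-part row counts.
func main() {
	for _, a := range [][]uint64{
		{1, 3, 7}, // 1*3 < 7 => skipped: too big difference in rows count
		{4, 5, 7}, // m = 16/7 ≈ 2.29
		{3, 4, 5}, // m = 12/5 = 2.40 => preferred (lowest write amplification)
	} {
		if a[0]*uint64(len(a)) < a[len(a)-1] {
			fmt.Println(a, "skipped: unbalanced")
			continue
		}
		var rowsCount uint64
		for _, rc := range a {
			rowsCount += rc
		}
		fmt.Printf("%v m=%.2f\n", a, float64(rowsCount)/float64(a[len(a)-1]))
	}
}
```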
func openParts(pathPrefix1, pathPrefix2, path string) ([]*partWrapper, error) { func openParts(pathPrefix1, pathPrefix2, path string) ([]*partWrapper, error) {
// The path can be missing after restoring from backup, so create it if needed. // The path can be missing after restoring from backup, so create it if needed.
if err := fs.MkdirAllIfNotExist(path); err != nil { if err := fs.MkdirAllIfNotExist(path); err != nil {


@ -18,7 +18,7 @@ func TestAppendPartsToMerge(t *testing.T) {
testAppendPartsToMerge(t, 2, []uint64{123}, nil) testAppendPartsToMerge(t, 2, []uint64{123}, nil)
testAppendPartsToMerge(t, 2, []uint64{4, 2}, nil) testAppendPartsToMerge(t, 2, []uint64{4, 2}, nil)
testAppendPartsToMerge(t, 2, []uint64{128, 64, 32, 16, 8, 4, 2, 1}, nil) testAppendPartsToMerge(t, 2, []uint64{128, 64, 32, 16, 8, 4, 2, 1}, nil)
testAppendPartsToMerge(t, 4, []uint64{128, 64, 32, 10, 9, 7, 2, 1}, []uint64{2, 7, 9, 10}) testAppendPartsToMerge(t, 4, []uint64{128, 64, 32, 10, 9, 7, 3, 1}, []uint64{3, 7, 9, 10})
testAppendPartsToMerge(t, 2, []uint64{128, 64, 32, 16, 8, 4, 2, 2}, []uint64{2, 2}) testAppendPartsToMerge(t, 2, []uint64{128, 64, 32, 16, 8, 4, 2, 2}, []uint64{2, 2})
testAppendPartsToMerge(t, 4, []uint64{128, 64, 32, 16, 8, 4, 2, 2}, []uint64{2, 2, 4, 8}) testAppendPartsToMerge(t, 4, []uint64{128, 64, 32, 16, 8, 4, 2, 2}, []uint64{2, 2, 4, 8})
testAppendPartsToMerge(t, 2, []uint64{1, 1}, []uint64{1, 1}) testAppendPartsToMerge(t, 2, []uint64{1, 1}, []uint64{1, 1})
@ -26,7 +26,9 @@ func TestAppendPartsToMerge(t *testing.T) {
testAppendPartsToMerge(t, 2, []uint64{4, 2, 4}, []uint64{4, 4}) testAppendPartsToMerge(t, 2, []uint64{4, 2, 4}, []uint64{4, 4})
testAppendPartsToMerge(t, 2, []uint64{1, 3, 7, 2}, nil) testAppendPartsToMerge(t, 2, []uint64{1, 3, 7, 2}, nil)
testAppendPartsToMerge(t, 3, []uint64{1, 3, 7, 2}, []uint64{1, 2, 3}) testAppendPartsToMerge(t, 3, []uint64{1, 3, 7, 2}, []uint64{1, 2, 3})
testAppendPartsToMerge(t, 4, []uint64{1, 3, 7, 2}, []uint64{1, 2, 3}) testAppendPartsToMerge(t, 4, []uint64{1, 3, 7, 2}, nil)
testAppendPartsToMerge(t, 4, []uint64{1e6, 3e6, 7e6, 2e6}, []uint64{1e6, 2e6, 3e6})
testAppendPartsToMerge(t, 4, []uint64{2, 3, 7, 2}, []uint64{2, 2, 3, 7})
testAppendPartsToMerge(t, 3, []uint64{11, 1, 10, 100, 10}, []uint64{10, 10, 11}) testAppendPartsToMerge(t, 3, []uint64{11, 1, 10, 100, 10}, []uint64{10, 10, 11})
} }
@ -35,8 +37,9 @@ func TestAppendPartsToMergeManyParts(t *testing.T) {
// using minimum merges. // using minimum merges.
var a []uint64 var a []uint64
maxOutPartRows := uint64(0) maxOutPartRows := uint64(0)
r := rand.New(rand.NewSource(1))
for i := 0; i < 1024; i++ { for i := 0; i < 1024; i++ {
n := uint64(uint32(rand.NormFloat64() * 1e9)) n := uint64(uint32(r.NormFloat64() * 1e9))
if n < 0 { if n < 0 {
n = -n n = -n
} }
@ -83,11 +86,11 @@ func TestAppendPartsToMergeManyParts(t *testing.T) {
rowsTotal += uint64(rc) rowsTotal += uint64(rc)
} }
overhead := float64(rowsMerged) / float64(rowsTotal) overhead := float64(rowsMerged) / float64(rowsTotal)
if overhead > 2.96 { if overhead > 2.1 {
t.Fatalf("too big overhead; rowsCount=%d, iterationsCount=%d, rowsTotal=%d, rowsMerged=%d, overhead=%f", t.Fatalf("too big overhead; rowsCount=%d, iterationsCount=%d, rowsTotal=%d, rowsMerged=%d, overhead=%f",
rowsCount, iterationsCount, rowsTotal, rowsMerged, overhead) rowsCount, iterationsCount, rowsTotal, rowsMerged, overhead)
} }
if len(rowsCount) > 40 { if len(rowsCount) > 18 {
t.Fatalf("too many rowsCount %d; rowsCount=%d, iterationsCount=%d, rowsTotal=%d, rowsMerged=%d, overhead=%f", t.Fatalf("too many rowsCount %d; rowsCount=%d, iterationsCount=%d, rowsTotal=%d, rowsMerged=%d, overhead=%f",
len(rowsCount), rowsCount, iterationsCount, rowsTotal, rowsMerged, overhead) len(rowsCount), rowsCount, iterationsCount, rowsTotal, rowsMerged, overhead)
} }


@ -408,8 +408,12 @@ func isPositiveDuration(s string) bool {
return n == len(s) return n == len(s)
} }
// PositiveDurationValue returns the duration in milliseconds for the given s // PositiveDurationValue returns the positive duration in milliseconds for the given s
// and the given step. // and the given step.
//
// Duration in s may be combined, e.g. 2h5m or 2h-5m.
//
// Error is returned if the duration in s is negative.
func PositiveDurationValue(s string, step int64) (int64, error) { func PositiveDurationValue(s string, step int64) (int64, error) {
d, err := DurationValue(s, step) d, err := DurationValue(s, step)
if err != nil { if err != nil {
@ -424,7 +428,7 @@ func PositiveDurationValue(s string, step int64) (int64, error) {
// DurationValue returns the duration in milliseconds for the given s // DurationValue returns the duration in milliseconds for the given s
// and the given step. // and the given step.
// //
// Duration in s may be combined, e.g. 2h5m or 2h-5m. // Duration in s may be combined, e.g. 2h5m, -2h5m or 2h-5m.
// //
// The returned duration value can be negative. // The returned duration value can be negative.
func DurationValue(s string, step int64) (int64, error) { func DurationValue(s string, step int64) (int64, error) {
@ -432,6 +436,7 @@ func DurationValue(s string, step int64) (int64, error) {
return 0, fmt.Errorf("duration cannot be empty") return 0, fmt.Errorf("duration cannot be empty")
} }
var d int64 var d int64
isMinus := false
for len(s) > 0 { for len(s) > 0 {
n := scanSingleDuration(s, true) n := scanSingleDuration(s, true)
if n <= 0 { if n <= 0 {
@ -443,7 +448,13 @@ func DurationValue(s string, step int64) (int64, error) {
if err != nil { if err != nil {
return 0, err return 0, err
} }
if isMinus && dLocal > 0 {
dLocal = -dLocal
}
d += dLocal d += dLocal
if dLocal < 0 {
isMinus = true
}
} }
return d, nil return d, nil
} }
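With the new isMinus flag a minus sign propagates to the components that follow it, so "-2h5m" parses as -(2h) - 5m rather than -2h + 5m. A small usage sketch, assuming this is the metricsql package (DurationValue returns milliseconds):

```go
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/metricsql"
)

func main() {
	const step = 1000 // only used for step-relative durations such as "5i"
	for _, s := range []string{"2h5m", "2h-5m", "-2h5m"} {
		ms, err := metricsql.DurationValue(s, step)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%-6s => %d minutes\n", s, ms/60000)
	}
	// Expected: 125, 115 and -125 minutes: once a negative component
	// appears, the following components inherit the minus sign.
}
```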


@ -13,20 +13,20 @@ package gozstd
// during calls from Go. // during calls from Go.
// See https://github.com/golang/go/issues/24450 . // See https://github.com/golang/go/issues/24450 .
static size_t ZSTD_compressCCtx_wrapper(ZSTD_CCtx* ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize, int compressionLevel) { static size_t ZSTD_compressCCtx_wrapper(uintptr_t ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize, int compressionLevel) {
return ZSTD_compressCCtx(ctx, (void*)dst, dstCapacity, (const void*)src, srcSize, compressionLevel); return ZSTD_compressCCtx((ZSTD_CCtx*)ctx, (void*)dst, dstCapacity, (const void*)src, srcSize, compressionLevel);
} }
static size_t ZSTD_compress_usingCDict_wrapper(ZSTD_CCtx* ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize, const ZSTD_CDict* cdict) { static size_t ZSTD_compress_usingCDict_wrapper(uintptr_t ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize, uintptr_t cdict) {
return ZSTD_compress_usingCDict(ctx, (void*)dst, dstCapacity, (const void*)src, srcSize, cdict); return ZSTD_compress_usingCDict((ZSTD_CCtx*)ctx, (void*)dst, dstCapacity, (const void*)src, srcSize, (const ZSTD_CDict*)cdict);
} }
static size_t ZSTD_decompressDCtx_wrapper(ZSTD_DCtx* ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize) { static size_t ZSTD_decompressDCtx_wrapper(uintptr_t ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize) {
return ZSTD_decompressDCtx(ctx, (void*)dst, dstCapacity, (const void*)src, srcSize); return ZSTD_decompressDCtx((ZSTD_DCtx*)ctx, (void*)dst, dstCapacity, (const void*)src, srcSize);
} }
static size_t ZSTD_decompress_usingDDict_wrapper(ZSTD_DCtx* ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize, const ZSTD_DDict *ddict) { static size_t ZSTD_decompress_usingDDict_wrapper(uintptr_t ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize, uintptr_t ddict) {
return ZSTD_decompress_usingDDict(ctx, (void*)dst, dstCapacity, (const void*)src, srcSize, ddict); return ZSTD_decompress_usingDDict((ZSTD_DCtx*)ctx, (void*)dst, dstCapacity, (const void*)src, srcSize, (const ZSTD_DDict*)ddict);
} }
static unsigned long long ZSTD_getFrameContentSize_wrapper(uintptr_t src, size_t srcSize) { static unsigned long long ZSTD_getFrameContentSize_wrapper(uintptr_t src, size_t srcSize) {
@ -149,12 +149,13 @@ func compress(cctx, cctxDict *cctxWrapper, dst, src []byte, cd *CDict, compressi
func compressInternal(cctx, cctxDict *cctxWrapper, dst, src []byte, cd *CDict, compressionLevel int, mustSucceed bool) C.size_t { func compressInternal(cctx, cctxDict *cctxWrapper, dst, src []byte, cd *CDict, compressionLevel int, mustSucceed bool) C.size_t {
if cd != nil { if cd != nil {
result := C.ZSTD_compress_usingCDict_wrapper(cctxDict.cctx, result := C.ZSTD_compress_usingCDict_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(cctxDict.cctx))),
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))), C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
C.size_t(cap(dst)), C.size_t(cap(dst)),
C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))), C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
C.size_t(len(src)), C.size_t(len(src)),
cd.p) C.uintptr_t(uintptr(unsafe.Pointer(cd.p))))
// Prevent from GC'ing of dst and src during CGO call above. // Prevent from GC'ing of dst and src during CGO call above.
runtime.KeepAlive(dst) runtime.KeepAlive(dst)
runtime.KeepAlive(src) runtime.KeepAlive(src)
@ -163,7 +164,8 @@ func compressInternal(cctx, cctxDict *cctxWrapper, dst, src []byte, cd *CDict, c
} }
return result return result
} }
result := C.ZSTD_compressCCtx_wrapper(cctx.cctx, result := C.ZSTD_compressCCtx_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(cctx.cctx))),
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))), C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
C.size_t(cap(dst)), C.size_t(cap(dst)),
C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))), C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
@ -289,14 +291,16 @@ func decompress(dctx, dctxDict *dctxWrapper, dst, src []byte, dd *DDict) ([]byte
func decompressInternal(dctx, dctxDict *dctxWrapper, dst, src []byte, dd *DDict) C.size_t { func decompressInternal(dctx, dctxDict *dctxWrapper, dst, src []byte, dd *DDict) C.size_t {
var n C.size_t var n C.size_t
if dd != nil { if dd != nil {
n = C.ZSTD_decompress_usingDDict_wrapper(dctxDict.dctx, n = C.ZSTD_decompress_usingDDict_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(dctxDict.dctx))),
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))), C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
C.size_t(cap(dst)), C.size_t(cap(dst)),
C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))), C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
C.size_t(len(src)), C.size_t(len(src)),
dd.p) C.uintptr_t(uintptr(unsafe.Pointer(dd.p))))
} else { } else {
n = C.ZSTD_decompressDCtx_wrapper(dctx.dctx, n = C.ZSTD_decompressDCtx_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(dctx.dctx))),
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))), C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
C.size_t(cap(dst)), C.size_t(cap(dst)),
C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))), C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
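All of these wrappers use the same trick from golang/go#24450: pointers are passed into C as plain uintptr_t scalars, so cgo performs no pointer checks and no allocations, and runtime.KeepAlive keeps the Go buffers alive for the duration of the call. A standalone sketch of the pattern, with memset standing in for the ZSTD calls:

```go
package main

/*
#include <stdint.h>
#include <string.h>

// Taking uintptr_t instead of a pointer lets Go pass raw pointer values
// without triggering cgo pointer checks (see golang.org/issue/24450).
static void memset_wrapper(uintptr_t dst, int c, size_t n) {
	memset((void*)dst, c, n);
}
*/
import "C"

import (
	"fmt"
	"runtime"
	"unsafe"
)

func main() {
	buf := make([]byte, 8)
	C.memset_wrapper(
		C.uintptr_t(uintptr(unsafe.Pointer(&buf[0]))),
		C.int('A'),
		C.size_t(len(buf)),
	)
	runtime.KeepAlive(buf) // buf must stay reachable during the C call above
	fmt.Println(string(buf)) // AAAAAAAA
}
```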



@ -1,11 +1,30 @@
package gozstd package gozstd
/* /*
#cgo CFLAGS: -O3
#define ZSTD_STATIC_LINKING_ONLY #define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h" #include "zstd.h"
#include "zstd_errors.h" #include "zstd_errors.h"
#include <stdlib.h> // for malloc/free #include <stdlib.h> // for malloc/free
#include <stdint.h> // for uintptr_t
// The following *_wrapper functions allow avoiding memory allocations
// during calls from Go.
// See https://github.com/golang/go/issues/24450 .
static size_t ZSTD_initDStream_usingDDict_wrapper(uintptr_t ds, uintptr_t dict) {
return ZSTD_initDStream_usingDDict((ZSTD_DStream*)ds, (ZSTD_DDict*)dict);
}
static size_t ZSTD_freeDStream_wrapper(uintptr_t ds) {
return ZSTD_freeDStream((ZSTD_DStream*)ds);
}
static size_t ZSTD_decompressStream_wrapper(uintptr_t ds, uintptr_t output, uintptr_t input) {
return ZSTD_decompressStream((ZSTD_DStream*)ds, (ZSTD_outBuffer*)output, (ZSTD_inBuffer*)input);
}
*/ */
import "C" import "C"
@ -92,7 +111,9 @@ func initDStream(ds *C.ZSTD_DStream, dd *DDict) {
if dd != nil { if dd != nil {
ddict = dd.p ddict = dd.p
} }
result := C.ZSTD_initDStream_usingDDict(ds, ddict) result := C.ZSTD_initDStream_usingDDict_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(ds))),
C.uintptr_t(uintptr(unsafe.Pointer(ddict))))
ensureNoError("ZSTD_initDStream_usingDDict", result) ensureNoError("ZSTD_initDStream_usingDDict", result)
} }
@ -108,7 +129,8 @@ func (zr *Reader) Release() {
return return
} }
result := C.ZSTD_freeDStream(zr.ds) result := C.ZSTD_freeDStream_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(zr.ds))))
ensureNoError("ZSTD_freeDStream", result) ensureNoError("ZSTD_freeDStream", result)
zr.ds = nil zr.ds = nil
@ -180,7 +202,10 @@ tryDecompressAgain:
zr.outBuf.size = dstreamOutBufSize zr.outBuf.size = dstreamOutBufSize
zr.outBuf.pos = 0 zr.outBuf.pos = 0
prevInBufPos := zr.inBuf.pos prevInBufPos := zr.inBuf.pos
result := C.ZSTD_decompressStream(zr.ds, zr.outBuf, zr.inBuf) result := C.ZSTD_decompressStream_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(zr.ds))),
C.uintptr_t(uintptr(unsafe.Pointer(zr.outBuf))),
C.uintptr_t(uintptr(unsafe.Pointer(zr.inBuf))))
zr.outBuf.size = zr.outBuf.pos zr.outBuf.size = zr.outBuf.pos
zr.outBuf.pos = 0 zr.outBuf.pos = 0


@ -1,10 +1,47 @@
package gozstd package gozstd
/* /*
#cgo CFLAGS: -O3
#define ZSTD_STATIC_LINKING_ONLY #define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h" #include "zstd.h"
#include "zstd_errors.h"
#include <stdlib.h> // for malloc/free #include <stdlib.h> // for malloc/free
#include <stdint.h> // for uintptr_t
// The following *_wrapper functions allow avoiding memory allocations
// during calls from Go.
// See https://github.com/golang/go/issues/24450 .
static size_t ZSTD_CCtx_setParameter_wrapper(uintptr_t cs, ZSTD_cParameter param, int value) {
return ZSTD_CCtx_setParameter((ZSTD_CStream*)cs, param, value);
}
static size_t ZSTD_initCStream_wrapper(uintptr_t cs, int compressionLevel) {
return ZSTD_initCStream((ZSTD_CStream*)cs, compressionLevel);
}
static size_t ZSTD_initCStream_usingCDict_wrapper(uintptr_t cs, uintptr_t dict) {
return ZSTD_initCStream_usingCDict((ZSTD_CStream*)cs, (ZSTD_CDict*)dict);
}
static size_t ZSTD_freeCStream_wrapper(uintptr_t cs) {
return ZSTD_freeCStream((ZSTD_CStream*)cs);
}
static size_t ZSTD_compressStream_wrapper(uintptr_t cs, uintptr_t output, uintptr_t input) {
return ZSTD_compressStream((ZSTD_CStream*)cs, (ZSTD_outBuffer*)output, (ZSTD_inBuffer*)input);
}
static size_t ZSTD_flushStream_wrapper(uintptr_t cs, uintptr_t output) {
return ZSTD_flushStream((ZSTD_CStream*)cs, (ZSTD_outBuffer*)output);
}
static size_t ZSTD_endStream_wrapper(uintptr_t cs, uintptr_t output) {
return ZSTD_endStream((ZSTD_CStream*)cs, (ZSTD_outBuffer*)output);
}
*/ */
import "C" import "C"
@ -177,14 +214,21 @@ func (zw *Writer) ResetWriterParams(w io.Writer, params *WriterParams) {
func initCStream(cs *C.ZSTD_CStream, params WriterParams) { func initCStream(cs *C.ZSTD_CStream, params WriterParams) {
if params.Dict != nil { if params.Dict != nil {
result := C.ZSTD_initCStream_usingCDict(cs, params.Dict.p) result := C.ZSTD_initCStream_usingCDict_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(cs))),
C.uintptr_t(uintptr(unsafe.Pointer(params.Dict.p))))
ensureNoError("ZSTD_initCStream_usingCDict", result) ensureNoError("ZSTD_initCStream_usingCDict", result)
} else { } else {
result := C.ZSTD_initCStream(cs, C.int(params.CompressionLevel)) result := C.ZSTD_initCStream_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(cs))),
C.int(params.CompressionLevel))
ensureNoError("ZSTD_initCStream", result) ensureNoError("ZSTD_initCStream", result)
} }
result := C.ZSTD_CCtx_setParameter(cs, C.ZSTD_cParameter(C.ZSTD_c_windowLog), C.int(params.WindowLog)) result := C.ZSTD_CCtx_setParameter_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(cs))),
C.ZSTD_cParameter(C.ZSTD_c_windowLog),
C.int(params.WindowLog))
ensureNoError("ZSTD_CCtx_setParameter", result) ensureNoError("ZSTD_CCtx_setParameter", result)
} }
@ -200,7 +244,8 @@ func (zw *Writer) Release() {
return return
} }
result := C.ZSTD_freeCStream(zw.cs) result := C.ZSTD_freeCStream_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(zw.cs))))
ensureNoError("ZSTD_freeCStream", result) ensureNoError("ZSTD_freeCStream", result)
zw.cs = nil zw.cs = nil
@ -279,7 +324,10 @@ func (zw *Writer) Write(p []byte) (int, error) {
func (zw *Writer) flushInBuf() error { func (zw *Writer) flushInBuf() error {
prevInBufPos := zw.inBuf.pos prevInBufPos := zw.inBuf.pos
result := C.ZSTD_compressStream(zw.cs, zw.outBuf, zw.inBuf) result := C.ZSTD_compressStream_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(zw.cs))),
C.uintptr_t(uintptr(unsafe.Pointer(zw.outBuf))),
C.uintptr_t(uintptr(unsafe.Pointer(zw.inBuf))))
ensureNoError("ZSTD_compressStream", result) ensureNoError("ZSTD_compressStream", result)
// Move the remaining data to the start of inBuf. // Move the remaining data to the start of inBuf.
@ -328,7 +376,9 @@ func (zw *Writer) Flush() error {
// Flush the internal buffer to outBuf. // Flush the internal buffer to outBuf.
for { for {
result := C.ZSTD_flushStream(zw.cs, zw.outBuf) result := C.ZSTD_flushStream_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(zw.cs))),
C.uintptr_t(uintptr(unsafe.Pointer(zw.outBuf))))
ensureNoError("ZSTD_flushStream", result) ensureNoError("ZSTD_flushStream", result)
if err := zw.flushOutBuf(); err != nil { if err := zw.flushOutBuf(); err != nil {
return err return err
@ -350,7 +400,9 @@ func (zw *Writer) Close() error {
} }
for { for {
result := C.ZSTD_endStream(zw.cs, zw.outBuf) result := C.ZSTD_endStream_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(zw.cs))),
C.uintptr_t(uintptr(unsafe.Pointer(zw.outBuf))))
ensureNoError("ZSTD_endStream", result) ensureNoError("ZSTD_endStream", result)
if err := zw.flushOutBuf(); err != nil { if err := zw.flushOutBuf(); err != nil {
return err return err


@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -61,9 +61,57 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
const void* samplesBuffer, const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples); const size_t* samplesSizes, unsigned nbSamples);
typedef struct {
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */
} ZDICT_params_t;
/*! ZDICT_finalizeDictionary():
* Given a custom content as a basis for dictionary, and a set of samples,
* finalize dictionary by adding headers and statistics according to the zstd
* dictionary format.
*
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each
* sample in order. The samples are used to construct the statistics, so they
* should be representative of what you will compress with this dictionary.
*
* The compression level can be set in `parameters`. You should pass the
* compression level you expect to use in production. The statistics for each
* compression level differ, so tuning the dictionary for the compression level
* can help quite a bit.
*
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
* a random dictionary ID for you, but we can't guarantee no collisions.
*
* The dstDictBuffer and the dictContent may overlap, and the content will be
* appended to the end of the header. If the header + the content doesn't fit in
* maxDictSize the beginning of the content is truncated to make room, since it
* is presumed that the most profitable content is at the end of the dictionary,
* since that is the cheapest to reference.
*
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
* `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
*
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
* or an error code, which can be tested by ZDICT_isError().
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
* instructed to, using notificationLevel>0.
* NOTE: This function currently may fail in several edge cases including:
* * Not enough samples
* * Samples are uncompressible
* * Samples are all exactly the same
*/
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
const void* dictContent, size_t dictContentSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
/*====== Helper functions ======*/ /*====== Helper functions ======*/
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
@ -78,11 +126,8 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
* Use them only in association with static linking. * Use them only in association with static linking.
* ==================================================================================== */ * ==================================================================================== */
typedef struct { #define ZDICT_CONTENTSIZE_MIN 128
int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */ #define ZDICT_DICTSIZE_MIN 256
unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
} ZDICT_params_t;
/*! ZDICT_cover_params_t: /*! ZDICT_cover_params_t:
* k and d are the only required parameters. * k and d are the only required parameters.
@ -198,28 +243,6 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
const size_t* samplesSizes, unsigned nbSamples, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t* parameters); ZDICT_fastCover_params_t* parameters);
/*! ZDICT_finalizeDictionary():
* Given a custom content as a basis for dictionary, and a set of samples,
* finalize dictionary by adding headers and statistics.
*
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
*
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
* or an error code, which can be tested by ZDICT_isError().
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
* Note 2: dictBuffer and dictContent can overlap
*/
#define ZDICT_CONTENTSIZE_MIN 128
#define ZDICT_DICTSIZE_MIN 256
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
const void* dictContent, size_t dictContentSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
typedef struct { typedef struct {
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
ZDICT_params_t zParams; ZDICT_params_t zParams;
@ -256,7 +279,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API # define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__) # elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
# elif (ZDICT_GCC_VERSION >= 301) # elif (ZDICT_GCC_VERSION >= 301)
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))


@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -72,16 +72,21 @@ extern "C" {
/*------ Version ------*/ /*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4 #define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 4 #define ZSTD_VERSION_RELEASE 7
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
/*! ZSTD_versionNumber() :
* Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
ZSTDLIB_API unsigned ZSTD_versionNumber(void);
#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
#define ZSTD_QUOTE(str) #str #define ZSTD_QUOTE(str) #str
#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
/*! ZSTD_versionString() :
* Return runtime library version, like "1.4.5". Requires v1.3.0+. */
ZSTDLIB_API const char* ZSTD_versionString(void);
/* ************************************* /* *************************************
* Default constant * Default constant
@ -274,7 +279,10 @@ typedef enum {
* Default level is ZSTD_CLEVEL_DEFAULT==3. * Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level. * Note 1 : it's possible to pass a negative compression level.
* Note 2 : setting a level resets all other compression parameters to default */ * Note 2 : setting a level does not automatically set all other compression parameters
* to default. Setting this will however eventually dynamically impact the compression
* parameters which have not been manually set. The manually set
* ones will 'stick'. */
/* Advanced compression parameters : /* Advanced compression parameters :
* It's possible to pin down compression parameters to some specific values. * It's possible to pin down compression parameters to some specific values.
* In which case, these values are no longer dynamically selected by the compressor */ * In which case, these values are no longer dynamically selected by the compressor */
@ -331,7 +339,9 @@ typedef enum {
* for large inputs, by finding large matches at long distance. * for large inputs, by finding large matches at long distance.
* It increases memory usage and window size. * It increases memory usage and window size.
* Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
* except when expressly set to a different value. */ * except when expressly set to a different value.
* Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
* compression strategy >= ZSTD_btopt (== compression level 16+) */
ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2.
* Larger values increase memory usage and compression ratio, * Larger values increase memory usage and compression ratio,
* but decrease compression speed. * but decrease compression speed.
@ -362,16 +372,20 @@ typedef enum {
ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
/* multi-threading parameters */ /* multi-threading parameters */
/* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
* They return an error otherwise. */ * Otherwise, trying to set any other value than default (0) will be a no-op and return an error.
* In a situation where it's unknown if the linked library supports multi-threading or not,
* setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
*/
ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel.
* When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() : * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
* ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
* while compression work is performed in parallel, within worker threads. * while compression is performed in parallel, within worker thread(s).
* (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
* in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
* More workers improve speed, but also increase memory usage. * More workers improve speed, but also increase memory usage.
* Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ * Default value is `0`, aka "single-threaded mode" : no worker is spawned,
* compression is performed inside Caller's thread, and all invocations are blocking */
ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters. * 0 means default, which is dynamically determined based on compression parameters.
@ -400,6 +414,11 @@ typedef enum {
* ZSTD_c_literalCompressionMode * ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize * ZSTD_c_targetCBlockSize
* ZSTD_c_srcSizeHint * ZSTD_c_srcSizeHint
* ZSTD_c_enableDedicatedDictSearch
* ZSTD_c_stableInBuffer
* ZSTD_c_stableOutBuffer
* ZSTD_c_blockDelimiters
* ZSTD_c_validateSequences
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly; * note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change. * also, the enums values themselves are unstable and can still change.
@ -410,7 +429,12 @@ typedef enum {
ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002, ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003, ZSTD_c_experimentalParam6=1003,
ZSTD_c_experimentalParam7=1004 ZSTD_c_experimentalParam7=1004,
ZSTD_c_experimentalParam8=1005,
ZSTD_c_experimentalParam9=1006,
ZSTD_c_experimentalParam10=1007,
ZSTD_c_experimentalParam11=1008,
ZSTD_c_experimentalParam12=1009
} ZSTD_cParameter; } ZSTD_cParameter;
typedef struct { typedef struct {
@ -519,11 +543,15 @@ typedef enum {
/* note : additional experimental parameters are also available /* note : additional experimental parameters are also available
* within the experimental section of the API. * within the experimental section of the API.
* At the time of this writing, they include : * At the time of this writing, they include :
* ZSTD_c_format * ZSTD_d_format
* ZSTD_d_stableOutBuffer
* ZSTD_d_forceIgnoreChecksum
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly * note : never ever use experimentalParam? names directly
*/ */
ZSTD_d_experimentalParam1=1000 ZSTD_d_experimentalParam1=1000,
ZSTD_d_experimentalParam2=1001,
ZSTD_d_experimentalParam3=1002
} ZSTD_dParameter; } ZSTD_dParameter;
@ -659,8 +687,9 @@ typedef enum {
* - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
* - output->pos must be <= dstCapacity, input->pos must be <= srcSize * - output->pos must be <= dstCapacity, input->pos must be <= srcSize
* - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
* - endOp must be a valid directive
* - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
* - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available,
* and then immediately returns, just indicating that there is some data remaining to be flushed. * and then immediately returns, just indicating that there is some data remaining to be flushed.
* The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
* - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
@ -763,7 +792,7 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
/* This function is redundant with the advanced API and equivalent to: /* This function is redundant with the advanced API and equivalent to:
* *
* ZSTD_DCtx_reset(zds); * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
* ZSTD_DCtx_refDDict(zds, NULL); * ZSTD_DCtx_refDDict(zds, NULL);
*/ */
ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
@ -1095,21 +1124,40 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
typedef struct { typedef struct {
unsigned int matchPos; /* Match pos in dst */ unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
/* If seqDef.offset > 3, then this is seqDef.offset - 3 * If offset == 0 and matchLength == 0, this sequence represents the last
* If seqDef.offset < 3, then this is the corresponding repeat offset * literals in the block of litLength size.
* But if seqDef.offset < 3 and litLength == 0, this is the
* repeat offset before the corresponding repeat offset
* And if seqDef.offset == 3 and litLength == 0, this is the
* most recent repeat offset - 1
*/ */
unsigned int offset;
unsigned int litLength; /* Literal length */ unsigned int litLength; /* Literal length of the sequence. */
unsigned int matchLength; /* Match length */ unsigned int matchLength; /* Match length of the sequence. */
/* 0 when seq not rep and seqDef.offset otherwise
* when litLength == 0 this will be <= 4, otherwise <= 3 like normal /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
* In this case, we will treat the sequence as a marker for a block boundary.
*/
unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'.
* Ranges from [0, 3].
*
* Repeat offsets are essentially previous offsets from previous sequences sorted in
* recency order. For more detail, see doc/zstd_compression_format.md
*
* If rep == 0, then 'offset' does not contain a repeat offset.
* If rep > 0:
* If litLength != 0:
* rep == 1 --> offset == repeat_offset_1
* rep == 2 --> offset == repeat_offset_2
* rep == 3 --> offset == repeat_offset_3
* If litLength == 0:
* rep == 1 --> offset == repeat_offset_2
* rep == 2 --> offset == repeat_offset_3
* rep == 3 --> offset == repeat_offset_1 - 1
*
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
* sequence provider's perspective. For example, ZSTD_compressSequences() does not
* use this 'rep' field at all (as of now).
*/ */
unsigned int rep;
} ZSTD_Sequence; } ZSTD_Sequence;
typedef struct { typedef struct {
@ -1151,6 +1199,12 @@ typedef enum {
* Decoder cannot recognise automatically this format, requiring this instruction. */ * Decoder cannot recognise automatically this format, requiring this instruction. */
} ZSTD_format_e; } ZSTD_format_e;
typedef enum {
/* Note: this enum controls ZSTD_d_forceIgnoreChecksum */
ZSTD_d_validateChecksum = 0,
ZSTD_d_ignoreChecksum = 1
} ZSTD_forceIgnoreChecksum_e;
typedef enum { typedef enum {
/* Note: this enum and the behavior it controls are effectively internal /* Note: this enum and the behavior it controls are effectively internal
* implementation details of the compressor. They are expected to continue * implementation details of the compressor. They are expected to continue
@ -1248,38 +1302,103 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
* or an error code (if srcSize is too small) */ * or an error code (if srcSize is too small) */
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
/*! ZSTD_getSequences() : typedef enum {
* Extract sequences from the sequence store ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */
} ZSTD_sequenceFormat_e;
/*! ZSTD_generateSequences() :
* Generate sequences using ZSTD_compress2, given a source buffer.
*
* Each block will end with a dummy sequence
* with offset == 0, matchLength == 0, and litLength == length of last literals.
* litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
* simply acts as a block delimiter.
*
* zc can be used to insert custom compression params. * zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2 * This function invokes ZSTD_compress2
* @return : number of sequences extracted *
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
* @return : number of sequences generated
*/ */
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize); size_t outSeqsSize, const void* src, size_t srcSize);
/*! ZSTD_mergeBlockDelimiters() :
* Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
* by merging them into into the literals of the next sequence.
*
* As such, the final generated result has no explicit representation of block boundaries,
* and the final last literals segment is not represented in the sequences.
*
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
* @return : number of sequences left after merging
*/
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
/*! ZSTD_compressSequences() :
* Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
* If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
* The entire source is compressed into a single frame.
*
* The compression behavior changes based on cctx params. In particular:
* If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
* no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
* the block size derived from the cctx, and sequences may be split. This is the default setting.
*
* If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
* block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
*
* If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
* behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
*
* In addition to the two adjustable experimental params, there are other important cctx params.
* - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
* - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
* - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
*
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
* Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
* and cannot emit an RLE block that disagrees with the repcode history
* @return : final compressed size or a ZSTD error.
*/
ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize);
/*************************************** /***************************************
* Memory management * Memory management
***************************************/ ***************************************/
/*! ZSTD_estimate*() : /*! ZSTD_estimate*() :
* These functions make it possible to estimate memory usage of a future * These functions make it possible to estimate memory usage
* {D,C}Ctx, before its creation. * of a future {D,C}Ctx, before its creation.
* *
* ZSTD_estimateCCtxSize() will provide a budget large enough for any * ZSTD_estimateCCtxSize() will provide a memory budget large enough
* compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(), * for any compression level up to selected one.
* this estimate does not include space for a window buffer, so this estimate * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
* is guaranteed to be enough for single-shot compressions, but not streaming * does not include space for a window buffer.
* compressions. It will however assume the input may be arbitrarily large, * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
* which is the worst case. If srcSize is known to always be small, * The estimate will assume the input may be arbitrarily large,
* ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. * which is the worst case.
* ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
* ZSTD_getCParams() to create cParams from compressionLevel.
* ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
* ZSTD_CCtxParams_setParameter().
* *
* Note: only single-threaded compression is supported. This function will * When srcSize can be bound by a known and rather "small" value,
* return an error code if ZSTD_c_nbWorkers is >= 1. */ * this fact can be used to provide a tighter estimation
* because the CCtx compression context will need less memory.
* This tighter estimation can be provided by more advanced functions
* ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
* and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
* Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
*
* Note 2 : only single-threaded compression is supported.
* ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
*/
ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
@ -1362,7 +1481,11 @@ ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
typedef void (*ZSTD_freeFunction) (void* opaque, void* address); typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ static
#ifdef __GNUC__
__attribute__((__unused__))
#endif
ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
@ -1375,13 +1498,36 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS
ZSTD_compressionParameters cParams, ZSTD_compressionParameters cParams,
ZSTD_customMem customMem); ZSTD_customMem customMem);
/*! Thread pool :
 *  These prototypes make it possible to share a thread pool among multiple compression contexts.
 *  This can limit resources for applications with multiple threads where each one uses
 *  a threaded compression mode (via the ZSTD_c_nbWorkers parameter).
 *  ZSTD_createThreadPool creates a new thread pool with a given number of threads.
 *  Note that the pool must stay alive for as long as it is being used.
 *  ZSTD_CCtx_refThreadPool assigns a thread pool to a context (pass NULL
 *  to fall back to the internal thread pool).
 *  ZSTD_freeThreadPool frees a thread pool.
 */
typedef struct POOL_ctx_s ZSTD_threadPool;
ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);
ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
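
As a usage sketch (editor's illustration with hypothetical names; it assumes ZSTD_STATIC_LINKING_ONLY and a library built with multithreading support, and error checks are elided for brevity), two contexts could share one pool like this:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static void share_pool(void) {
        ZSTD_threadPool* const pool = ZSTD_createThreadPool(4);  /* 4 worker threads */
        ZSTD_CCtx* const c1 = ZSTD_createCCtx();
        ZSTD_CCtx* const c2 = ZSTD_createCCtx();

        /* Both contexts request threaded compression... */
        ZSTD_CCtx_setParameter(c1, ZSTD_c_nbWorkers, 2);
        ZSTD_CCtx_setParameter(c2, ZSTD_c_nbWorkers, 2);
        /* ...but draw their workers from the single shared pool. */
        ZSTD_CCtx_refThreadPool(c1, pool);
        ZSTD_CCtx_refThreadPool(c2, pool);

        /* ... compress with c1 and c2 here ... */

        ZSTD_freeCCtx(c1);
        ZSTD_freeCCtx(c2);
        ZSTD_freeThreadPool(pool);  /* the pool must outlive every context that uses it */
    }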
/*
* This API is temporary and is expected to change or disappear in the future!
*/
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictContentType_e dictContentType,
const ZSTD_CCtx_params* cctxParams,
ZSTD_customMem customMem);
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
                                                  ZSTD_dictContentType_e dictContentType,
                                                  ZSTD_customMem customMem);

/***************************************
*  Advanced compression functions
***************************************/
@ -1394,6 +1540,12 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictS
 * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
/*! ZSTD_getDictID_fromCDict() :
 *  Provides the dictID of the dictionary loaded into `cdict`.
 *  If @return == 0, the dictionary is not conformant to the Zstandard specification, or it is empty.
 *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
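
A small sketch of the intended use (editor's illustration; dict_id_of is a hypothetical helper, and ZSTD_getDictID_fromCDict() requires ZSTD_STATIC_LINKING_ONLY):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static unsigned dict_id_of(const void* dictBuf, size_t dictSize) {
        ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, 3);
        /* 0 means: not a spec-conformant dictionary (loaded as content-only), or empty. */
        unsigned const id = ZSTD_getDictID_fromCDict(cdict);
        ZSTD_freeCDict(cdict);
        return id;
    }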
/*! ZSTD_getCParams() :
 * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
 * `estimatedSrcSize` value is optional, select 0 if not known */
@ -1508,6 +1660,143 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
 * but compression ratio may regress significantly if guess considerably underestimates */
#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
/* Controls whether the new and experimental "dedicated dictionary search
* structure" can be used. This feature is still rough around the edges, be
* prepared for surprising behavior!
*
* How to use it:
*
* When using a CDict, whether to use this feature or not is controlled at
* CDict creation, and it must be set in a CCtxParams set passed into that
* construction (via ZSTD_createCDict_advanced2()). A compression will then
* use the feature or not based on how the CDict was constructed; the value of
* this param, set in the CCtx, will have no effect.
*
* However, when a dictionary buffer is passed into a CCtx, such as via
* ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
* whether the CDict that is created internally can use the feature or not.
*
* What it does:
*
* Normally, the internal data structures of the CDict are analogous to what
* would be stored in a CCtx after compressing the contents of a dictionary.
* To an approximation, a compression using a dictionary can then use those
* data structures to simply continue what is effectively a streaming
* compression where the simulated compression of the dictionary left off.
* Which is to say, the search structures in the CDict are normally the same
* format as in the CCtx.
*
* It is possible to do better, since the CDict is not like a CCtx: the search
* structures are written once during CDict creation, and then are only read
* after that, while the search structures in the CCtx are both read and
* written as the compression goes along. This means we can choose a search
* structure for the dictionary that is read-optimized.
*
* This feature enables the use of that different structure.
*
* Note that some of the members of the ZSTD_compressionParameters struct have
* different semantics and constraints in the dedicated search structure. It is
* highly recommended that you simply set a compression level in the CCtxParams
* you pass into the CDict creation call, and avoid messing with the cParams
* directly.
*
* Effects:
*
* This will only have any effect when the selected ZSTD_strategy
* implementation supports this feature. Currently, that's limited to
* ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
*
* Note that this means that the CDict tables can no longer be copied into the
* CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
 * usable. The dictionary can only be attached or reloaded.
*
* In general, you should expect compression to be faster--sometimes very much
* so--and CDict creation to be slightly slower. Eventually, we will probably
* make this mode the default.
*/
#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
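
Putting the pieces together, a sketch of opting in at CDict creation time (editor's illustration; make_ddss_cdict is a hypothetical helper, ZSTD_STATIC_LINKING_ONLY is assumed, and, per the advice above, only a compression level is set rather than raw cParams):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static ZSTD_CDict* make_ddss_cdict(const void* dict, size_t dictSize) {
        ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
        ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, 3);
        ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableDedicatedDictSearch, 1);

        ZSTD_CDict* const cdict = ZSTD_createCDict_advanced2(
            dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto,
            params, ZSTD_defaultCMem);

        ZSTD_freeCCtxParams(params);  /* the CDict keeps its own copy of the settings */
        return cdict;
    }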
/* ZSTD_c_stableInBuffer
* Experimental parameter.
* Default is 0 == disabled. Set to 1 to enable.
*
* Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
* between calls, except for the modifications that zstd makes to pos (the
* caller must not modify pos). This is checked by the compressor, and
* compression will fail if it ever changes. This means the only flush
* mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
* is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
* MUST not be modified during compression or you will get data corruption.
*
* When this flag is enabled zstd won't allocate an input window buffer,
* because the user guarantees it can reference the ZSTD_inBuffer until
* the frame is complete. But, it will still allocate an output buffer
* large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
* avoid the memcpy() from the input buffer to the input window buffer.
*
* NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
* That means this flag cannot be used with ZSTD_compressStream().
*
* NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
* this flag is ALWAYS memory safe, and will never access out-of-bounds
* memory. However, compression WILL fail if you violate the preconditions.
*
 * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
* not be modified during compression or you will get data corruption. This
* is because zstd needs to reference data in the ZSTD_inBuffer to find
* matches. Normally zstd maintains its own window buffer for this purpose,
* but passing this flag tells zstd to use the user provided buffer.
*/
#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
/* ZSTD_c_stableOutBuffer
* Experimental parameter.
* Default is 0 == disabled. Set to 1 to enable.
*
 * Tells the compressor that the ZSTD_outBuffer will not be resized between
 * calls. Specifically: (out.size - out.pos) will never grow. This gives the
 * compressor the freedom to say: If the compressed data doesn't fit in the
 * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
 * always compress directly into the output buffer, instead of compressing
 * into an internal buffer and copying to the output buffer.
*
* When this flag is enabled zstd won't allocate an output buffer, because
* it can write directly to the ZSTD_outBuffer. It will still allocate the
* input window buffer (see ZSTD_c_stableInBuffer).
*
* Zstd will check that (out.size - out.pos) never grows and return an error
* if it does. While not strictly necessary, this should prevent surprises.
*/
#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
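
A sketch combining both stable-buffer flags for a single-shot ZSTD_compressStream2() call (editor's illustration; stable_compress is hypothetical, ZSTD_STATIC_LINKING_ONLY is assumed, dst should be sized with ZSTD_compressBound(), and neither buffer may be touched until the call returns):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static size_t stable_compress(ZSTD_CCtx* cctx,
                                  void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize) {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableInBuffer, 1);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableOutBuffer, 1);

        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };

        /* ZSTD_e_end is mandatory when ZSTD_c_stableInBuffer is set. */
        size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
        if (ZSTD_isError(ret)) return ret;  /* e.g. dstSizeTooSmall if dst was undersized */
        return out.pos;                     /* compressed size */
    }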
/* ZSTD_c_blockDelimiters
* Default is 0 == ZSTD_sf_noBlockDelimiters.
*
* For use with sequence compression API: ZSTD_compressSequences().
*
* Designates whether or not the given array of ZSTD_Sequence contains block delimiters
* and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
* See the definition of ZSTD_Sequence for more specifics.
*/
#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
/* ZSTD_c_validateSequences
* Default is 0 == disabled. Set to 1 to enable sequence validation.
*
* For use with sequence compression API: ZSTD_compressSequences().
* Designates whether or not we validate sequences provided to ZSTD_compressSequences()
* during function execution.
*
* Without validation, providing a sequence that does not conform to the zstd spec will cause
* undefined behavior, and may produce a corrupted block.
*
 * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
 * specifics regarding offset/matchlength requirements) then the function will bail out and
 * return an error.
*
*/
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
/*! ZSTD_CCtx_getParameter() :
 *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
 *  and store it into int* value.
@ -1556,8 +1845,10 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, Z
/*! ZSTD_CCtxParams_setParameter() :
 *  Similar to ZSTD_CCtx_setParameter.
 *  Set one compression parameter, selected by enum ZSTD_cParameter.
 *  Parameters must be applied to a ZSTD_CCtx using
 *  ZSTD_CCtx_setParametersUsingCCtxParams().
 * @result : a code representing success or failure (which can be tested with
 *           ZSTD_isError()).
 */
ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
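
A sketch of the intended round trip (editor's illustration; apply_params is a hypothetical helper, and ZSTD_STATIC_LINKING_ONLY is assumed since ZSTD_CCtx_setParametersUsingCCtxParams() is experimental): build the parameter set once, then apply it to a context.

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static size_t apply_params(ZSTD_CCtx* cctx) {
        ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
        ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);

        /* Nothing takes effect until the set is applied to a CCtx. */
        size_t const ret = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
        ZSTD_freeCCtxParams(params);
        return ret;  /* test with ZSTD_isError() */
    }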
@ -1637,11 +1928,60 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* pre
 */
ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
/*! ZSTD_DCtx_getParameter() :
* Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
* and store it into int* value.
* @return : 0, or an error code (which can be tested with ZSTD_isError()).
*/
ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
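
A sketch pairing the new getter with the existing setter (editor's illustration; ZSTD_d_windowLogMax is used only to have a stable parameter to read back, and ZSTD_STATIC_LINKING_ONLY is assumed for the getter):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <assert.h>

    static void roundtrip_dparam(ZSTD_DCtx* dctx) {
        int value = 0;
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27);
        ZSTD_DCtx_getParameter(dctx, ZSTD_d_windowLogMax, &value);
        assert(value == 27);  /* the stored limit reads back unchanged */
    }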
/* ZSTD_d_format
 * experimental parameter,
 * allowing selection between ZSTD_format_e input compression formats
 */
#define ZSTD_d_format ZSTD_d_experimentalParam1
/* ZSTD_d_stableOutBuffer
* Experimental parameter.
* Default is 0 == disabled. Set to 1 to enable.
*
* Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
* between calls, except for the modifications that zstd makes to pos (the
* caller must not modify pos). This is checked by the decompressor, and
* decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
* MUST be large enough to fit the entire decompressed frame. This will be
* checked when the frame content size is known. The data in the ZSTD_outBuffer
* in the range [dst, dst + pos) MUST not be modified during decompression
* or you will get data corruption.
*
 * When this flag is enabled zstd won't allocate an output buffer, because
* it can write directly to the ZSTD_outBuffer, but it will still allocate
* an input buffer large enough to fit any compressed block. This will also
* avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
* If you need to avoid the input buffer allocation use the buffer-less
* streaming API.
*
* NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
* this flag is ALWAYS memory safe, and will never access out-of-bounds
* memory. However, decompression WILL fail if you violate the preconditions.
*
* WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
* not be modified during decompression or you will get data corruption. This
* is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
* matches. Normally zstd maintains its own buffer for this purpose, but passing
* this flag tells zstd to use the user provided buffer.
*/
#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
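
A sketch of decompressing a whole frame into a caller-owned buffer under this flag (editor's illustration; stable_decompress is hypothetical, ZSTD_STATIC_LINKING_ONLY is assumed, and dstCapacity must be large enough for the entire decompressed frame):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static size_t stable_decompress(ZSTD_DCtx* dctx,
                                    void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize) {
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);

        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };  /* must fit the whole frame */

        size_t ret;
        do {
            /* ZSTD_DStream is a typedef of ZSTD_DCtx, so dctx can be passed here. */
            ret = ZSTD_decompressStream(dctx, &out, &in);
            if (ZSTD_isError(ret)) return ret;
        } while (ret != 0);  /* 0 == frame fully decoded and flushed */
        return out.pos;      /* decompressed size */
    }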
/* ZSTD_d_forceIgnoreChecksum
 * Experimental parameter.
 * Default is 0 == disabled. Set to 1 to enable.
 *
 * Tells the decompressor to skip checksum validation during decompression, regardless
 * of whether checksumming was specified during compression. This offers some
 * slight performance benefits, and may be useful for debugging.
 * Param has values of type ZSTD_forceIgnoreChecksum_e.
 */
#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
/*! ZSTD_DCtx_setFormat() :
 *  Instruct the decoder context about what kind of data to decode next.
@ -1670,7 +2010,8 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
********************************************************************/

/*===== Advanced Streaming compression functions =====*/
/*! ZSTD_initCStream_srcSize() :
 * This function is deprecated, and equivalent to:
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
 *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
@ -1687,7 +2028,7 @@ ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
                         int compressionLevel,
                         unsigned long long pledgedSrcSize);

/*! ZSTD_initCStream_usingDict() :
 * This function is deprecated, and is equivalent to:
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
 *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
@ -1704,7 +2045,7 @@ ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
                           const void* dict, size_t dictSize,
                           int compressionLevel);

/*! ZSTD_initCStream_advanced() :
 * This function is deprecated, and is approximately equivalent to:
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
 *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
@ -1725,7 +2066,7 @@ ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                          ZSTD_parameters params,
                          unsigned long long pledgedSrcSize);

/*! ZSTD_initCStream_usingCDict() :
 * This function is deprecated, and equivalent to:
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
 *     ZSTD_CCtx_refCDict(zcs, cdict);
@ -1735,7 +2076,7 @@ ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
 */
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);

/*! ZSTD_initCStream_usingCDict_advanced() :
 *   This function is DEPRECATED, and is approximately equivalent to:
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
 *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
@ -1808,7 +2149,8 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
/*===== Advanced Streaming decompression functions =====*/

/*!
 * This function is deprecated, and is equivalent to:
 *
 *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
@ -1819,7 +2161,7 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
 */
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);

/*!
 * This function is deprecated, and is equivalent to:
 *
 *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
@ -1830,7 +2172,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic
 */
ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);

/*!
 * This function is deprecated, and is equivalent to:
 *
 *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
@ -1892,7 +2234,7 @@ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstC
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);

/**
  Buffer-less streaming decompression (synchronous mode)

  A ZSTD_DCtx object is required to track streaming operations.
View file

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
@ -76,6 +76,8 @@ typedef enum {
  /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
  ZSTD_error_frameIndex_tooLarge = 100,
  ZSTD_error_seekableIO          = 102,
ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105,
  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
4 vendor/modules.txt vendored
View file

@ -16,7 +16,7 @@ github.com/VictoriaMetrics/fasthttp/fasthttputil
github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.12.3
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.9.1
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop
# github.com/aws/aws-sdk-go v1.36.7
@ -148,7 +148,7 @@ github.com/valyala/fastjson/fastfloat
github.com/valyala/fastrand
# github.com/valyala/fasttemplate v1.2.1
github.com/valyala/fasttemplate
# github.com/valyala/gozstd v1.9.0
github.com/valyala/gozstd
# github.com/valyala/histogram v1.1.2
github.com/valyala/histogram