app/vmalert/datasource: reduce number of allocations when parsing instant responses (#6272)

Allocations are reduced by implementing a custom JSON parser via the fastjson lib.
The change also re-uses the `promInstant` object in an attempt to reduce the number
of allocations when parsing big responses, as usually happens with heavy
recording rules.

```
name                                old allocs/op  new allocs/op  delta
ParsePrometheusResponse/Instant-10     9.65k ± 0%     5.60k ± 0%   ~     (p=1.000 n=1+1)

```
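The table above appears to be `benchstat` output. For reference, a comparison like this can be reproduced with standard Go tooling along the lines shown below; the commit refs and file names are placeholders, not part of this change:

```
# run the parsing benchmark before and after the change (placeholder refs and file names)
git checkout <before> && go test -run='^$' -bench=ParsePrometheusResponse -benchmem ./app/vmalert/datasource/ > old.txt
git checkout <after>  && go test -run='^$' -bench=ParsePrometheusResponse -benchmem ./app/vmalert/datasource/ > new.txt
# compare allocation counts between the two runs
benchstat old.txt new.txt
```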

Signed-off-by: hagen1778 <roman@victoriametrics.com>

5 changed files with 189 additions and 29 deletions

File diff suppressed because one or more lines are too long


@@ -7,6 +7,10 @@ import (
    "net/http"
    "strconv"
    "time"

    "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
    "github.com/valyala/fastjson"
)

var (
@@ -31,27 +35,85 @@ type promResponse struct {
    } `json:"stats,omitempty"`
}

// see https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
type promInstant struct {
    Result []struct {
        Labels map[string]string `json:"metric"`
        TV     [2]interface{}    `json:"value"`
    } `json:"result"`
    // ms is populated after Unmarshal call
    ms []Metric
}
func (r promInstant) metrics() ([]Metric, error) {
    result := make([]Metric, len(r.Result))
    for i, res := range r.Result {
        f, err := strconv.ParseFloat(res.TV[1].(string), 64)
        if err != nil {
            return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, res.TV[1], err)
        }
        var m Metric
        m.SetLabels(res.Labels)
        m.Timestamps = append(m.Timestamps, int64(res.TV[0].(float64)))
        m.Values = append(m.Values, f)
        result[i] = m
    }
    return result, nil
}

// metrics returns the parsed Metric slice.
// Must be called only after Unmarshal.
func (pi *promInstant) metrics() ([]Metric, error) {
    return pi.ms, nil
}
var jsonParserPool fastjson.ParserPool

// Unmarshal unmarshals the given byte slice into promInstant.
// It uses fastjson to reduce the number of allocations compared to the
// standard json.Unmarshal function.
// Response example:
//
//  [{"metric":{"__name__":"up","job":"prometheus"},"value":[1435781451.781,"1"]},
//   {"metric":{"__name__":"up","job":"node"},"value":[1435781451.781,"0"]}]
func (pi *promInstant) Unmarshal(b []byte) error {
    p := jsonParserPool.Get()
    defer jsonParserPool.Put(p)

    v, err := p.ParseBytes(b)
    if err != nil {
        return err
    }

    rows, err := v.Array()
    if err != nil {
        return fmt.Errorf("cannot find the top-level array of result objects: %w", err)
    }
    pi.ms = make([]Metric, len(rows))
    for i, row := range rows {
        metric := row.Get("metric")
        if metric == nil {
            return fmt.Errorf("can't find `metric` object in %q", row)
        }
        labels := metric.GetObject()

        r := &pi.ms[i]
        r.Labels = make([]Label, 0, labels.Len())
        labels.Visit(func(key []byte, v *fastjson.Value) {
            lv, errLocal := v.StringBytes()
            if errLocal != nil {
                err = fmt.Errorf("error when parsing label value %q: %s", v, errLocal)
                return
            }
            r.Labels = append(r.Labels, Label{
                Name:  string(key),
                Value: string(lv),
            })
        })
        if err != nil {
            return fmt.Errorf("error when parsing `metric` object in %q: %w", row, err)
        }

        value := row.Get("value")
        if value == nil {
            return fmt.Errorf("can't find `value` object in %q", row)
        }
        sample := value.GetArray()
        if len(sample) != 2 {
            return fmt.Errorf("object `value` in %q should contain 2 values, but contains %d instead", row, len(sample))
        }
        r.Timestamps = []int64{sample[0].GetInt64()}

        val, err := sample[1].StringBytes()
        if err != nil {
            return fmt.Errorf("error when parsing `value` object %q: %s", sample[1], err)
        }
        f, err := strconv.ParseFloat(bytesutil.ToUnsafeString(val), 64)
        if err != nil {
            return fmt.Errorf("error when parsing float64 from %s in %q: %w", sample[1], row, err)
        }
        r.Values = []float64{f}
    }
    return nil
}
type promRange struct {
@@ -118,7 +180,7 @@ func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result
    switch r.Data.ResultType {
    case rtVector:
        var pi promInstant
        if err := json.Unmarshal(r.Data.Result, &pi.Result); err != nil {
        if err := pi.Unmarshal(r.Data.Result); err != nil {
            return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
        }
        parseFn = pi.metrics


@@ -1,20 +1,73 @@
package datasource

import (
    "encoding/json"
    "reflect"
    "testing"
)

func BenchmarkMetrics(b *testing.B) {
    payload := []byte(`[{"metric":{"__name__":"vm_rows"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests", "foo":"bar", "baz": "qux"},"value":[1583786140,"2000"]}]`)

    var pi promInstant
    if err := json.Unmarshal(payload, &pi.Result); err != nil {
        b.Fatalf(err.Error())
    }
    b.Run("Instant", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            _, _ = pi.metrics()
        }
    })
}
func TestPromInstant_UnmarshalPositive(t *testing.T) {
    f := func(data string, exp []Metric) {
        t.Helper()
        var pi promInstant
        err := pi.Unmarshal([]byte(data))
        if err != nil {
            t.Fatalf("unexpected unmarshal err %v; \n %v", err, string(data))
        }
        got, _ := pi.metrics()
        if !reflect.DeepEqual(got, exp) {
            t.Fatalf("expected to get:\n%v\ngot instead:\n%v", exp, got)
        }
    }

    f(`[{"metric":{"__name__":"up"},"value":[1583780000,"42"]}]`, []Metric{
        {
            Labels:     []Label{{Name: "__name__", Value: "up"}},
            Timestamps: []int64{1583780000},
            Values:     []float64{42},
        },
    })
    f(`[
{"metric":{"__name__":"up"},"value":[1583780000,"42"]},
{"metric":{"__name__":"foo"},"value":[1583780001,"7"]},
{"metric":{"__name__":"baz", "instance":"bar"},"value":[1583780002,"8"]}]`, []Metric{
        {
            Labels:     []Label{{Name: "__name__", Value: "up"}},
            Timestamps: []int64{1583780000},
            Values:     []float64{42},
        },
        {
            Labels:     []Label{{Name: "__name__", Value: "foo"}},
            Timestamps: []int64{1583780001},
            Values:     []float64{7},
        },
        {
            Labels:     []Label{{Name: "__name__", Value: "baz"}, {Name: "instance", Value: "bar"}},
            Timestamps: []int64{1583780002},
            Values:     []float64{8},
        },
    })
}
func TestPromInstant_UnmarshalNegative(t *testing.T) {
    f := func(data string) {
        t.Helper()
        var pi promInstant
        err := pi.Unmarshal([]byte(data))
        if err == nil {
            t.Fatalf("expected to get an error; got nil instead")
        }
    }

    f(``)
    f(`foo`)
    f(`[{"metric":{"__name__":"up"},"value":[1583780000,"42"]},`)
    f(`[{"metric":{"__name__"},"value":[1583780000,"42"]},`)
    // no `metric` object
    f(`[{"value":[1583780000,"42"]}]`)
    // no `value` object
    f(`[{"metric":{"__name__":"up"}}]`)
    // less than 2 values in `value` object
    f(`[{"metric":{"__name__":"up"},"value":["42"]}]`)
    f(`[{"metric":{"__name__":"up"},"value":[1583780000]}]`)
    // non-numeric sample value
    f(`[{"metric":{"__name__":"up"},"value":[1583780000,"foo"]}]`)
}


@@ -0,0 +1,43 @@
package datasource

import (
    "bytes"
    "io"
    "net/http"
    "os"
    "testing"
)

func BenchmarkMetrics(b *testing.B) {
    payload := []byte(`[{"metric":{"__name__":"vm_rows"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests", "foo":"bar", "baz": "qux"},"value":[1583786140,"2000"]}]`)

    var pi promInstant
    if err := pi.Unmarshal(payload); err != nil {
        b.Fatalf(err.Error())
    }
    b.Run("Instant", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            _, _ = pi.metrics()
        }
    })
}

func BenchmarkParsePrometheusResponse(b *testing.B) {
    req, _ := http.NewRequest("GET", "", nil)
    resp := &http.Response{StatusCode: http.StatusOK}
    data, err := os.ReadFile("testdata/instant_response.json")
    if err != nil {
        b.Fatalf("error while reading file: %s", err)
    }
    resp.Body = io.NopCloser(bytes.NewReader(data))

    b.Run("Instant", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            _, err := parsePrometheusResponse(req, resp)
            if err != nil {
                b.Fatalf("unexpected parse err: %s", err)
            }
            resp.Body = io.NopCloser(bytes.NewReader(data))
        }
    })
}
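Assuming standard Go tooling and the package path from the commit title, the new tests and benchmarks added above can be run with something like:

```
# unit tests for the fastjson-based Unmarshal
go test -run TestPromInstant ./app/vmalert/datasource/
# allocation benchmarks added in this commit
go test -run='^$' -bench='Metrics|ParsePrometheusResponse' -benchmem ./app/vmalert/datasource/
```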


@@ -44,6 +44,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): allow configuring `-remoteWrite.disableOnDiskQueue` and `-remoteWrite.dropSamplesOnOverload` cmd-line flags per each `-remoteWrite.url`. See this [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6065). Thanks to @rbizos for implementation!
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add labels `path` and `url` to metrics `vmagent_remotewrite_push_failures_total` and `vmagent_remotewrite_samples_dropped_total`. Now number of failed pushes and dropped samples can be tracked per `-remoteWrite.url`.
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): add [rate_sum](https://docs.victoriametrics.com/stream-aggregation/#rate_sum) and [rate_avg](https://docs.victoriametrics.com/stream-aggregation/#rate_avg) aggregation outputs.
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): reduce CPU usage when evaluating a high number of alerting and recording rules.
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix bug that prevents the first query trace from expanding on click event. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6186). The issue was introduced in [v1.100.0](https://docs.victoriametrics.com/changelog/#v11000) release.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/): prevent potential panic during [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) if more than one `--remoteWrite.streamAggr.dedupInterval` is configured. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6205).