From 7b09e752b7738add16307bbd32c4383d3c3c0617 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Mon, 13 Nov 2023 21:19:06 +0100
Subject: [PATCH] lib/protoparser/promremotewrite: fall back to Snappy decoding
 if zstd decoding fails

This case is possible after the following steps:

1. vmagent tries to perform handshake with the -remoteWrite.url in order to determine whether
   the remote storage supports zstd-compressed data.
2. The remote storage is unavailable during the handshake. In this case vmagent falls back to Snappy compression
   for the data sent to the remote storage.
3. vmagent compresses the collected data into blocks with Snappy and puts these blocks into the persistent queue on disk.
4. The remote storage becomes available.
5. vmagent restarts, performs the handshake with the remote storage and detects that it supports zstd-compressed data.
6. vmagent starts sending Snappy-compressed data from the persistent queue to the remote storage,
   while falsely advertising that it sends zstd-compressed data.
7. The remote storage receives Snappy-compressed data and fails unpacking it with zstd.

The solution is to just fall back to Snappy decompression if zstd decompression fails.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5301
---
 docs/CHANGELOG.md                                      | 1 +
 lib/protoparser/promremotewrite/stream/streamparser.go | 9 ++++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 5eddc1330f..ae16dc7e3c 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -14,6 +14,7 @@ The following `tip` changes can be tested by building VictoriaMetrics components
 * SECURITY: upgrade Go builder from Go1.21.3 to Go1.21.4. See [the list of issues addressed in Go1.21.4](https://github.com/golang/go/issues?q=milestone%3AGo1.21.4+label%3ACherryPickApproved).
 
 * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly apply [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) with `regex`, which start and end with `.+` or `.*` and which contain alternate sub-regexps. For example, `.+;|;.+` or `.*foo|bar|baz.*`. Previously such regexps were improperly parsed, which could result in undexpected relabeling results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297).
+* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly decode Snappy-encoded data blocks received via [VictoriaMetrics remote_write protocol](https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5301).
 
 ## [v1.93.7](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.7)
 
diff --git a/lib/protoparser/promremotewrite/stream/streamparser.go b/lib/protoparser/promremotewrite/stream/streamparser.go
index ee01b76818..60e6b74b7b 100644
--- a/lib/protoparser/promremotewrite/stream/streamparser.go
+++ b/lib/protoparser/promremotewrite/stream/streamparser.go
@@ -42,7 +42,14 @@ func Parse(r io.Reader, isVMRemoteWrite bool, callback func(tss []prompb.TimeSer
 	if isVMRemoteWrite {
 		bb.B, err = zstd.Decompress(bb.B[:0], ctx.reqBuf.B)
 		if err != nil {
-			return fmt.Errorf("cannot decompress zstd-encoded request with length %d: %w", len(ctx.reqBuf.B), err)
+			// Fall back to Snappy decompression, since vmagent may send snappy-encoded messages
+			// with 'Content-Encoding: zstd' header if they were put into persistent queue before vmagent restart.
+			// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5301
+			zstdErr := err
+			bb.B, err = snappy.Decode(bb.B[:cap(bb.B)], ctx.reqBuf.B)
+			if err != nil {
+				return fmt.Errorf("cannot decompress zstd-encoded request with length %d: %w", len(ctx.reqBuf.B), zstdErr)
+			}
 		}
 	} else {
 		bb.B, err = snappy.Decode(bb.B[:cap(bb.B)], ctx.reqBuf.B)