From 8551fbe9f38872fee829a9a821911ae9307ec2e7 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 25 Jul 2024 14:15:07 +0200 Subject: [PATCH] Revert "refactor(vmstorage): Refactor the code to reduce the time complexity of `MustAddRows` and improve readability (#6629)" This reverts commit e280d90e9a5e760c35da053e3bef8005bbad44f3. Reason for revert: the updated code doesn't improve the performance of table.MustAddRows for the typical case when rows contain timestamps belonging to ptws[0]. The performance may be improved in theory for the case when all the rows belong to partiton other than ptws[0], but this partition is automatically moved to ptws[0] by the code at lines https://github.com/VictoriaMetrics/VictoriaMetrics/blob/6aad1d43e9dad03c6192f4f91813d3129d6d1e8e/lib/storage/table.go#L287-L298 , so the next time the typical case will work. Also the updated code makes the code harder to follow, since it introduces an additional level of indirection with non-trivial semantics inside table.MustAddRows - the partition.TimeRangeInPartition() function. This function needs to be inspected and understood when reading the code at table.MustAddRows(). This function depends on minTsInRows and maxTsInRows vars, which are defined and initialized many lines above the partition.TimeRangeInPartition() call. This complicates reading and understanding the code even more. The previous code was using clearer loop over rows with the clear call to partition.HasTimestamp() for every timestamp in the row. The partition.HasTimestamp() call is used in the table.MustAddRows() function multiple times. This makes the use of partition.HasTimestamp() call more consistent, easier to understand and easier to maintain comparing to the mix of partition.HasTimestamp() and partition.TimeRangeInPartition() calls. Aslo, there is no need in documenting some hardcore software engineering refactoring at docs/CHANGLELOG.md, since the docs/CHANGELOG.md is intended for VictoriaMetrics users, who may not know software engineering. The docs/CHANGELOG.md must document user-visible changes, and the docs must be concise and clear for VictoriaMetrics users. See https://docs.victoriametrics.com/contributing/#pull-request-checklist for more details. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6629 --- docs/CHANGELOG.md | 1 - lib/storage/partition.go | 5 ----- lib/storage/table.go | 16 +++++++++------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 23af4fe2b..6c949193d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -33,7 +33,6 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * SECURITY: upgrade base docker image (Alpine) from 3.20.1 to 3.20.2. See [alpine 3.20.2 release notes](https://alpinelinux.org/posts/Alpine-3.20.2-released.html). -* FEATURE: [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/) and [VictoriaMetrics Cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Refactor the code located in the `MustAddRows` function of `vmstorage` to improve performance and readability. * FEATURE: [vmauth](./vmauth.md): add `keep_original_host` option, which can be used for proxying the original `Host` header from client request to the backend. By default the backend host is used as `Host` header when proxying requests to the configured backends. See [these docs](./vmauth.md#host-http-header). * FEATURE: [vmauth](./vmauth.md) now returns HTTP 502 status code when all upstream backends are not available. Previously, it returned HTTP 503 status code. This change aligns vmauth behavior with other well-known reverse-proxies behavior. diff --git a/lib/storage/partition.go b/lib/storage/partition.go index 68a17a6ce..ab70c3791 100644 --- a/lib/storage/partition.go +++ b/lib/storage/partition.go @@ -849,11 +849,6 @@ func (pt *partition) HasTimestamp(timestamp int64) bool { return timestamp >= pt.tr.MinTimestamp && timestamp <= pt.tr.MaxTimestamp } -// TimeRangeInPartition returns true if the pt contains the given time range. -func (pt *partition) TimeRangeInPartition(minTimestamp, MaxTimestamp int64) bool { - return minTimestamp >= pt.tr.MinTimestamp && MaxTimestamp <= pt.tr.MaxTimestamp -} - // GetParts appends parts snapshot to dst and returns it. // // The appended parts must be released with PutParts. diff --git a/lib/storage/table.go b/lib/storage/table.go index 4084c31f8..bbb48405d 100644 --- a/lib/storage/table.go +++ b/lib/storage/table.go @@ -267,12 +267,7 @@ func (tb *table) MustAddRows(rows []rawRow) { if len(rows) == 0 { return } - // Get the maximum and minimum timestamps in the rows - maxTsInRows, minTsInRows := rows[0].Timestamp, rows[0].Timestamp - for i := range rows { - maxTsInRows = max(maxTsInRows, rows[i].Timestamp) - minTsInRows = min(minTsInRows, rows[i].Timestamp) - } + // Verify whether all the rows may be added to a single partition. ptwsX := getPartitionWrappers() defer putPartitionWrappers(ptwsX) @@ -280,7 +275,14 @@ func (tb *table) MustAddRows(rows []rawRow) { ptwsX.a = tb.GetPartitions(ptwsX.a[:0]) ptws := ptwsX.a for i, ptw := range ptws { - if !ptw.pt.TimeRangeInPartition(minTsInRows, maxTsInRows) { + singlePt := true + for j := range rows { + if !ptw.pt.HasTimestamp(rows[j].Timestamp) { + singlePt = false + break + } + } + if !singlePt { continue }