lib/logstorage: work-in-progress

This commit is contained in:
Aliaksandr Valialkin 2024-05-25 21:36:16 +02:00
parent e2590f0485
commit dc55146752
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
46 changed files with 2615 additions and 808 deletions

View file

@ -6,8 +6,8 @@
{% stripspace %}
// LabelsForHits formats labels for /select/logsql/hits response
{% func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) %}
// FieldsForHits formats labels for /select/logsql/hits response
{% func FieldsForHits(columns []logstorage.BlockColumn, rowIdx int) %}
{
{% if len(columns) > 0 %}
{%q= columns[0].Name %}:{%q= columns[0].Values[rowIdx] %}

View file

@ -11,7 +11,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
// LabelsForHits formats labels for /select/logsql/hits response
// FieldsForHits formats labels for /select/logsql/hits response
//line app/vlselect/logsql/hits_response.qtpl:10
import (
@ -27,7 +27,7 @@ var (
)
//line app/vlselect/logsql/hits_response.qtpl:10
func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
func StreamFieldsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:10
qw422016.N().S(`{`)
//line app/vlselect/logsql/hits_response.qtpl:12
@ -58,22 +58,22 @@ func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockCo
}
//line app/vlselect/logsql/hits_response.qtpl:19
func WriteLabelsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
func WriteFieldsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:19
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/hits_response.qtpl:19
StreamLabelsForHits(qw422016, columns, rowIdx)
StreamFieldsForHits(qw422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19
qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/hits_response.qtpl:19
}
//line app/vlselect/logsql/hits_response.qtpl:19
func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) string {
func FieldsForHits(columns []logstorage.BlockColumn, rowIdx int) string {
//line app/vlselect/logsql/hits_response.qtpl:19
qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/hits_response.qtpl:19
WriteLabelsForHits(qb422016, columns, rowIdx)
WriteFieldsForHits(qb422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19
qs422016 := string(qb422016.B)
//line app/vlselect/logsql/hits_response.qtpl:19

View file

@ -77,7 +77,7 @@ func ProcessHitsRequest(ctx context.Context, w http.ResponseWriter, r *http.Requ
hitsStr := strings.Clone(hitsValues[i])
bb.Reset()
WriteLabelsForHits(bb, columns, i)
WriteFieldsForHits(bb, columns, i)
mLock.Lock()
hs, ok := m[string(bb.B)]
@ -189,21 +189,21 @@ func ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *ht
WriteValuesWithHitsJSON(w, values)
}
// ProcessStreamLabelNamesRequest processes /select/logsql/stream_label_names request.
// ProcessStreamFieldNamesRequest processes /select/logsql/stream_field_names request.
//
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-names
func ProcessStreamLabelNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-names
func ProcessStreamFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
// Obtain stream label names for the given query
// Obtain stream field names for the given query
q.Optimize()
names, err := vlstorage.GetStreamLabelNames(ctx, tenantIDs, q)
names, err := vlstorage.GetStreamFieldNames(ctx, tenantIDs, q)
if err != nil {
httpserver.Errorf(w, r, "cannot obtain stream label names: %s", err)
httpserver.Errorf(w, r, "cannot obtain stream field names: %s", err)
}
// Write results
@ -211,20 +211,20 @@ func ProcessStreamLabelNamesRequest(ctx context.Context, w http.ResponseWriter,
WriteValuesWithHitsJSON(w, names)
}
// ProcessStreamLabelValuesRequest processes /select/logsql/stream_label_values request.
// ProcessStreamFieldValuesRequest processes /select/logsql/stream_field_values request.
//
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-values
func ProcessStreamLabelValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-values
func ProcessStreamFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
// Parse labelName query arg
labelName := r.FormValue("label")
if labelName == "" {
httpserver.Errorf(w, r, "missing 'label' query arg")
// Parse fieldName query arg
fieldName := r.FormValue("field")
if fieldName == "" {
httpserver.Errorf(w, r, "missing 'field' query arg")
return
}
@ -238,11 +238,11 @@ func ProcessStreamLabelValuesRequest(ctx context.Context, w http.ResponseWriter,
limit = 0
}
// Obtain stream label names for the given query
// Obtain stream field values for the given query and the given fieldName
q.Optimize()
values, err := vlstorage.GetStreamLabelValues(ctx, tenantIDs, q, labelName, uint64(limit))
values, err := vlstorage.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
if err != nil {
httpserver.Errorf(w, r, "cannot obtain stream label values: %s", err)
httpserver.Errorf(w, r, "cannot obtain stream field values: %s", err)
}
// Write results

View file

@ -157,13 +157,13 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
logsqlQueryRequests.Inc()
logsql.ProcessQueryRequest(ctx, w, r)
return true
case "/select/logsql/stream_label_names":
logsqlStreamLabelNamesRequests.Inc()
logsql.ProcessStreamLabelNamesRequest(ctx, w, r)
case "/select/logsql/stream_field_names":
logsqlStreamFieldNamesRequests.Inc()
logsql.ProcessStreamFieldNamesRequest(ctx, w, r)
return true
case "/select/logsql/stream_label_values":
logsqlStreamLabelValuesRequests.Inc()
logsql.ProcessStreamLabelValuesRequest(ctx, w, r)
case "/select/logsql/stream_field_values":
logsqlStreamFieldValuesRequests.Inc()
logsql.ProcessStreamFieldValuesRequest(ctx, w, r)
return true
case "/select/logsql/streams":
logsqlStreamsRequests.Inc()
@ -192,7 +192,7 @@ var (
logsqlFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_values"}`)
logsqlHitsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/hits"}`)
logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
logsqlStreamLabelNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_names"}`)
logsqlStreamLabelValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_values"}`)
logsqlStreamFieldNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_field_names"}`)
logsqlStreamFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_field_values"}`)
logsqlStreamsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/streams"}`)
)

View file

@ -123,16 +123,16 @@ func GetFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *log
return strg.GetFieldValues(ctx, tenantIDs, q, fieldName, limit)
}
// GetStreamLabelNames executes q and returns stream labels names seen in results.
func GetStreamLabelNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) {
return strg.GetStreamLabelNames(ctx, tenantIDs, q)
// GetStreamFieldNames executes q and returns stream field names seen in results.
func GetStreamFieldNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) {
return strg.GetStreamFieldNames(ctx, tenantIDs, q)
}
// GetStreamLabelValues executes q and returns stream label values for the given labelName seen in results.
// GetStreamFieldValues executes q and returns stream field values for the given fieldName seen in results.
//
// If limit > 0, then up to limit unique stream label values are returned.
func GetStreamLabelValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, labelName string, limit uint64) ([]logstorage.ValueWithHits, error) {
return strg.GetStreamLabelValues(ctx, tenantIDs, q, labelName, limit)
// If limit > 0, then up to limit unique stream field values are returned.
func GetStreamFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string, limit uint64) ([]logstorage.ValueWithHits, error) {
return strg.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, limit)
}
// GetStreams executes q and returns streams seen in query results.

View file

@ -19,6 +19,16 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
## tip
* FEATURE: add [`pack_json` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#pack_json-pipe), which packs all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into a JSON object and stores it into the given field.
* FEATURE: add [`unroll` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unroll-pipe), which can be used for unrolling JSON arrays stored in [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
* FEATURE: add [`replace_regexp` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#replace_regexp-pipe), which allows updating [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with regular expressions.
* FEATURE: improve performance for [`format`](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe) and [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe) pipes.
* FEATURE: improve performance for [`/select/logsql/field_names` HTTP API](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names).
* BUGFIX: prevent from panic in [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe) when VictoriaLogs runs on a system with one CPU core.
* BUGFIX: do not return referenced fields if they weren't present in the original logs. For example, `_time:5m | format if (non_existing_field:"") "abc"` could return an empty `non_existing_field`, while it shouldn't be returned because it is missing in the original logs.
* BUGFIX: properly initialize values for [`in(...)` filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter) inside [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe) if the `in(...)` contains other [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters). For example, `_time:5m | filter ip:in(user_type:admin | fields ip)` now works correctly.
## [v0.11.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.11.0-victorialogs)
Released at 2024-05-25
@ -63,8 +73,8 @@ Released at 2024-05-22
* FEATURE: add ability to unpack [logfmt](https://brandur.org/logfmt) fields with [`unpack_logfmt` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-unpack_logfmt).
* FEATURE: add [`fields_min`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_min-stats) and [`fields_max`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_max-stats) functions for [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe), which allow returning all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for the log entry with the minimum / maximum value at the given field.
* FEATURE: add `/select/logsql/streams` HTTP endpoint for returning [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-streams) for details.
* FEATURE: add `/select/logsql/stream_label_names` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-names) for details.
* FEATURE: add `/select/logsql/stream_label_values` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the given label from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-values) for details.
* FEATURE: add `/select/logsql/stream_field_names` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-names) for details.
* FEATURE: add `/select/logsql/stream_field_values` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values for the given field from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-values) for details.
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): change time range limitation from `_time` in the expression to `start` and `end` query args.
* BUGFIX: fix `invalid memory address or nil pointer dereference` panic when using [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe), [`unpack_json`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) or [`unpack_logfmt`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) pipes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6306).

View file

@ -37,6 +37,8 @@ For example, the following query finds all the logs with `error` word:
error
```
See [how to send queries to VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
If the queried [word](#word) clashes with LogsQL keywords, then just wrap it into quotes.
For example, the following query finds all the log messages with `and` [word](#word):
@ -80,11 +82,32 @@ Typical LogsQL query consists of multiple [filters](#filters) joined with `AND`
So LogsQL allows omitting `AND` words. For example, the following query is equivalent to the query above:
```logsql
error _time:5m
_time:5m error
```
The query returns all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) by default.
See [how to query specific fields](#querying-specific-fields).
The query returns logs in arbitrary order because sorting of big amounts of logs may require non-trivial amounts of CPU and RAM.
The number of logs with `error` word over the last 5 minutes isn't usually too big (e.g. less than a few millions), so it is OK to sort them with [`sort` pipe](#sort-pipe).
The following query sorts the selected logs by [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) field:
```logsql
_time:5m error | sort by (_time)
```
It is unlikely you are going to investigate more than a few hundred logs returned by the query above. So you can limit the number of returned logs
with [`limit` pipe](#limit-pipe). The following query returns the last 10 logs with the `error` word over the last 5 minutes:
```logsql
_time:5m error | sort by (_time) desc | limit 10
```
By default VictoriaLogs returns all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
If you need only the given set of fields, then add [`fields` pipe](#fields-pipe) to the end of the query. For example, the following query returns only
[`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field), [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields:
```logsql
error _time:5m | fields _time, _stream, _msg
```
Suppose the query above selects too many rows because some buggy app pushes invalid error logs to VictoriaLogs. Suppose the app adds `buggy_app` [word](#word) to every log line.
Then the following query removes all the logs from the buggy app, allowing us to pay attention to the real errors:
@ -93,8 +116,10 @@ Then the following query removes all the logs from the buggy app, allowing us pa
_time:5m error NOT buggy_app
```
This query uses `NOT` [operator](#logical-filter) for removing log lines from the buggy app. The `NOT` operator is used frequently, so it can be substituted with `!` char.
So the following query is equivalent to the previous one:
This query uses `NOT` [operator](#logical-filter) for removing log lines from the buggy app. The `NOT` operator is used frequently, so it can be substituted with `!` char
(the `!` char is used instead of `-` char as a shorthand for `NOT` operator because it nicely combines with [`=`](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter)
and [`~`](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter) filters like `!=` and `!~`).
The following query is equivalent to the previous one:
```logsql
_time:5m error !buggy_app
@ -113,17 +138,15 @@ This query can be rewritten to more clear query with the `OR` [operator](#logica
_time:5m error !(buggy_app OR foobar)
```
Note that the parentheses are required here, since otherwise the query won't return the expected results.
The query `error !buggy_app OR foobar` is interpreted as `(error AND NOT buggy_app) OR foobar`. This query may return error logs
from the buggy app if they contain `foobar` [word](#word). This query also continues returning all the error logs from the second buggy app.
This is because of different priorities for `NOT`, `AND` and `OR` operators.
Read [these docs](#logical-filter) for more details. There is no need in remembering all these priority rules -
just wrap the needed query parts into explicit parentheses if you aren't sure in priority rules.
The parentheses are **required** here, since otherwise the query won't return the expected results.
The query `error !buggy_app OR foobar` is interpreted as `(error AND NOT buggy_app) OR foobar` according to [priorities for AND, OR and NOT operators](#logical-filter).
This query returns logs with the `foobar` [word](#word), even if they do not contain the `error` word or contain the `buggy_app` word.
So it is recommended wrapping the needed query parts into explicit parentheses if you are unsure about the priority rules.
As an additional bonus, explicit parentheses make queries easier to read and maintain.
Queries above assume that the `error` [word](#word) is stored in the [log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
This word can be stored in other [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) such as `log.level`.
How to select error logs in this case? Just add the `log.level:` prefix in front of the `error` word:
If this word is stored in another [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) such as `log.level`, then add the `log.level:` prefix
in front of the `error` word:
```logsql
_time:5m log.level:error !(buggy_app OR foobar)
@ -158,8 +181,16 @@ If the `app` field is associated with the log stream, then the query above can b
_time:5m log.level:error _stream:{app!~"buggy_app|foobar"}
```
This query completely skips scanning for logs from `buggy_app` and `foobar` apps, thus significantly reducing disk read IO and CPU time
needed for performing the query.
This query skips scanning for [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) from `buggy_app` and `foobar` apps.
It inspects only `log.level` and [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) labels.
This significantly reduces disk read IO and CPU time needed for performing the query.
LogsQL also provides [functions for statistics calculation](#stats-pipe) over the selected logs. For example, the following query returns the number of logs
with the `error` word for the last 5 minutes:
```logsql
_time:5m error | stats count() logs_with_error
```
Finally, it is recommended reading [performance tips](#performance-tips).
@ -177,13 +208,16 @@ These words are taken into account by full-text search filters such as
#### Query syntax
LogsQL query must contain [filters](#filters) for selecting the matching logs. At least a single filter is required.
LogsQL query must contain at least a single [filter](#filters) for selecting the matching logs.
For example, the following query selects all the logs for the last 5 minutes by using [`_time` filter](#time-filter):
```logsql
_time:5m
```
Tip: try [`*` filter](https://docs.victoriametrics.com/victorialogs/logsql/#any-value-filter), which selects all the logs stored in VictoriaLogs.
Do not worry - this doesn't crash VictoriaLogs, even if it contains trillions of logs. In the worst case it will return
Additionally to filters, LogsQL query may contain an arbitrary mix of optional actions for processing the selected logs. These actions are delimited by `|` and are known as [`pipes`](#pipes).
For example, the following query uses [`stats` pipe](#stats-pipe) for returning the number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with the `error` [word](#word) for the last 5 minutes:
@ -1080,13 +1114,16 @@ LogsQL supports the following pipes:
- [`format`](#format-pipe) formats output field from input [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`limit`](#limit-pipe) limits the number of selected logs.
- [`offset`](#offset-pipe) skips the given number of selected logs.
- [`pack_json`](#pack_json-pipe) packs [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into JSON object.
- [`rename`](#rename-pipe) renames [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`replace`](#replace-pipe) replaces substrings in the specified [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`replace_regexp`](#replace_regexp-pipe) updates [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with regular expressions.
- [`sort`](#sort-pipe) sorts logs by the given [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`stats`](#stats-pipe) calculates various stats over the selected logs.
- [`uniq`](#uniq-pipe) returns unique log entries.
- [`unpack_json`](#unpack_json-pipe) unpacks JSON fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`unpack_logfmt`](#unpack_logfmt-pipe) unpacks [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`unroll`](#unroll-pipe) unrolls JSON arrays from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
### copy pipe
@ -1178,6 +1215,9 @@ For example, the following query preserves the original `ip` field value if `foo
_time:5m | extract 'ip=<ip> ' from foo skip_empty_results
```
Performance tip: it is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `extract`.
See [general performance tips](#performance-tips) for details.
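For instance, the following sketch (reusing the `ip=<ip>` pattern and the `foo` field from the example above) narrows the selection with an extra word filter before the pipe, so `extract` has to process fewer rows:
```logsql
_time:5m error | extract 'ip=<ip> ' from foo
```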
See also:
- [Format for extract pipe pattern](#format-for-extract-pipe-pattern)
@ -1363,10 +1403,14 @@ when at least `field1` or `field2` aren't empty, while preserving the original `
_time:5m | format "<field1><field2>" as foo skip_empty_results
```
Performance tip: it is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `format`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional format](#conditional-format)
- [`replace` pipe](#replace-pipe)
- [`replace_regexp` pipe](#replace_regexp-pipe)
- [`extract` pipe](#extract-pipe)
@ -1419,6 +1463,37 @@ See also:
- [`limit` pipe](#limit-pipe)
- [`sort` pipe](#sort-pipe)
### pack_json pipe
`| pack_json as field_name` [pipe](#pipes) packs all [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into a JSON object
and stores it as a string in the given `field_name`.
For example, the following query packs all the fields into JSON object and stores it into [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
for logs over the last 5 minutes:
```logsql
_time:5m | pack_json as _msg
```
The `as _msg` part can be omitted if packed JSON object is stored into [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
The following query is equivalent to the previous one:
```logsql
_time:5m | pack_json
```
The `pack_json` doesn't touch other fields. If you do not need them, then add [`| fields ...`](#fields-pipe) after the `pack_json` pipe. For example, the following query
leaves only the `foo` field with the original log fields packed into JSON:
```logsql
_time:5m | pack_json as foo | fields foo
```
See also:
- [`unpack_json` pipe](#unpack_json-pipe)
### rename pipe
If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be renamed, then `| rename src1 as dst1, ..., srcN as dstN` [pipe](#pipes) can be used.
@ -1470,9 +1545,13 @@ at the [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#da
_time:5m | replace ('foo', 'bar') at baz limit 1
```
Performance tip: it is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `replace`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional replace](#conditional-replace)
- [`replace_regexp` pipe](#replace_regexp-pipe)
- [`format` pipe](#format-pipe)
- [`extract` pipe](#extract-pipe)
@ -1487,6 +1566,58 @@ only if `user_type` field equals to `admin`:
_time:5m | replace if (user_type:=admin) ("secret", "***") at password
```
### replace_regexp pipe
`| replace_regexp ("regexp", "replacement") at field` [pipe](#pipes) replaces all the substrings matching the given `regexp` with the given `replacement`
in the given [`field`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
The `regexp` must contain regular expression with [RE2 syntax](https://github.com/google/re2/wiki/Syntax).
The `replacement` may contain `$N` or `${N}` placeholders, which are substituted with the `N-th` capturing group in the `regexp`.
For example, the following query replaces all the substrings starting with `host-` and ending with `-foo` with the contents between `host-` and `-foo` in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) for logs over the last 5 minutes:
```logsql
_time:5m | replace_regexp ("host-(.+?)-foo", "$1") at _msg
```
The `at _msg` part can be omitted if the replacement occurs in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
The following query is equivalent to the previous one:
```logsql
_time:5m | replace_regexp ("host-(.+?)-foo", "$1")
```
The number of replacements can be limited with `limit N` at the end of `replace_regexp`. For example, the following query replaces only the first `password: ...` substring
ending with whitespace with empty substring at the [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) `baz`:
```logsql
_time:5m | replace_regexp ('password: [^ ]+', '') at baz limit 1
```
Performance tips:
- It is recommended using [`replace` pipe](#replace-pipe) instead of `replace_regexp` if possible, since it works faster.
- It is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `replace_regexp`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional replace_regexp](#conditional-replace_regexp)
- [`replace` pipe](#replace-pipe)
- [`format` pipe](#format-pipe)
- [`extract` pipe](#extract-pipe)
#### Conditional replace_regexp
If the [`replace_regexp` pipe](#replace_regexp-pipe) mustn't be applied to every [log entry](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
then add `if (<filters>)` after `replace_regexp`.
The `<filters>` can contain arbitrary [filters](#filters). For example, the following query replaces `password: ...` substrings ending with whitespace
with `***` in the `foo` field only if `user_type` field equals to `admin`:
```logsql
_time:5m | replace_regexp if (user_type:=admin) ("password: [^ ]+", "***") at foo
```
### sort pipe
By default logs are selected in arbitrary order for performance reasons. If logs must be sorted, then the `| sort by (field1, ..., fieldN)` [pipe](#pipes) can be used.
@ -1720,10 +1851,10 @@ _time:5m | uniq by (host, path)
The unique entries are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
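For example, the following query (an illustrative combination of the pipes described on this page) sorts the unique `(host, path)` pairs by `host`:
```logsql
_time:5m | uniq by (host, path) | sort by (host)
```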
Add `hits` after `uniq by (...)` in order to return the number of matching logs per each field value:
Add `with hits` after `uniq by (...)` in order to return the number of matching logs per each field value:
```logsql
_time:5m | uniq by (host) hits
_time:5m | uniq by (host) with hits
```
Unique entries are stored in memory during query execution. Big number of unique selected entries may require a lot of memory.
@ -1802,15 +1933,22 @@ form `foo`:
_time:5m | unpack_json from foo result_prefix "foo_"
```
Performance tip: it is better from performance and resource usage PoV ingesting parsed JSON logs into VictoriaLogs
Performance tips:
- It is better from performance and resource usage PoV ingesting parsed JSON logs into VictoriaLogs
according to the [supported data model](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
instead of ingesting unparsed JSON lines into VictoriaLogs and then parsing them at query time with [`unpack_json` pipe](#unpack_json-pipe).
- It is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `unpack_json`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional `unpack_json`](#conditional-unpack_json)
- [`unpack_logfmt` pipe](#unpack_logfmt-pipe)
- [`extract` pipe](#extract-pipe)
- [`unroll` pipe](#unroll-pipe)
- [`pack_json` pipe](#pack_json-pipe)
#### Conditional unpack_json
@ -1879,10 +2017,15 @@ from `foo` field:
_time:5m | unpack_logfmt from foo result_prefix "foo_"
```
Performance tip: it is better from performance and resource usage PoV ingesting parsed [logfmt](https://brandur.org/logfmt) logs into VictoriaLogs
Performance tips:
- It is better from performance and resource usage PoV ingesting parsed [logfmt](https://brandur.org/logfmt) logs into VictoriaLogs
according to the [supported data model](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
instead of ingesting unparsed logfmt lines into VictoriaLogs and then parsing them at query time with [`unpack_logfmt` pipe](#unpack_logfmt-pipe).
- It is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `unpack_logfmt`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional unpack_logfmt](#conditional-unpack_logfmt)
@ -1900,6 +2043,34 @@ only if `ip` field in the current log entry isn't set or empty:
_time:5m | unpack_logfmt if (ip:"") from foo
```
### unroll pipe
`| unroll by (field1, ..., fieldN)` [pipe](#pipes) can be used for unrolling JSON arrays from `field1`, ..., `fieldN`
[log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into separate rows.
For example, the following query unrolls `timestamp` and `value` [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) from logs for the last 5 minutes:
```logsql
_time:5m | unroll (timestamp, value)
```
See also:
- [`unpack_json` pipe](#unpack_json-pipe)
- [`extract` pipe](#extract-pipe)
- [`uniq_values` stats function](#uniq_values-stats)
- [`values` stats function](#values-stats)
#### Conditional unroll
If the [`unroll` pipe](#unroll-pipe) mustn't be applied to every [log entry](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
then add `if (<filters>)` after `unroll`.
The `<filters>` can contain arbitrary [filters](#filters). For example, the following query unrolls `value` field only if `value_type` field equals to `json_array`:
```logsql
_time:5m | unroll if (value_type:="json_array") (value)
```
## stats pipe functions
LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
@ -2204,6 +2375,8 @@ over logs for the last 5 minutes:
_time:5m | stats uniq_values(ip) unique_ips
```
The returned unique ip addresses can be unrolled into distinct log entries with [`unroll` pipe](#unroll-pipe).
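For example, the following query (an illustrative sketch combining this stats function with the [`unroll` pipe](#unroll-pipe) described above) puts every unique ip address on its own row:
```logsql
_time:5m | stats uniq_values(ip) unique_ips | unroll (unique_ips)
```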
Every unique value is stored in memory during query execution. Big number of unique values may require a lot of memory. Sometimes it is enough to return
only a subset of unique values. In this case add `limit N` after `uniq_values(...)` in order to limit the number of returned unique values to `N`,
while limiting the maximum memory usage.
@ -2236,6 +2409,8 @@ over logs for the last 5 minutes:
_time:5m | stats values(ip) ips
```
The returned ip addresses can be unrolled into distinct log entries with [`unroll` pipe](#unroll-pipe).
See also:
- [`uniq_values`](#uniq_values-stats)
@ -2257,8 +2432,9 @@ LogsQL supports the following transformations on the log entries selected with [
See [these docs](#extract-pipe) for details.
- Unpacking JSON fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](#unpack_json-pipe).
- Unpacking [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](#unpack_logfmt-pipe).
- Creating a new field from existing [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) according to the provided format. See [these docs](#format-pipe).
- Replacing substrings in the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](#replace-pipe).
- Creating a new field from existing [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) according to the provided format. See [`format` pipe](#format-pipe).
- Replacing substrings in the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See [`replace` pipe](#replace-pipe) and [`replace_regexp` pipe](#replace_regexp-pipe) docs.
LogsQL will support the following transformations in the future:
@ -2350,3 +2526,5 @@ Internally duration values are converted into nanoseconds.
This rule doesn't apply to [time filter](#time-filter) and [stream filter](#stream-filter), which can be put at any place of the query.
- Move more specific filters, which match lower number of log entries, to the beginning of the query.
This rule doesn't apply to [time filter](#time-filter) and [stream filter](#stream-filter), which can be put at any place of the query.
- If the selected logs are passed to [pipes](#pipes) for further transformations and statistics calculations, then it is recommended
reducing the number of selected logs by using more specific [filters](#filters), so that [pipes](#pipes) have fewer logs to process, as illustrated in the sketch below.
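The following sketch combines filters already shown on this page (a time filter, a word filter and a stream filter; the `app="my-app"` stream field value is an assumption), so the [`stats` pipe](#stats-pipe) receives as few logs as possible:
```logsql
_time:5m log.level:error _stream:{app="my-app"} | stats count() logs_with_error
```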

View file

@ -74,14 +74,14 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges
}
```
Both label name and label value may contain arbitrary chars. Such chars must be encoded
Both field name and field value may contain arbitrary chars. Such chars must be encoded
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
according to [JSON string encoding](https://www.rfc-editor.org/rfc/rfc7159.html#section-7).
Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding:
```json
{
"label with whitepsace": "value\nwith\nnewlines",
"field with whitepsace": "value\nwith\nnewlines",
"Поле": "价值",
}
```
@ -89,13 +89,11 @@ Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8)
VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs.
This enables [full-text search](https://docs.victoriametrics.com/victorialogs/logsql/) across all the fields.
VictoriaLogs supports the following field types:
VictoriaLogs supports the following special fields additionally to arbitrary [other fields](#other-fields):
* [`_msg` field](#message-field)
* [`_time` field](#time-field)
* [`_stream` fields](#stream-fields)
* [other fields](#other-fields)
### Message field
@ -116,7 +114,9 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges
### Time field
The ingested [log entries](#data-model) may contain `_time` field with the timestamp of the ingested log entry.
For example:
The timestamp must be in [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) format. The most commonly used subset of [ISO8601](https://en.wikipedia.org/wiki/ISO_8601)
is also supported. It is allowed specifying seconds part of the timestamp with any precision up to nanoseconds.
For example, the following [log entry](#data-model) contains valid timestamp with millisecond precision in the `_time` field:
```json
{
@ -132,29 +132,39 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges
If `_time` field is missing, then the data ingestion time is used as log entry timestamp.
The log entry timestamp allows quickly narrowing down the search to a particular time range.
See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for details.
The `_time` field is used in [time filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for quickly narrowing down
the search to a particular time range.
### Stream fields
Some [structured logging](#data-model) fields may uniquely identify the application instance, which generates log entries.
This may be either a single field such as `instance=host123:456` or a set of fields such as
`(datacenter=..., env=..., job=..., instance=...)` or
`(kubernetes.namespace=..., kubernetes.node.name=..., kubernetes.pod.name=..., kubernetes.container.name=...)`.
This may be either a single field such as `instance="host123:456"` or a set of fields such as
`{datacenter="...", env="...", job="...", instance="..."}` or
`{kubernetes.namespace="...", kubernetes.node.name="...", kubernetes.pod.name="...", kubernetes.container.name="..."}`.
Log entries received from a single application instance form a log stream in VictoriaLogs.
VictoriaLogs optimizes storing and querying of individual log streams. This provides the following benefits:
Log entries received from a single application instance form a **log stream** in VictoriaLogs.
VictoriaLogs optimizes storing and [querying](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter) of individual log streams.
This provides the following benefits:
- Reduced disk space usage, since a log stream from a single application instance is usually compressed better
than a mixed log stream from multiple distinct applications.
- Increased query performance, since VictoriaLogs needs to scan lower amounts of data
when [searching by stream labels](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
when [searching by stream fields](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
VictoriaLogs cannot determine automatically, which fields uniquely identify every log stream,
so it stores all the received log entries in a single default stream - `{}`.
This may lead to not-so-optimal resource usage and query performance.
Every ingested log entry is associated with a log stream. The name of this stream is stored in `_stream` field.
This field has the format similar to [labels in Prometheus metrics](https://docs.victoriametrics.com/keyconcepts/#labels):
```
{field1="value1", ..., fieldN="valueN"}
```
For example, if `host` and `app` fields are associated with the stream, then the `_stream` field will have `{host="host-123",app="my-app"}` value
for the log entry with `host="host-123"` and `app="my-app"` fields. The `_stream` field can be searched
with [stream filters](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
By default the value of `_stream` field is `{}`, since VictoriaLogs cannot determine automatically,
which fields uniquely identify every log stream. This may lead to not-so-optimal resource usage and query performance.
Therefore it is recommended specifying stream-level fields via `_stream_fields` query arg
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
For example, if logs from Kubernetes containers have the following fields:
@ -175,20 +185,17 @@ per-container logs into distinct streams.
#### How to determine which fields must be associated with log streams?
[Log streams](#stream-fields) can be associated with fields, which simultaneously meet the following conditions:
[Log streams](#stream-fields) must contain [fields](#data-model), which uniquely identify the application instance generating the logs.
For example, `container`, `instance` and `host` are good candidates for stream fields.
- Fields, which remain constant across log entries received from a single application instance.
- Fields, which uniquely identify the application instance. For example, `instance`, `host`, `container`, etc.
Additional fields may be added to log streams if they **remain constant during application instance lifetime**.
For example, `namespace`, `node`, `pod` and `job` are good candidates for additional stream fields. Adding such fields to log streams
makes sense if you are going to use these fields during search and want to speed it up with [stream filters](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
Sometimes a single application instance may generate multiple log streams and store them into distinct log files.
In this case it is OK to associate the log stream with filepath fields such as `log.file.path` additionally to instance-specific fields.
There is **no need to add all the constant fields to log streams**, since this may increase resource usage during data ingestion and querying.
Structured logs may contain big number of fields, which do not change across log entries received from a single application instance.
There is no need in associating all these fields with log stream - it is enough to associate only those fields, which uniquely identify
the application instance across all the ingested logs. Additionally, some fields such as `datacenter`, `environment`, `namespace`, `job` or `app`,
can be associated with log stream in order to optimize searching by these fields with [stream filtering](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
Never associate log streams with fields, which may change across log entries of the same application instance. See [these docs](#high-cardinality) for details.
**Never add non-constant fields to streams if these fields may change with every log entry of the same stream**.
For example, `ip`, `user_id` and `trace_id` **must never be associated with log streams**, since this may lead to [high cardinality issues](#high-cardinality).
#### High cardinality
@ -196,8 +203,7 @@ Some fields in the [ingested logs](#data-model) may contain big number of unique
For example, fields with names such as `ip`, `user_id` or `trace_id` tend to contain big number of unique values.
VictoriaLogs works perfectly with such fields unless they are associated with [log streams](#stream-fields).
Never associate high-cardinality fields with [log streams](#stream-fields), since this may result
to the following issues:
**Never** associate high-cardinality fields with [log streams](#stream-fields), since this may lead to the following issues:
- Performance degradation during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/victorialogs/querying/)
@ -214,9 +220,9 @@ This can help narrowing down and eliminating high-cardinality fields from [log s
### Other fields
The rest of [structured logging](#data-model) fields are optional. They can be used for simplifying and optimizing search queries.
For example, it is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long log message.
E.g. the `trace_id:XXXX-YYYY-ZZZZ` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query.
Every ingested log entry may contain arbitrary number of [fields](#data-model) additionally to [`_msg`](#message-field) and [`_time`](#time-field).
For example, `level`, `ip`, `user_id`, `trace_id`, etc. Such fields can be used for simplifying and optimizing [search queries](https://docs.victoriametrics.com/victorialogs/logsql/).
It is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long [log message](#message-field).
E.g. the `trace_id:="XXXX-YYYY-ZZZZ"` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query.
See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) for more details.

View file

@ -28,8 +28,8 @@ VictoriaLogs provides the following HTTP endpoints:
- [`/select/logsql/query`](#querying-logs) for querying logs
- [`/select/logsql/hits`](#querying-hits-stats) for querying log hits stats over the given time range
- [`/select/logsql/streams`](#querying-streams) for querying [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
- [`/select/logsql/stream_label_names`](#querying-stream-label-names) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names
- [`/select/logsql/stream_label_values`](#querying-stream-label-values) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values
- [`/select/logsql/stream_field_names`](#querying-stream-field-names) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names
- [`/select/logsql/stream_field_values`](#querying-stream-field-values) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values
- [`/select/logsql/field_names`](#querying-field-names) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names.
- [`/select/logsql/field_values`](#querying-field-values) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values.
@ -43,8 +43,8 @@ For example, the following query returns all the log entries with the `error` wo
curl http://localhost:9428/select/logsql/query -d 'query=error'
```
The response by default contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See [how to query specific fields](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
The response by default contains all the [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for the selected logs.
Use [`fields` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#fields-pipe) for selecting only the needed fields.
The `query` argument can be passed either in the request url itself (aka HTTP GET request) or via request body
with the `x-www-form-urlencoded` encoding (aka HTTP POST request). The HTTP POST is useful for sending long queries
@ -56,7 +56,8 @@ or similar tools.
By default the `/select/logsql/query` returns all the log entries matching the given `query`. The response size can be limited in the following ways:
- By closing the response stream at any time. In this case VictoriaLogs stops query execution and frees all the resources occupied by the request.
- By closing the response stream at any time. VictoriaLogs stops query execution and frees all the resources occupied by the request as soon as it detects a closed client connection.
So it is safe to run the [`*` query](https://docs.victoriametrics.com/victorialogs/logsql/#any-value-filter), which selects all the logs, even if trillions of logs are stored in VictoriaLogs.
- By specifying the maximum number of log entries, which can be returned in the response via `limit` query arg. For example, the following request returns
up to 10 matching log entries:
```sh
@ -68,7 +69,7 @@ By default the `/select/logsql/query` returns all the log entries matching the g
```
- By adding [`_time` filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter). The time range for the query can be specified via optional
`start` and `end` query args formatted according to [these docs](https://docs.victoriametrics.com/single-server-victoriametrics/#timestamp-formats).
- By adding other [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) to the query.
- By adding more specific [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) to the query, which select lower number of logs.
The `/select/logsql/query` endpoint returns [a stream of JSON lines](https://jsonlines.org/),
where each line contains JSON-encoded log entry in the form `{field1="value1",...,fieldN="valueN"}`.
@ -79,18 +80,18 @@ Example response:
{"_msg":"some other error","_stream":"{}","_time":"2023-01-01T13:32:15Z"}
```
The matching lines are sent to the response stream as soon as they are found in VictoriaLogs storage.
Log lines are sent to the response stream as soon as they are found in VictoriaLogs storage.
This means that the returned response may contain billions of lines for queries matching too many log entries.
The response can be interrupted at any time by closing the connection to VictoriaLogs server.
This allows post-processing the returned lines at the client side with the usual Unix commands such as `grep`, `jq`, `less`, `head`, etc.
See [these docs](#command-line) for more details.
This allows post-processing the returned lines at the client side with the usual Unix commands such as `grep`, `jq`, `less`, `head`, etc.,
without worrying about resource usage at VictoriaLogs side. See [these docs](#command-line) for more details.
The returned lines aren't sorted, since sorting disables the ability to send matching log entries to response stream as soon as they are found.
Query results can be sorted either at VictoriaLogs side according [to these docs](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe)
The returned lines aren't sorted by default, since sorting disables the ability to send matching log entries to response stream as soon as they are found.
Query results can be sorted either at VictoriaLogs side via [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe)
or at client side with the usual `sort` command according to [these docs](#command-line).
By default the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy) is queried.
If you need querying other tenant, then specify the needed tenant via http request headers. For example, the following query searches
If you need to query another tenant, then specify it via `AccountID` and `ProjectID` http request headers. For example, the following query searches
for log messages at `(AccountID=12, ProjectID=34)` tenant:
```sh
@ -100,14 +101,20 @@ curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID:
The number of requests to `/select/logsql/query` can be [monitored](https://docs.victoriametrics.com/victorialogs/#monitoring)
with `vl_http_requests_total{path="/select/logsql/query"}` metric.
See also:
- [Querying hits stats](#querying-hits-stats)
- [Querying streams](#querying-streams)
- [HTTP API](#http-api)
- [Querying stream field names](#querying-stream-field-names)
- [Querying stream field values](#querying-stream-field-values)
- [Querying field names](#querying-field-names)
- [Querying field values](#querying-field-values)
### Querying hits stats
VictoriaMetrics provides `/select/logsql/hits?query=<query>&start=<start>&end=<end>&step=<step>` HTTP endpoint, which returns the number
of matching log entries for the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]`
of matching log entries for the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]`
time range grouped by `<step>` buckets. The returned results are sorted by time.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
@ -210,7 +217,7 @@ See also:
### Querying streams
VictoriaLogs provides `/select/logsql/streams?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every `stream`.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
@ -254,22 +261,22 @@ See also:
- [Querying hits stats](#querying-hits-stats)
- [HTTP API](#http-api)
### Querying stream label names
### Querying stream field names
VictoriaLogs provides `/select/logsql/stream_label_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns
[log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results
of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every label name.
VictoriaLogs provides `/select/logsql/stream_field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns
[log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names from results
of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every field name.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns stream label names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
For example, the following command returns stream field names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
for the last 5 minutes:
```sh
curl http://localhost:9428/select/logsql/stream_label_names -d 'query=error' -d 'start=5m'
curl http://localhost:9428/select/logsql/stream_field_names -d 'query=error' -d 'start=5m'
```
Below is an example JSON output returned from this endpoint:
@ -295,27 +302,27 @@ Below is an example JSON output returned from this endpoint:
See also:
- [Querying stream label names](#querying-stream-label-names)
- [Querying stream field values](#querying-stream-field-values)
- [Querying field values](#querying-field-values)
- [Querying streams](#querying-streams)
- [HTTP API](#http-api)
### Querying stream label values
### Querying stream field values
VictoriaLogs provides `/select/logsql/stream_label_values?query=<query>&start=<start>&<end>&label=<labelName>` HTTP endpoint,
which returns [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the label with the given `<labelName>` name
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every label value.
VictoriaLogs provides `/select/logsql/stream_field_values?query=<query>&start=<start>&<end>&field=<fieldName>` HTTP endpoint,
which returns [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values for the field with the given `<fieldName>` name
from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results for every field value.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it equals the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns values for the stream label `host` across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
For example, the following command returns values for the stream field `host` across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
for the last 5 minutes:
```sh
curl http://localhost:9428/select/logsql/stream_label_values -d 'query=error' -d 'start=5m' -d 'label=host'
curl http://localhost:9428/select/logsql/stream_field_values -d 'query=error' -d 'start=5m' -d 'field=host'
```
Below is an example JSON output returned from this endpoint:
@ -335,12 +342,12 @@ Below is an example JSON output returned from this endpoint:
}
```
The `/select/logsql/stream_label_names` endpoint supports optional `limit=N` query arg, which allows limiting the number of returned values to `N`.
The `/select/logsql/stream_field_names` endpoint supports an optional `limit=N` query arg, which allows limiting the number of returned values to `N`.
The endpoint returns an arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for pagination over a big number of field values.
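For example, a request capped at 5 returned values may look like this (the limit value is arbitrary):
```sh
curl http://localhost:9428/select/logsql/stream_field_names -d 'query=error' -d 'start=5m' -d 'limit=5'
```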
See also:
- [Querying stream label values](#querying-stream-label-values)
- [Querying stream field values](#querying-stream-field-values)
- [Querying field names](#querying-field-names)
- [Querying streams](#querying-streams)
- [HTTP API](#http-api)
@ -348,7 +355,7 @@ See also:
### Querying field names
VictoriaLogs provides `/select/logsql/field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns field names
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results for every field name.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
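For example, a command like the following may be used for listing field names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) for the last 5 minutes (the time range is illustrative):
```sh
curl http://localhost:9428/select/logsql/field_names -d 'query=error' -d 'start=5m'
```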
@ -385,7 +392,7 @@ Below is an example JSON output returned from this endpoint:
See also:
- [Querying stream label names](#querying-stream-label-names)
- [Querying stream field names](#querying-stream-field-names)
- [Querying field values](#querying-field-values)
- [Querying streams](#querying-streams)
- [HTTP API](#http-api)
@ -394,7 +401,7 @@ See also:
VictoriaLogs provides `/select/logsql/field_values?query=<query>&field=<fieldName>&start=<start>&end=<end>` HTTP endpoint, which returns
unique values for the given `<fieldName>` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results for every field value.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
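For example, a command like the following may be used for listing values of the `host` field (an illustrative field name) across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) for the last 5 minutes:
```sh
curl http://localhost:9428/select/logsql/field_values -d 'query=error' -d 'field=host' -d 'start=5m'
```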
@ -435,7 +442,7 @@ When the `limit` is reached, `hits` are zeroed, since they cannot be calculated
See also:
- [Querying stream label values](#querying-stream-label-values)
- [Querying stream field values](#querying-stream-field-values)
- [Querying field names](#querying-field-names)
- [Querying streams](#querying-streams)
- [HTTP API](#http-api)
@ -454,32 +461,25 @@ There are three modes of displaying query results:
- `Table` - displays query results as a table.
- `JSON` - displays raw JSON response from [HTTP API](#http-api).
This is the first version that has minimal functionality. It comes with the following limitations:
- The number of query results is always limited to 1000 lines. Iteratively add
more specific [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) to the query
in order to get full response with less than 1000 lines.
- Queries are always executed against [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy) `0`.
These limitations will be removed in future versions.
To get around the current limitations, you can use an alternative - the [command line interface](#command-line).
This is the first version that has minimal functionality and may contain bugs.
It is recommended to try the [command line interface](#command-line), which has no known bugs :)
## Command-line
VictoriaLogs integrates well with `curl` and other command-line tools during querying because of the following features:
- VictoriaLogs sends the matching log entries to the response stream as soon as they are found.
This allows forwarding the response stream to arbitrary [Unix pipes](https://en.wikipedia.org/wiki/Pipeline_(Unix)).
- VictoriaLogs automatically adjusts query execution speed to the speed of the client, which reads the response stream.
- Matching log entries are sent to the response stream as soon as they are found.
This allows forwarding the response stream to arbitrary [Unix pipes](https://en.wikipedia.org/wiki/Pipeline_(Unix))
without waiting until the response finishes.
- Query execution speed is automatically adjusted to the speed of the client, which reads the response stream.
For example, if the response stream is piped to `less` command, then the query is suspended
until the `less` command reads the next block from the response stream.
- VictoriaLogs automatically cancels query execution when the client closes the response stream.
- Query is automatically canceled when the client closes the response stream.
For example, if the query response is piped to `head` command, then VictoriaLogs stops executing the query
when the `head` command closes the response stream.
These features allow executing queries from the command-line interface that potentially select billions of rows,
without the risk of high resource usage (CPU, RAM, disk IO) at VictoriaLogs server.
without the risk of high resource usage (CPU, RAM, disk IO) at VictoriaLogs.
For example, the following query can return a very big number of matching log entries (e.g. billions) if VictoriaLogs contains
many log messages with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word):
@ -488,8 +488,8 @@ many log messages with the `error` [word](https://docs.victoriametrics.com/victo
curl http://localhost:9428/select/logsql/query -d 'query=error'
```
If the command returns "never-ending" response, then just press `ctrl+C` at any time in order to cancel the query.
VictoriaLogs notices that the response stream is closed, so it cancels the query and instantly stops consuming CPU, RAM and disk IO for this query.
If the command above returns a "never-ending" response, then just press `ctrl+C` at any time in order to cancel the query.
VictoriaLogs notices that the response stream is closed, so it cancels the query and stops consuming CPU, RAM and disk IO for this query.
Then just use the `head` command to investigate the returned log messages and narrow down the query:
@ -500,6 +500,12 @@ curl http://localhost:9428/select/logsql/query -d 'query=error' | head -10
The `head -10` command reads only the first 10 log messages from the response and then closes the response stream.
This automatically cancels the query on the VictoriaLogs side, so it stops consuming CPU, RAM and disk IO resources.
Alternatively, you can limit the number of returned logs on the VictoriaLogs side via the [`limit` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=error | limit 10'
```
Sometimes it may be more convenient to use the `less` command instead of `head` when investigating the returned response:
```sh
@ -509,7 +515,7 @@ curl http://localhost:9428/select/logsql/query -d 'query=error' | less
The `less` command reads the response stream on demand, when the user scrolls down the output.
VictoriaLogs suspends query execution when `less` stops reading the response stream.
It doesn't consume CPU and disk IO resources during this time. It resumes query execution
when the `less` continues reading the response stream.
after `less` continues reading the response stream.
Suppose that the initial investigation of the returned query results helped determine that the needed log messages contain
the `cannot open file` [phrase](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter).
@ -543,7 +549,13 @@ See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stream-fi
[these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) about `_time` filter
and [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter) about `AND` operator.
The following example shows how to sort query results by the [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field):
Alternatively, you can count the number of matching logs on the VictoriaLogs side with the [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=_stream:{app="nginx"} AND _time:5m AND error | stats count() logs_with_error'
```
The following example shows how to sort query results by the [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) with traditional Unix tools:
```sh
curl http://localhost:9428/select/logsql/query -d 'query=error' | jq -r '._time + " " + ._msg' | sort | less
@ -558,8 +570,14 @@ can take non-trivial amounts of time if the `query` returns too many results. Th
before sorting the results. See [these tips](https://docs.victoriametrics.com/victorialogs/logsql/#performance-tips)
on how to narrow down query results.
Alternatively, sorting of matching logs can be performed on the VictoriaLogs side via the [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=error | sort by (_time)' | less
```
The following example calculates stats on the number of log messages received during the last 5 minutes
grouped by `log.level` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model):
grouped by `log.level` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with traditional Unix tools:
```sh
curl http://localhost:9428/select/logsql/query -d 'query=_time:5m log.level:*' | jq -r '."log.level"' | sort | uniq -c
@ -569,6 +587,12 @@ The query selects all the log messages with non-empty `log.level` field via ["an
then pipes them to the `jq` command, which extracts the `log.level` field value from the returned JSON stream. The extracted `log.level` values
are then sorted with the `sort` command and, finally, passed to the `uniq -c` command for calculating the needed stats.
Alternatively, all the stats calculations above can be performed on the VictoriaLogs side via [`stats by(...)`](https://docs.victoriametrics.com/victorialogs/logsql/#stats-by-fields):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=_time:5m log.level:* | stats by (log.level) count() matching_logs'
```
See also:
- [Key concepts](https://docs.victoriametrics.com/victorialogs/keyconcepts/).
View file
@ -31,11 +31,17 @@ type blockResult struct {
// csBuf contains requested columns.
csBuf []blockResultColumn
// csEmpty contains non-existing columns, which were referenced via getColumnByName()
csEmpty []blockResultColumn
// cs contains cached pointers to requested columns returned from getColumns() if csInitialized=true.
cs []*blockResultColumn
// csInitialized is set to true if cs is properly initialized and can be returned from getColumns().
csInitialized bool
	// fvecs contains filteredValuesEncodedCreator entries, which are referenced from columns added via appendFilteredColumn().
	fvecs []filteredValuesEncodedCreator
	// svecs contains searchValuesEncodedCreator entries, which are referenced from columns added via addColumn().
	svecs []searchValuesEncodedCreator
}
func (br *blockResult) reset() {
@ -49,10 +55,19 @@ func (br *blockResult) reset() {
clear(br.csBuf)
br.csBuf = br.csBuf[:0]
clear(br.csEmpty)
br.csEmpty = br.csEmpty[:0]
clear(br.cs)
br.cs = br.cs[:0]
br.csInitialized = false
clear(br.fvecs)
br.fvecs = br.fvecs[:0]
clear(br.svecs)
br.svecs = br.svecs[:0]
}
// clone returns a clone of br, which owns its own data.
@ -88,6 +103,10 @@ func (br *blockResult) clone() *blockResult {
}
brNew.csBuf = csNew
// do not clone br.csEmpty - it will be populated by the caller via getColumnByName().
// do not clone br.fvecs and br.svecs, since they may point to external data.
return brNew
}
@ -128,6 +147,9 @@ func (br *blockResult) initFromFilterNeededColumns(brSrc *blockResult, bm *bitma
}
}
// appendFilteredColumn adds cSrc with the given bm filter to br.
//
// the br is valid until brSrc, cSrc or bm is updated.
func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResultColumn, bm *bitmap) {
if len(br.timestamps) == 0 {
return
@ -146,23 +168,36 @@ func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResul
cDst.minValue = cSrc.minValue
cDst.maxValue = cSrc.maxValue
cDst.dictValues = cSrc.dictValues
cDst.newValuesEncodedFunc = func(br *blockResult) []string {
valuesEncodedSrc := cSrc.getValuesEncoded(brSrc)
br.fvecs = append(br.fvecs, filteredValuesEncodedCreator{
br: brSrc,
c: cSrc,
bm: bm,
})
cDst.valuesEncodedCreator = &br.fvecs[len(br.fvecs)-1]
}
br.csBuf = append(br.csBuf, cDst)
br.csInitialized = false
}
type filteredValuesEncodedCreator struct {
br *blockResult
c *blockResultColumn
bm *bitmap
}
func (fvec *filteredValuesEncodedCreator) newValuesEncoded(br *blockResult) []string {
valuesEncodedSrc := fvec.c.getValuesEncoded(fvec.br)
valuesBuf := br.valuesBuf
valuesBufLen := len(valuesBuf)
bm.forEachSetBitReadonly(func(idx int) {
fvec.bm.forEachSetBitReadonly(func(idx int) {
valuesBuf = append(valuesBuf, valuesEncodedSrc[idx])
})
br.valuesBuf = valuesBuf
return valuesBuf[valuesBufLen:]
}
}
br.csBuf = append(br.csBuf, cDst)
br.csInitialized = false
}
// cloneValues clones the given values into br and returns the cloned values.
func (br *blockResult) cloneValues(values []string) []string {
@ -287,6 +322,8 @@ func (br *blockResult) initAllColumns(bs *blockSearch, bm *bitmap) {
br.addColumn(bs, bm, ch)
}
}
br.csInitFast()
}
// initRequestedColumns initializes only the requested columns in br according to bs and bm.
@ -314,6 +351,8 @@ func (br *blockResult) initRequestedColumns(bs *blockSearch, bm *bitmap) {
}
}
}
br.csInitFast()
}
func (br *blockResult) mustInit(bs *blockSearch, bm *bitmap) {
@ -433,13 +472,28 @@ func (br *blockResult) addColumn(bs *blockSearch, bm *bitmap, ch *columnHeader)
minValue: ch.minValue,
maxValue: ch.maxValue,
dictValues: ch.valuesDict.values,
newValuesEncodedFunc: func(br *blockResult) []string {
return br.newValuesEncodedFromColumnHeader(bs, bm, ch)
},
})
c := &br.csBuf[len(br.csBuf)-1]
br.svecs = append(br.svecs, searchValuesEncodedCreator{
bs: bs,
bm: bm,
ch: ch,
})
c.valuesEncodedCreator = &br.svecs[len(br.svecs)-1]
br.csInitialized = false
}
type searchValuesEncodedCreator struct {
bs *blockSearch
bm *bitmap
ch *columnHeader
}
func (svec *searchValuesEncodedCreator) newValuesEncoded(br *blockResult) []string {
return br.newValuesEncodedFromColumnHeader(svec.bs, svec.bm, svec.ch)
}
func (br *blockResult) addTimeColumn() {
br.csBuf = append(br.csBuf, blockResultColumn{
name: "_time",
@ -1325,15 +1379,31 @@ func (br *blockResult) getColumnByName(columnName string) *blockResultColumn {
return cs[idx]
}
br.addConstColumn(columnName, "")
return &br.csBuf[len(br.csBuf)-1]
// Search for empty column with the given name
csEmpty := br.csEmpty
for i := range csEmpty {
if csEmpty[i].name == columnName {
return &csEmpty[i]
}
}
// Create missing empty column
br.csEmpty = append(br.csEmpty, blockResultColumn{
name: br.a.copyString(columnName),
isConst: true,
valuesEncoded: getEmptyStrings(1),
})
return &br.csEmpty[len(br.csEmpty)-1]
}
func (br *blockResult) getColumns() []*blockResultColumn {
if br.csInitialized {
if !br.csInitialized {
br.csInit()
}
return br.cs
}
func (br *blockResult) csInit() {
csBuf := br.csBuf
clear(br.cs)
cs := br.cs[:0]
@ -1348,8 +1418,17 @@ func (br *blockResult) getColumns() []*blockResultColumn {
}
br.cs = cs
br.csInitialized = true
}
return br.cs
func (br *blockResult) csInitFast() {
csBuf := br.csBuf
clear(br.cs)
cs := slicesutil.SetLength(br.cs, len(csBuf))
for i := range csBuf {
cs[i] = &csBuf[i]
}
br.cs = cs
br.csInitialized = true
}
func getBlockResultColumnIdxByName(cs []*blockResultColumn, name string) int {
@ -1444,10 +1523,10 @@ type blockResultColumn struct {
// valuesBucketed contains values after getValuesBucketed() call
valuesBucketed []string
// newValuesEncodedFunc must return valuesEncoded.
// valuesEncodedCreator must return valuesEncoded.
//
// This func must be set for non-const and non-time columns if valuesEncoded field isn't set.
newValuesEncodedFunc func(br *blockResult) []string
// This interface must be set for non-const and non-time columns if valuesEncoded field isn't set.
valuesEncodedCreator columnValuesEncodedCreator
// bucketSizeStr contains bucketSizeStr for valuesBucketed
bucketSizeStr string
@ -1456,6 +1535,11 @@ type blockResultColumn struct {
bucketOffsetStr string
}
// columnValuesEncodedCreator must return encoded values for the current column.
type columnValuesEncodedCreator interface {
newValuesEncoded(br *blockResult) []string
}
// clone returns a clone of c backed by data from br.
//
// It is expected that c.valuesEncoded is already initialized for non-time column.
@ -1484,8 +1568,8 @@ func (c *blockResultColumn) clone(br *blockResult) blockResultColumn {
}
cNew.valuesBucketed = br.cloneValues(c.valuesBucketed)
// Do not copy c.newValuesEncodedFunc, since it may refer to data, which may change over time.
// We already copied c.valuesEncoded, so cNew.newValuesEncodedFunc must be nil.
// Do not copy c.valuesEncodedCreator, since it may refer to data, which may change over time.
// We already copied c.valuesEncoded, so cNew.valuesEncodedCreator must be nil.
cNew.bucketSizeStr = c.bucketSizeStr
cNew.bucketOffsetStr = c.bucketOffsetStr
@ -1579,7 +1663,7 @@ func (c *blockResultColumn) getValuesEncoded(br *blockResult) []string {
}
if c.valuesEncoded == nil {
c.valuesEncoded = c.newValuesEncodedFunc(br)
c.valuesEncoded = c.valuesEncodedCreator.newValuesEncoded(br)
}
return c.valuesEncoded
}
View file
@ -321,23 +321,10 @@ func (q *Query) Optimize() {
// Call Optimize for queries from 'in(query)' filters.
optimizeFilterIn(q.f)
// Optimize individual pipes.
for _, p := range q.pipes {
switch t := p.(type) {
case *pipeStats:
for _, f := range t.funcs {
f.iff.optimizeFilterIn()
}
case *pipeReplace:
t.iff.optimizeFilterIn()
case *pipeFormat:
t.iff.optimizeFilterIn()
case *pipeExtract:
t.iff.optimizeFilterIn()
case *pipeUnpackJSON:
t.iff.optimizeFilterIn()
case *pipeUnpackLogfmt:
t.iff.optimizeFilterIn()
}
p.optimize()
}
}
View file
@ -11,15 +11,26 @@ type pipe interface {
	// updateNeededFields must update neededFields and unneededFields with the fields the pipe needs and doesn't need at the input.
updateNeededFields(neededFields, unneededFields fieldsSet)
// newPipeProcessor must return new pipeProcessor for the given ppBase.
// newPipeProcessor must return new pipeProcessor, which writes data to the given ppNext.
//
// workersCount is the number of goroutine workers, which will call writeBlock() method.
//
// If stopCh is closed, the returned pipeProcessor must stop performing CPU-intensive tasks which take more than a few milliseconds.
// It is OK to continue processing pipeProcessor calls if they take less than a few milliseconds.
//
// The returned pipeProcessor may call cancel() at any time in order to notify worker goroutines to stop sending new data to pipeProcessor.
newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor
// The returned pipeProcessor may call cancel() at any time in order to notify the caller to stop sending new data to it.
newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor
	// optimize must optimize the pipe.
	optimize()
	// hasFilterInWithQuery must return true if the pipe contains an 'in(subquery)' filter (recursively).
	hasFilterInWithQuery() bool
	// initFilterInValues must return a new pipe with the initialized values for 'in(subquery)' filters (recursively).
//
// It is OK to return the pipe itself if it doesn't contain 'in(subquery)' filters.
initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error)
}
// pipeProcessor must process a single pipe.
@ -39,7 +50,7 @@ type pipeProcessor interface {
// cancel() may be called also when the pipeProcessor decides to stop accepting new data, even if there is no any error.
writeBlock(workerID uint, br *blockResult)
// flush must flush all the data accumulated in the pipeProcessor to the base pipeProcessor.
// flush must flush all the data accumulated in the pipeProcessor to the next pipeProcessor.
//
// flush is called after all the worker goroutines are stopped.
//
@ -135,6 +146,12 @@ func parsePipe(lex *lexer) (pipe, error) {
return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err)
}
return ps, nil
case lex.isKeyword("pack_json"):
pp, err := parsePackJSON(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'pack_json' pipe: %w", err)
}
return pp, nil
case lex.isKeyword("rename", "mv"):
pr, err := parsePipeRename(lex)
if err != nil {
@ -147,6 +164,12 @@ func parsePipe(lex *lexer) (pipe, error) {
return nil, fmt.Errorf("cannot parse 'replace' pipe: %w", err)
}
return pr, nil
case lex.isKeyword("replace_regexp"):
pr, err := parsePipeReplaceRegexp(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'replace_regexp' pipe: %w", err)
}
return pr, nil
case lex.isKeyword("sort"):
ps, err := parsePipeSort(lex)
if err != nil {
@ -177,6 +200,12 @@ func parsePipe(lex *lexer) (pipe, error) {
return nil, fmt.Errorf("cannot parse 'unpack_logfmt' pipe: %w", err)
}
return pu, nil
case lex.isKeyword("unroll"):
pu, err := parsePipeUnroll(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'unroll' pipe: %w", err)
}
return pu, nil
default:
return nil, fmt.Errorf("unexpected pipe %q", lex.token)
}
View file
@ -50,16 +50,28 @@ func (pc *pipeCopy) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
}
func (pc *pipeCopy) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (pc *pipeCopy) optimize() {
// Nothing to do
}
func (pc *pipeCopy) hasFilterInWithQuery() bool {
return false
}
func (pc *pipeCopy) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pc, nil
}
func (pc *pipeCopy) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeCopyProcessor{
pc: pc,
ppBase: ppBase,
ppNext: ppNext,
}
}
type pipeCopyProcessor struct {
pc *pipeCopy
ppBase pipeProcessor
ppNext pipeProcessor
}
func (pcp *pipeCopyProcessor) writeBlock(workerID uint, br *blockResult) {
@ -68,7 +80,7 @@ func (pcp *pipeCopyProcessor) writeBlock(workerID uint, br *blockResult) {
}
br.copyColumns(pcp.pc.srcFields, pcp.pc.dstFields)
pcp.ppBase.writeBlock(workerID, br)
pcp.ppNext.writeBlock(workerID, br)
}
func (pcp *pipeCopyProcessor) flush() error {
View file
@ -32,16 +32,28 @@ func (pd *pipeDelete) updateNeededFields(neededFields, unneededFields fieldsSet)
}
}
func (pd *pipeDelete) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (pd *pipeDelete) optimize() {
// nothing to do
}
func (pd *pipeDelete) hasFilterInWithQuery() bool {
return false
}
func (pd *pipeDelete) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pd, nil
}
func (pd *pipeDelete) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeDeleteProcessor{
pd: pd,
ppBase: ppBase,
ppNext: ppNext,
}
}
type pipeDeleteProcessor struct {
pd *pipeDelete
ppBase pipeProcessor
ppNext pipeProcessor
}
func (pdp *pipeDeleteProcessor) writeBlock(workerID uint, br *blockResult) {
@ -50,7 +62,7 @@ func (pdp *pipeDeleteProcessor) writeBlock(workerID uint, br *blockResult) {
}
br.deleteColumns(pdp.pd.fields)
pdp.ppBase.writeBlock(workerID, br)
pdp.ppNext.writeBlock(workerID, br)
}
func (pdp *pipeDeleteProcessor) flush() error {
View file
@ -2,6 +2,9 @@ package logstorage
import (
"fmt"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
)
// pipeExtract processes '| extract ...' pipe.
@ -38,6 +41,24 @@ func (pe *pipeExtract) String() string {
return s
}
func (pe *pipeExtract) optimize() {
pe.iff.optimizeFilterIn()
}
func (pe *pipeExtract) hasFilterInWithQuery() bool {
return pe.iff.hasFilterInWithQuery()
}
func (pe *pipeExtract) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pe.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
peNew := *pe
peNew.iff = iffNew
return &peNew, nil
}
func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFieldsOrig := unneededFields.clone()
@ -80,21 +101,129 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
}
}
func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
patterns := make([]*pattern, workersCount)
for i := range patterns {
patterns[i] = pe.ptn.clone()
}
func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeExtractProcessor{
pe: pe,
ppNext: ppNext,
unpackFunc := func(uctx *fieldsUnpackerContext, s string) {
ptn := patterns[uctx.workerID]
ptn.apply(s)
for _, f := range ptn.fields {
uctx.addField(f.name, *f.value)
shards: make([]pipeExtractProcessorShard, workersCount),
}
}
return newPipeUnpackProcessor(workersCount, unpackFunc, ppBase, pe.fromField, "", pe.keepOriginalFields, pe.skipEmptyResults, pe.iff)
type pipeExtractProcessor struct {
pe *pipeExtract
ppNext pipeProcessor
shards []pipeExtractProcessorShard
}
type pipeExtractProcessorShard struct {
pipeExtractProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeExtractProcessorShardNopad{})%128]byte
}
type pipeExtractProcessorShardNopad struct {
bm bitmap
ptn *pattern
resultColumns []*blockResultColumn
resultValues []string
rcs []resultColumn
a arena
}
func (pep *pipeExtractProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
pe := pep.pe
shard := &pep.shards[workerID]
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pe.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
pep.ppNext.writeBlock(workerID, br)
return
}
}
if shard.ptn == nil {
shard.ptn = pe.ptn.clone()
}
ptn := shard.ptn
shard.rcs = slicesutil.SetLength(shard.rcs, len(ptn.fields))
rcs := shard.rcs
for i := range ptn.fields {
rcs[i].name = ptn.fields[i].name
}
c := br.getColumnByName(pe.fromField)
values := c.getValues(br)
shard.resultColumns = slicesutil.SetLength(shard.resultColumns, len(rcs))
resultColumns := shard.resultColumns
for i := range resultColumns {
resultColumns[i] = br.getColumnByName(rcs[i].name)
}
shard.resultValues = slicesutil.SetLength(shard.resultValues, len(rcs))
resultValues := shard.resultValues
hadUpdates := false
vPrev := ""
for rowIdx, v := range values {
if bm.isSetBit(rowIdx) {
if !hadUpdates || vPrev != v {
vPrev = v
hadUpdates = true
ptn.apply(v)
for i, f := range ptn.fields {
v := *f.value
if v == "" && pe.skipEmptyResults || pe.keepOriginalFields {
c := resultColumns[i]
if vOrig := c.getValueAtRow(br, rowIdx); vOrig != "" {
v = vOrig
}
} else {
v = shard.a.copyString(v)
}
resultValues[i] = v
}
}
} else {
for i, c := range resultColumns {
resultValues[i] = c.getValueAtRow(br, rowIdx)
}
}
for i, v := range resultValues {
rcs[i].addValue(v)
}
}
for i := range rcs {
br.addResultColumn(&rcs[i])
}
pep.ppNext.writeBlock(workerID, br)
for i := range rcs {
rcs[i].reset()
}
shard.a.reset()
}
func (pep *pipeExtractProcessor) flush() error {
return nil
}
func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
View file
@ -37,13 +37,25 @@ func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fields
}
}
func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (pf *pipeFieldNames) optimize() {
// nothing to do
}
func (pf *pipeFieldNames) hasFilterInWithQuery() bool {
return false
}
func (pf *pipeFieldNames) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pf, nil
}
func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
shards := make([]pipeFieldNamesProcessorShard, workersCount)
pfp := &pipeFieldNamesProcessor{
pf: pf,
stopCh: stopCh,
ppBase: ppBase,
ppNext: ppNext,
shards: shards,
}
@ -53,7 +65,7 @@ func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struc
type pipeFieldNamesProcessor struct {
pf *pipeFieldNames
stopCh <-chan struct{}
ppBase pipeProcessor
ppNext pipeProcessor
shards []pipeFieldNamesProcessorShard
}
@ -172,10 +184,10 @@ func (wctx *pipeFieldNamesWriteContext) flush() {
wctx.valuesLen = 0
// Flush rcs to ppBase
// Flush rcs to ppNext
br.setResultColumns(wctx.rcs[:], wctx.rowsCount)
wctx.rowsCount = 0
wctx.pfp.ppBase.writeBlock(0, br)
wctx.pfp.ppNext.writeBlock(0, br)
br.reset()
wctx.rcs[0].resetValues()
wctx.rcs[1].resetValues()
View file
@ -49,16 +49,28 @@ func (pf *pipeFields) updateNeededFields(neededFields, unneededFields fieldsSet)
unneededFields.reset()
}
func (pf *pipeFields) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (pf *pipeFields) optimize() {
// nothing to do
}
func (pf *pipeFields) hasFilterInWithQuery() bool {
return false
}
func (pf *pipeFields) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pf, nil
}
func (pf *pipeFields) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeFieldsProcessor{
pf: pf,
ppBase: ppBase,
ppNext: ppNext,
}
}
type pipeFieldsProcessor struct {
pf *pipeFields
ppBase pipeProcessor
ppNext pipeProcessor
}
func (pfp *pipeFieldsProcessor) writeBlock(workerID uint, br *blockResult) {
@ -69,7 +81,7 @@ func (pfp *pipeFieldsProcessor) writeBlock(workerID uint, br *blockResult) {
if !pfp.pf.containsStar {
br.setColumns(pfp.pf.fields)
}
pfp.ppBase.writeBlock(workerID, br)
pfp.ppNext.writeBlock(workerID, br)
}
func (pfp *pipeFieldsProcessor) flush() error {
View file
@ -29,12 +29,30 @@ func (pf *pipeFilter) updateNeededFields(neededFields, unneededFields fieldsSet)
}
}
func (pf *pipeFilter) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (pf *pipeFilter) optimize() {
optimizeFilterIn(pf.f)
}
func (pf *pipeFilter) hasFilterInWithQuery() bool {
return hasFilterInWithQueryForFilter(pf.f)
}
func (pf *pipeFilter) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
fNew, err := initFilterInValuesForFilter(cache, pf.f, getFieldValuesFunc)
if err != nil {
return nil, err
}
pfNew := *pf
	pfNew.f = fNew
return &pfNew, nil
}
func (pf *pipeFilter) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
shards := make([]pipeFilterProcessorShard, workersCount)
pfp := &pipeFilterProcessor{
pf: pf,
ppBase: ppBase,
ppNext: ppNext,
shards: shards,
}
@ -43,7 +61,7 @@ func (pf *pipeFilter) newPipeProcessor(workersCount int, _ <-chan struct{}, _ fu
type pipeFilterProcessor struct {
pf *pipeFilter
ppBase pipeProcessor
ppNext pipeProcessor
shards []pipeFilterProcessorShard
}
@ -72,8 +90,8 @@ func (pfp *pipeFilterProcessor) writeBlock(workerID uint, br *blockResult) {
bm.setBits()
pfp.pf.f.applyToBlockResult(br, bm)
if bm.areAllBitsSet() {
// Fast path - the filter didn't filter out anything - send br to the base pipe as is.
pfp.ppBase.writeBlock(workerID, br)
// Fast path - the filter didn't filter out anything - send br to the next pipe as is.
pfp.ppNext.writeBlock(workerID, br)
return
}
if bm.isZero() {
@ -81,9 +99,9 @@ func (pfp *pipeFilterProcessor) writeBlock(workerID uint, br *blockResult) {
return
}
// Slow path - copy the remaining rows from br to shard.br before sending them to base pipe.
// Slow path - copy the remaining rows from br to shard.br before sending them to the next pipe.
shard.br.initFromFilterAllColumns(br, bm)
pfp.ppBase.writeBlock(workerID, &shard.br)
pfp.ppNext.writeBlock(workerID, &shard.br)
}
func (pfp *pipeFilterProcessor) flush() error {
View file
@ -4,8 +4,6 @@ import (
"fmt"
"strconv"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// pipeFormat processes '| format ...' pipe.
@ -74,10 +72,28 @@ func (pf *pipeFormat) updateNeededFields(neededFields, unneededFields fieldsSet)
}
}
func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (pf *pipeFormat) optimize() {
pf.iff.optimizeFilterIn()
}
func (pf *pipeFormat) hasFilterInWithQuery() bool {
return pf.iff.hasFilterInWithQuery()
}
func (pf *pipeFormat) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pf.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pfNew := *pf
pfNew.iff = iffNew
return &pfNew, nil
}
func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeFormatProcessor{
pf: pf,
ppBase: ppBase,
ppNext: ppNext,
shards: make([]pipeFormatProcessorShard, workersCount),
}
@ -85,7 +101,7 @@ func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ fu
type pipeFormatProcessor struct {
pf *pipeFormat
ppBase pipeProcessor
ppNext pipeProcessor
shards []pipeFormatProcessorShard
}
@ -100,8 +116,8 @@ type pipeFormatProcessorShard struct {
type pipeFormatProcessorShardNopad struct {
bm bitmap
uctx fieldsUnpackerContext
wctx pipeUnpackWriteContext
a arena
rc resultColumn
}
func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) {
@ -110,39 +126,49 @@ func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) {
}
shard := &pfp.shards[workerID]
shard.wctx.init(workerID, pfp.ppBase, pfp.pf.keepOriginalFields, pfp.pf.skipEmptyResults, br)
shard.uctx.init(workerID, "")
pf := pfp.pf
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pfp.pf.iff; iff != nil {
if iff := pf.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
pfp.ppBase.writeBlock(workerID, br)
pfp.ppNext.writeBlock(workerID, br)
return
}
}
shard.rc.name = pf.resultField
resultColumn := br.getColumnByName(pf.resultField)
for rowIdx := range br.timestamps {
v := ""
if bm.isSetBit(rowIdx) {
shard.formatRow(pfp.pf, br, rowIdx)
shard.wctx.writeRow(rowIdx, shard.uctx.fields)
} else {
shard.wctx.writeRow(rowIdx, nil)
v = shard.formatRow(pf, br, rowIdx)
if v == "" && pf.skipEmptyResults || pf.keepOriginalFields {
if vOrig := resultColumn.getValueAtRow(br, rowIdx); vOrig != "" {
v = vOrig
}
}
} else {
v = resultColumn.getValueAtRow(br, rowIdx)
}
shard.rc.addValue(v)
}
shard.wctx.flush()
shard.wctx.reset()
shard.uctx.reset()
br.addResultColumn(&shard.rc)
pfp.ppNext.writeBlock(workerID, br)
shard.a.reset()
shard.rc.reset()
}
func (pfp *pipeFormatProcessor) flush() error {
return nil
}
func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult, rowIdx int) {
func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult, rowIdx int) string {
bb := bbPool.Get()
b := bb.B
for _, step := range pf.steps {
@ -159,10 +185,9 @@ func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult
}
bb.B = b
s := bytesutil.ToUnsafeString(b)
shard.uctx.resetFields()
shard.uctx.addField(pf.resultField, s)
v := shard.a.copyBytesToString(b)
bbPool.Put(bb)
return v
}
func parsePipeFormat(lex *lexer) (*pipeFormat, error) {
View file
@ -17,9 +17,22 @@ func (pl *pipeLimit) String() string {
}
func (pl *pipeLimit) updateNeededFields(_, _ fieldsSet) {
// nothing to do
}
func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor {
func (pl *pipeLimit) optimize() {
// nothing to do
}
func (pl *pipeLimit) hasFilterInWithQuery() bool {
return false
}
func (pl *pipeLimit) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pl, nil
}
func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
if pl.limit == 0 {
// Special case - notify the caller to stop writing data to the returned pipeLimitProcessor
cancel()
@ -27,14 +40,14 @@ func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), p
return &pipeLimitProcessor{
pl: pl,
cancel: cancel,
ppBase: ppBase,
ppNext: ppNext,
}
}
type pipeLimitProcessor struct {
pl *pipeLimit
cancel func()
ppBase pipeProcessor
ppNext pipeProcessor
rowsProcessed atomic.Uint64
}
@ -46,8 +59,8 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
rowsProcessed := plp.rowsProcessed.Add(uint64(len(br.timestamps)))
if rowsProcessed <= plp.pl.limit {
// Fast path - write all the rows to ppBase.
plp.ppBase.writeBlock(workerID, br)
// Fast path - write all the rows to ppNext.
plp.ppNext.writeBlock(workerID, br)
return
}
@ -61,7 +74,7 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
// Write remaining rows.
keepRows := plp.pl.limit - rowsProcessed
br.truncateRows(int(keepRows))
plp.ppBase.writeBlock(workerID, br)
plp.ppNext.writeBlock(workerID, br)
// Notify the caller that it should stop passing more data to writeBlock().
plp.cancel()
View file
@ -17,18 +17,31 @@ func (po *pipeOffset) String() string {
}
func (po *pipeOffset) updateNeededFields(_, _ fieldsSet) {
// nothing to do
}
func (po *pipeOffset) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (po *pipeOffset) optimize() {
// nothing to do
}
func (po *pipeOffset) hasFilterInWithQuery() bool {
return false
}
func (po *pipeOffset) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return po, nil
}
func (po *pipeOffset) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeOffsetProcessor{
po: po,
ppBase: ppBase,
ppNext: ppNext,
}
}
type pipeOffsetProcessor struct {
po *pipeOffset
ppBase pipeProcessor
ppNext pipeProcessor
rowsProcessed atomic.Uint64
}
@ -45,13 +58,13 @@ func (pop *pipeOffsetProcessor) writeBlock(workerID uint, br *blockResult) {
rowsProcessed -= uint64(len(br.timestamps))
if rowsProcessed >= pop.po.offset {
pop.ppBase.writeBlock(workerID, br)
pop.ppNext.writeBlock(workerID, br)
return
}
rowsSkip := pop.po.offset - rowsProcessed
br.skipRows(int(rowsSkip))
pop.ppBase.writeBlock(workerID, br)
pop.ppNext.writeBlock(workerID, br)
}
func (pop *pipeOffsetProcessor) flush() error {
View file
@ -0,0 +1,140 @@
package logstorage
import (
"fmt"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// pipePackJSON processes '| pack_json ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#pack_json-pipe
type pipePackJSON struct {
resultField string
}
func (pp *pipePackJSON) String() string {
s := "pack_json"
if !isMsgFieldName(pp.resultField) {
s += " as " + quoteTokenIfNeeded(pp.resultField)
}
return s
}
func (pp *pipePackJSON) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
if !unneededFields.contains(pp.resultField) {
unneededFields.reset()
}
} else {
if neededFields.contains(pp.resultField) {
neededFields.add("*")
}
}
}
func (pp *pipePackJSON) optimize() {
// nothing to do
}
func (pp *pipePackJSON) hasFilterInWithQuery() bool {
return false
}
func (pp *pipePackJSON) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pp, nil
}
func (pp *pipePackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipePackJSONProcessor{
pp: pp,
ppNext: ppNext,
shards: make([]pipePackJSONProcessorShard, workersCount),
}
}
type pipePackJSONProcessor struct {
pp *pipePackJSON
ppNext pipeProcessor
shards []pipePackJSONProcessorShard
}
type pipePackJSONProcessorShard struct {
pipePackJSONProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipePackJSONProcessorShardNopad{})%128]byte
}
type pipePackJSONProcessorShardNopad struct {
rc resultColumn
buf []byte
fields []Field
}
func (ppp *pipePackJSONProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &ppp.shards[workerID]
shard.rc.name = ppp.pp.resultField
cs := br.getColumns()
buf := shard.buf[:0]
fields := shard.fields
for rowIdx := range br.timestamps {
fields = fields[:0]
for _, c := range cs {
v := c.getValueAtRow(br, rowIdx)
fields = append(fields, Field{
Name: c.name,
Value: v,
})
}
bufLen := len(buf)
buf = marshalFieldsToJSON(buf, fields)
v := bytesutil.ToUnsafeString(buf[bufLen:])
shard.rc.addValue(v)
}
br.addResultColumn(&shard.rc)
ppp.ppNext.writeBlock(workerID, br)
shard.rc.reset()
}
func (ppp *pipePackJSONProcessor) flush() error {
return nil
}
func parsePackJSON(lex *lexer) (*pipePackJSON, error) {
if !lex.isKeyword("pack_json") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "pack_json")
}
lex.nextToken()
	// parse the optional 'as ...' part
resultField := "_msg"
if lex.isKeyword("as") {
lex.nextToken()
field, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result field for 'pack_json': %w", err)
}
resultField = field
}
pp := &pipePackJSON{
resultField: resultField,
}
return pp, nil
}
View file
@ -0,0 +1,101 @@
package logstorage
import (
"testing"
)
func TestParsePipePackJSONSuccess(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeSuccess(t, pipeStr)
}
f(`pack_json`)
f(`pack_json as x`)
}
func TestParsePipePackJSONFailure(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeFailure(t, pipeStr)
}
f(`pack_json foo bar`)
}
func TestPipePackJSON(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// pack into _msg
f(`pack_json`, [][]Field{
{
{"_msg", "x"},
{"foo", `abc`},
{"bar", `cde`},
},
{
{"a", "b"},
{"c", "d"},
},
}, [][]Field{
{
{"_msg", `{"_msg":"x","foo":"abc","bar":"cde"}`},
{"foo", `abc`},
{"bar", `cde`},
},
{
{"_msg", `{"a":"b","c":"d"}`},
{"a", "b"},
{"c", "d"},
},
})
// pack into other field
f(`pack_json as a`, [][]Field{
{
{"_msg", "x"},
{"foo", `abc`},
{"bar", `cde`},
},
{
{"a", "b"},
{"c", "d"},
},
}, [][]Field{
{
{"_msg", `x`},
{"foo", `abc`},
{"bar", `cde`},
{"a", `{"_msg":"x","foo":"abc","bar":"cde"}`},
},
{
{"a", `{"a":"b","c":"d"}`},
{"c", "d"},
},
})
}
func TestPipePackJSONUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f(`pack_json as x`, "*", "", "*", "")
// unneeded fields do not intersect with output
f(`pack_json as x`, "*", "f1,f2", "*", "")
// unneeded fields intersect with output
f(`pack_json as f1`, "*", "f1,f2", "*", "f1,f2")
// needed fields do not intersect with output
f(`pack_json f1`, "x,y", "", "x,y", "")
// needed fields intersect with output
f(`pack_json as f2`, "f2,y", "", "*", "")
}
View file
@ -54,16 +54,28 @@ func (pr *pipeRename) updateNeededFields(neededFields, unneededFields fieldsSet)
}
}
func (pr *pipeRename) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (pr *pipeRename) optimize() {
// nothing to do
}
func (pr *pipeRename) hasFilterInWithQuery() bool {
return false
}
func (pr *pipeRename) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pr, nil
}
func (pr *pipeRename) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeRenameProcessor{
pr: pr,
ppBase: ppBase,
ppNext: ppNext,
}
}
type pipeRenameProcessor struct {
pr *pipeRename
ppBase pipeProcessor
ppNext pipeProcessor
}
func (prp *pipeRenameProcessor) writeBlock(workerID uint, br *blockResult) {
@ -72,7 +84,7 @@ func (prp *pipeRenameProcessor) writeBlock(workerID uint, br *blockResult) {
}
br.renameColumns(prp.pr.srcFields, prp.pr.dstFields)
prp.ppBase.writeBlock(workerID, br)
prp.ppNext.writeBlock(workerID, br)
}
func (prp *pipeRenameProcessor) flush() error {
View file
@ -3,16 +3,13 @@ package logstorage
import (
"fmt"
"strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// pipeReplace processes '| replace ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#replace-pipe
type pipeReplace struct {
srcField string
field string
oldSubstr string
newSubstr string
@ -29,8 +26,8 @@ func (pr *pipeReplace) String() string {
s += " " + pr.iff.String()
}
s += fmt.Sprintf(" (%s, %s)", quoteTokenIfNeeded(pr.oldSubstr), quoteTokenIfNeeded(pr.newSubstr))
if pr.srcField != "_msg" {
s += " at " + quoteTokenIfNeeded(pr.srcField)
if pr.field != "_msg" {
s += " at " + quoteTokenIfNeeded(pr.field)
}
if pr.limit > 0 {
s += fmt.Sprintf(" limit %d", pr.limit)
@ -39,97 +36,37 @@ func (pr *pipeReplace) String() string {
}
func (pr *pipeReplace) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
if !unneededFields.contains(pr.srcField) && pr.iff != nil {
unneededFields.removeFields(pr.iff.neededFields)
}
} else {
if neededFields.contains(pr.srcField) && pr.iff != nil {
neededFields.addFields(pr.iff.neededFields)
}
}
updateNeededFieldsForUpdatePipe(neededFields, unneededFields, pr.field, pr.iff)
}
func (pr *pipeReplace) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
return &pipeReplaceProcessor{
pr: pr,
ppBase: ppBase,
shards: make([]pipeReplaceProcessorShard, workersCount),
}
func (pr *pipeReplace) optimize() {
pr.iff.optimizeFilterIn()
}
type pipeReplaceProcessor struct {
pr *pipeReplace
ppBase pipeProcessor
shards []pipeReplaceProcessorShard
func (pr *pipeReplace) hasFilterInWithQuery() bool {
return pr.iff.hasFilterInWithQuery()
}
type pipeReplaceProcessorShard struct {
pipeReplaceProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeReplaceProcessorShardNopad{})%128]byte
func (pr *pipeReplace) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pr.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
peNew := *pr
peNew.iff = iffNew
return &peNew, nil
}
type pipeReplaceProcessorShardNopad struct {
bm bitmap
uctx fieldsUnpackerContext
wctx pipeUnpackWriteContext
}
func (prp *pipeReplaceProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &prp.shards[workerID]
shard.wctx.init(workerID, prp.ppBase, false, false, br)
shard.uctx.init(workerID, "")
pr := prp.pr
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pr.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
prp.ppBase.writeBlock(workerID, br)
return
}
}
c := br.getColumnByName(pr.srcField)
values := c.getValues(br)
func (pr *pipeReplace) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
updateFunc := func(a *arena, v string) string {
bb := bbPool.Get()
vPrev := ""
shard.uctx.addField(pr.srcField, "")
for rowIdx, v := range values {
if bm.isSetBit(rowIdx) {
if vPrev != v {
bb.B = appendReplace(bb.B[:0], v, pr.oldSubstr, pr.newSubstr, pr.limit)
s := bytesutil.ToUnsafeString(bb.B)
shard.uctx.resetFields()
shard.uctx.addField(pr.srcField, s)
vPrev = v
}
shard.wctx.writeRow(rowIdx, shard.uctx.fields)
} else {
shard.wctx.writeRow(rowIdx, nil)
}
}
result := a.copyBytesToString(bb.B)
bbPool.Put(bb)
shard.wctx.flush()
shard.wctx.reset()
shard.uctx.reset()
return result
}
func (prp *pipeReplaceProcessor) flush() error {
return nil
return newPipeUpdateProcessor(workersCount, updateFunc, ppNext, pr.field, pr.iff)
}
func parsePipeReplace(lex *lexer) (*pipeReplace, error) {
@ -164,7 +101,7 @@ func parsePipeReplace(lex *lexer) (*pipeReplace, error) {
newSubstr, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse newSubstr in 'replace': %w", err)
return nil, fmt.Errorf("cannot parse newSubstr in 'replace(%q': %w", oldSubstr, err)
}
if !lex.isKeyword(")") {
@ -172,14 +109,14 @@ func parsePipeReplace(lex *lexer) (*pipeReplace, error) {
}
lex.nextToken()
srcField := "_msg"
field := "_msg"
if lex.isKeyword("at") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'at' field after 'replace(%q, %q)': %w", oldSubstr, newSubstr, err)
}
srcField = f
field = f
}
limit := uint64(0)
@ -194,7 +131,7 @@ func parsePipeReplace(lex *lexer) (*pipeReplace, error) {
}
pr := &pipeReplace{
srcField: srcField,
field: field,
oldSubstr: oldSubstr,
newSubstr: newSubstr,
limit: limit,
View file
@ -0,0 +1,170 @@
package logstorage
import (
"fmt"
"regexp"
)
// pipeReplaceRegexp processes '| replace_regexp ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#replace_regexp-pipe
type pipeReplaceRegexp struct {
field string
re *regexp.Regexp
replacement string
// limit limits the number of replacements, which can be performed
limit uint64
// iff is an optional filter for skipping the replace_regexp operation
iff *ifFilter
}
func (pr *pipeReplaceRegexp) String() string {
s := "replace_regexp"
if pr.iff != nil {
s += " " + pr.iff.String()
}
s += fmt.Sprintf(" (%s, %s)", quoteTokenIfNeeded(pr.re.String()), quoteTokenIfNeeded(pr.replacement))
if pr.field != "_msg" {
s += " at " + quoteTokenIfNeeded(pr.field)
}
if pr.limit > 0 {
s += fmt.Sprintf(" limit %d", pr.limit)
}
return s
}
func (pr *pipeReplaceRegexp) updateNeededFields(neededFields, unneededFields fieldsSet) {
updateNeededFieldsForUpdatePipe(neededFields, unneededFields, pr.field, pr.iff)
}
func (pr *pipeReplaceRegexp) optimize() {
pr.iff.optimizeFilterIn()
}
func (pr *pipeReplaceRegexp) hasFilterInWithQuery() bool {
return pr.iff.hasFilterInWithQuery()
}
func (pr *pipeReplaceRegexp) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pr.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
peNew := *pr
peNew.iff = iffNew
return &peNew, nil
}
func (pr *pipeReplaceRegexp) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
updateFunc := func(a *arena, v string) string {
bb := bbPool.Get()
bb.B = appendReplaceRegexp(bb.B[:0], v, pr.re, pr.replacement, pr.limit)
result := a.copyBytesToString(bb.B)
bbPool.Put(bb)
return result
}
return newPipeUpdateProcessor(workersCount, updateFunc, ppNext, pr.field, pr.iff)
}
func parsePipeReplaceRegexp(lex *lexer) (*pipeReplaceRegexp, error) {
if !lex.isKeyword("replace_regexp") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "replace_regexp")
}
lex.nextToken()
// parse optional if (...)
var iff *ifFilter
if lex.isKeyword("if") {
f, err := parseIfFilter(lex)
if err != nil {
return nil, err
}
iff = f
}
if !lex.isKeyword("(") {
return nil, fmt.Errorf("missing '(' after 'replace_regexp'")
}
lex.nextToken()
reStr, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse reStr in 'replace_regexp': %w", err)
}
re, err := regexp.Compile(reStr)
if err != nil {
return nil, fmt.Errorf("cannot parse regexp %q in 'replace_regexp': %w", reStr, err)
}
if !lex.isKeyword(",") {
return nil, fmt.Errorf("missing ',' after 'replace_regexp(%q'", reStr)
}
lex.nextToken()
replacement, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse replacement in 'replace_regexp(%q': %w", reStr, err)
}
if !lex.isKeyword(")") {
return nil, fmt.Errorf("missing ')' after 'replace_regexp(%q, %q'", reStr, replacement)
}
lex.nextToken()
field := "_msg"
if lex.isKeyword("at") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'at' field after 'replace_regexp(%q, %q)': %w", reStr, replacement, err)
}
field = f
}
limit := uint64(0)
if lex.isKeyword("limit") {
lex.nextToken()
n, ok := tryParseUint64(lex.token)
if !ok {
return nil, fmt.Errorf("cannot parse 'limit %s' in 'replace_regexp'", lex.token)
}
lex.nextToken()
limit = n
}
pr := &pipeReplaceRegexp{
field: field,
re: re,
replacement: replacement,
limit: limit,
iff: iff,
}
return pr, nil
}
func appendReplaceRegexp(dst []byte, s string, re *regexp.Regexp, replacement string, limit uint64) []byte {
if len(s) == 0 {
return dst
}
replacements := uint64(0)
for {
locs := re.FindStringSubmatchIndex(s)
if locs == nil {
return append(dst, s...)
}
start := locs[0]
dst = append(dst, s[:start]...)
end := locs[1]
dst = re.ExpandString(dst, replacement, s, locs)
s = s[end:]
replacements++
if limit > 0 && replacements >= limit {
return append(dst, s...)
}
}
}
View file
@ -0,0 +1,200 @@
package logstorage
import (
"regexp"
"testing"
)
func TestParsePipeReplaceRegexpSuccess(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeSuccess(t, pipeStr)
}
f(`replace_regexp (foo, bar)`)
f(`replace_regexp ("foo[^ ]+bar|baz", "bar${1}x$0")`)
f(`replace_regexp (" ", "") at x`)
f(`replace_regexp if (x:y) ("-", ":") at a`)
f(`replace_regexp (" ", "") at x limit 10`)
f(`replace_regexp if (x:y) (" ", "") at foo limit 10`)
}
func TestParsePipeReplaceRegexpFailure(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeFailure(t, pipeStr)
}
f(`replace_regexp`)
f(`replace_regexp if`)
f(`replace_regexp foo`)
f(`replace_regexp (`)
f(`replace_regexp (foo`)
f(`replace_regexp (foo,`)
f(`replace_regexp(foo,bar`)
f(`replace_regexp(foo,bar,baz)`)
f(`replace_regexp(foo,bar) abc`)
f(`replace_regexp(bar,baz) limit`)
f(`replace_regexp(bar,baz) limit N`)
f(`replace_regexp ("foo[", "bar")`)
}
func TestPipeReplaceRegexp(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// replace_regexp with placeholders
f(`replace_regexp ("foo(.+?)bar", "q-$1-x")`, [][]Field{
{
{"_msg", `abc foo a bar foobar foo b bar`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
}, [][]Field{
{
{"_msg", `abc q- a -x q-bar foo b -x`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
})
// replace_regexp without limits at _msg
f(`replace_regexp ("[_/]", "-")`, [][]Field{
{
{"_msg", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
}, [][]Field{
{
{"_msg", `a-bc-d-ef`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
})
// replace_regexp with limit 1 at foo
f(`replace_regexp ("[_/]", "-") at foo limit 1`, [][]Field{
{
{"foo", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
}, [][]Field{
{
{"foo", `a-bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
})
// replace_regexp with limit 100 at foo
f(`replace_regexp ("[_/]", "-") at foo limit 100`, [][]Field{
{
{"foo", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
}, [][]Field{
{
{"foo", `a-bc-d-ef`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
})
// conditional replace_regexp at foo
f(`replace_regexp if (bar:abc) ("[_/]", "") at foo`, [][]Field{
{
{"foo", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `123_45/6`},
{"bar", "abc"},
},
}, [][]Field{
{
{"foo", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `123456`},
{"bar", "abc"},
},
})
}
func TestPipeReplaceRegexpUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f(`replace_regexp ("a", "b") at x`, "*", "", "*", "")
f(`replace_regexp if (f1:q) ("a", "b") at x`, "*", "", "*", "")
// unneeded fields do not intersect with at field
f(`replace_regexp ("a", "b") at x`, "*", "f1,f2", "*", "f1,f2")
f(`replace_regexp if (f3:q) ("a", "b") at x`, "*", "f1,f2", "*", "f1,f2")
f(`replace_regexp if (f2:q) ("a", "b") at x`, "*", "f1,f2", "*", "f1")
// unneeded fields intersect with at field
f(`replace_regexp ("a", "b") at x`, "*", "x,y", "*", "x,y")
f(`replace_regexp if (f1:q) ("a", "b") at x`, "*", "x,y", "*", "x,y")
f(`replace_regexp if (x:q) ("a", "b") at x`, "*", "x,y", "*", "x,y")
f(`replace_regexp if (y:q) ("a", "b") at x`, "*", "x,y", "*", "x,y")
// needed fields do not intersect with at field
f(`replace_regexp ("a", "b") at x`, "f2,y", "", "f2,y", "")
f(`replace_regexp if (f1:q) ("a", "b") at x`, "f2,y", "", "f2,y", "")
// needed fields intersect with at field
f(`replace_regexp ("a", "b") at y`, "f2,y", "", "f2,y", "")
f(`replace_regexp if (f1:q) ("a", "b") at y`, "f2,y", "", "f1,f2,y", "")
}
func TestAppendReplaceRegexp(t *testing.T) {
f := func(s, reStr, replacement string, limit int, resultExpected string) {
t.Helper()
re := regexp.MustCompile(reStr)
result := appendReplaceRegexp(nil, s, re, replacement, uint64(limit))
if string(result) != resultExpected {
t.Fatalf("unexpected result for appendReplaceRegexp(%q, %q, %q, %d)\ngot\n%s\nwant\n%s", s, reStr, replacement, limit, result, resultExpected)
}
}
f("", "", "", 0, "")
f("", "foo", "bar", 0, "")
f("abc", "foo", "bar", 0, "abc")
f("foo", "fo+", "bar", 0, "bar")
f("foox", "fo+", "bar", 0, "barx")
f("afoo", "fo+", "bar", 0, "abar")
f("afoox", "fo+", "bar", 0, "abarx")
f("foo-bar/baz", "[-/]", "_", 0, "foo_bar_baz")
f("foo bar/ baz ", "[ /]", "", 2, "foobar baz ")
// placeholders
f("afoo abc barz", "a([^ ]+)", "b${1}x", 0, "bfoox bbcx bbrzx")
f("afoo abc barz", "a([^ ]+)", "b${1}x", 1, "bfoox abc barz")
}

View file

@ -163,10 +163,11 @@ func TestAppendReplace(t *testing.T) {
f("", "", "", 0, "")
f("", "foo", "bar", 0, "")
f("abc", "foo", "bar", 0, "abc")
f("foo", "foo", "bar", 0, "bar")
f("foox", "foo", "bar", 0, "barx")
f("afoo", "foo", "bar", 0, "abar")
f("afoox", "foo", "bar", 0, "abarx")
f("foo-bar-baz", "-", "_", 0, "foo_bar_baz")
f("foo bar baz ", " ", "", 0, "foobarbaz")
f("foo bar baz ", " ", "", 1, "foobar baz ")
}

View file

@ -67,14 +67,26 @@ func (ps *pipeSort) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
}
func (ps *pipeSort) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor {
if ps.limit > 0 {
return newPipeTopkProcessor(ps, workersCount, stopCh, cancel, ppBase)
}
return newPipeSortProcessor(ps, workersCount, stopCh, cancel, ppBase)
func (ps *pipeSort) optimize() {
// nothing to do
}
func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor {
func (ps *pipeSort) hasFilterInWithQuery() bool {
return false
}
func (ps *pipeSort) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return ps, nil
}
func (ps *pipeSort) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
if ps.limit > 0 {
return newPipeTopkProcessor(ps, workersCount, stopCh, cancel, ppNext)
}
return newPipeSortProcessor(ps, workersCount, stopCh, cancel, ppNext)
}
func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
maxStateSize := int64(float64(memory.Allowed()) * 0.2)
shards := make([]pipeSortProcessorShard, workersCount)
@ -92,7 +104,7 @@ func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
ps: ps,
stopCh: stopCh,
cancel: cancel,
ppBase: ppBase,
ppNext: ppNext,
shards: shards,
@ -107,7 +119,7 @@ type pipeSortProcessor struct {
ps *pipeSort
stopCh <-chan struct{}
cancel func()
ppBase pipeProcessor
ppNext pipeProcessor
shards []pipeSortProcessorShard
@ -522,7 +534,7 @@ func (wctx *pipeSortWriteContext) writeNextRow(shard *pipeSortProcessorShard) {
}
}
if !areEqualColumns {
// send the current block to ppBase and construct a block with new set of columns
// send the current block to ppNext and construct a block with new set of columns
wctx.flush()
rcs = wctx.rcs[:0]
@ -561,10 +573,10 @@ func (wctx *pipeSortWriteContext) flush() {
wctx.valuesLen = 0
// Flush rcs to ppBase
// Flush rcs to ppNext
br.setResultColumns(rcs, wctx.rowsCount)
wctx.rowsCount = 0
wctx.psp.ppBase.writeBlock(0, br)
wctx.psp.ppNext.writeBlock(0, br)
br.reset()
for i := range rcs {
rcs[i].resetValues()

View file

@ -116,24 +116,47 @@ func (ps *pipeStats) updateNeededFields(neededFields, unneededFields fieldsSet)
unneededFields.reset()
}
func (ps *pipeStats) optimize() {
for _, f := range ps.funcs {
f.iff.optimizeFilterIn()
}
}
func (ps *pipeStats) hasFilterInWithQuery() bool {
for _, f := range ps.funcs {
if f.iff.hasFilterInWithQuery() {
return true
}
}
return false
}
func (ps *pipeStats) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
funcsNew := make([]pipeStatsFunc, len(ps.funcs))
for i, f := range ps.funcs {
iffNew, err := f.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
f.iff = iffNew
funcsNew[i] = f
}
psNew := *ps
psNew.funcs = funcsNew
return &psNew, nil
}
const stateSizeBudgetChunk = 1 << 20
func (ps *pipeStats) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor {
func (ps *pipeStats) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
maxStateSize := int64(float64(memory.Allowed()) * 0.3)
shards := make([]pipeStatsProcessorShard, workersCount)
funcsLen := len(ps.funcs)
for i := range shards {
shards[i] = pipeStatsProcessorShard{
pipeStatsProcessorShardNopad: pipeStatsProcessorShardNopad{
ps: ps,
m: make(map[string]*pipeStatsGroup),
bms: make([]bitmap, funcsLen),
brs: make([]*blockResult, funcsLen),
brsBuf: make([]blockResult, funcsLen),
stateSizeBudget: stateSizeBudgetChunk,
},
}
@ -144,7 +167,7 @@ func (ps *pipeStats) newPipeProcessor(workersCount int, stopCh <-chan struct{},
ps: ps,
stopCh: stopCh,
cancel: cancel,
ppBase: ppBase,
ppNext: ppNext,
shards: shards,
@ -159,7 +182,7 @@ type pipeStatsProcessor struct {
ps *pipeStats
stopCh <-chan struct{}
cancel func()
ppBase pipeProcessor
ppNext pipeProcessor
shards []pipeStatsProcessorShard
@ -190,7 +213,22 @@ type pipeStatsProcessorShardNopad struct {
stateSizeBudget int
}
func (shard *pipeStatsProcessorShard) init() {
if shard.m != nil {
// Already initialized
return
}
funcsLen := len(shard.ps.funcs)
shard.m = make(map[string]*pipeStatsGroup)
shard.bms = make([]bitmap, funcsLen)
shard.brs = make([]*blockResult, funcsLen)
shard.brsBuf = make([]blockResult, funcsLen)
}
func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
shard.init()
byFields := shard.ps.byFields
// Apply per-function filters
@ -398,7 +436,9 @@ func (psp *pipeStatsProcessor) flush() error {
// Merge states across shards
shards := psp.shards
m := shards[0].m
shardMain := &shards[0]
shardMain.init()
m := shardMain.m
shards = shards[1:]
for i := range shards {
shard := &shards[i]
@ -420,12 +460,12 @@ func (psp *pipeStatsProcessor) flush() error {
}
}
// Write per-group states to ppBase
// Write per-group states to ppNext
byFields := psp.ps.byFields
if len(byFields) == 0 && len(m) == 0 {
// Special case - zero matching rows.
_ = shards[0].getPipeStatsGroup(nil)
m = shards[0].m
_ = shardMain.getPipeStatsGroup(nil)
m = shardMain.m
}
rcs := make([]resultColumn, 0, len(byFields)+len(psp.ps.funcs))
@ -480,7 +520,7 @@ func (psp *pipeStatsProcessor) flush() error {
if valuesLen >= 1_000_000 {
br.setResultColumns(rcs, rowsCount)
rowsCount = 0
psp.ppBase.writeBlock(0, &br)
psp.ppNext.writeBlock(0, &br)
br.reset()
for i := range rcs {
rcs[i].resetValues()
@ -490,7 +530,7 @@ func (psp *pipeStatsProcessor) flush() error {
}
br.setResultColumns(rcs, rowsCount)
psp.ppBase.writeBlock(0, &br)
psp.ppNext.writeBlock(0, &br)
return nil
}

View file

@ -13,7 +13,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
)
func newPipeTopkProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor {
func newPipeTopkProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
maxStateSize := int64(float64(memory.Allowed()) * 0.2)
shards := make([]pipeTopkProcessorShard, workersCount)
@ -31,7 +31,7 @@ func newPipeTopkProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
ps: ps,
stopCh: stopCh,
cancel: cancel,
ppBase: ppBase,
ppNext: ppNext,
shards: shards,
@ -46,7 +46,7 @@ type pipeTopkProcessor struct {
ps *pipeSort
stopCh <-chan struct{}
cancel func()
ppBase pipeProcessor
ppNext pipeProcessor
shards []pipeTopkProcessorShard
@ -464,7 +464,7 @@ func (wctx *pipeTopkWriteContext) writeNextRow(shard *pipeTopkProcessorShard) bo
}
}
if !areEqualColumns {
// send the current block to ppBase and construct a block with new set of columns
// send the current block to ppNext and construct a block with new set of columns
wctx.flush()
rcs = wctx.rcs[:0]
@ -508,10 +508,10 @@ func (wctx *pipeTopkWriteContext) flush() {
wctx.valuesLen = 0
// Flush rcs to ppBase
// Flush rcs to ppNext
br.setResultColumns(rcs, wctx.rowsCount)
wctx.rowsCount = 0
wctx.ptp.ppBase.writeBlock(0, br)
wctx.ptp.ppNext.writeBlock(0, br)
br.reset()
for i := range rcs {
rcs[i].resetValues()

View file

@ -32,7 +32,7 @@ func (pu *pipeUniq) String() string {
s += " by (" + fieldNamesString(pu.byFields) + ")"
}
if pu.hitsFieldName != "" {
s += " hits"
s += " with hits"
}
if pu.limit > 0 {
s += fmt.Sprintf(" limit %d", pu.limit)
@ -51,7 +51,19 @@ func (pu *pipeUniq) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
}
func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor {
func (pu *pipeUniq) optimize() {
// nothing to do
}
func (pu *pipeUniq) hasFilterInWithQuery() bool {
return false
}
func (pu *pipeUniq) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pu, nil
}
func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
maxStateSize := int64(float64(memory.Allowed()) * 0.2)
shards := make([]pipeUniqProcessorShard, workersCount)
@ -69,7 +81,7 @@ func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, c
pu: pu,
stopCh: stopCh,
cancel: cancel,
ppBase: ppBase,
ppNext: ppNext,
shards: shards,
@ -84,7 +96,7 @@ type pipeUniqProcessor struct {
pu *pipeUniq
stopCh <-chan struct{}
cancel func()
ppBase pipeProcessor
ppNext pipeProcessor
shards []pipeUniqProcessorShard
@ -418,7 +430,7 @@ func (wctx *pipeUniqWriteContext) writeRow(rowFields []Field) {
}
}
if !areEqualColumns {
// send the current block to ppBase and construct a block with new set of columns
// send the current block to ppNext and construct a block with new set of columns
wctx.flush()
rcs = wctx.rcs[:0]
@ -446,10 +458,10 @@ func (wctx *pipeUniqWriteContext) flush() {
wctx.valuesLen = 0
// Flush rcs to ppBase
// Flush rcs to ppNext
br.setResultColumns(rcs, wctx.rowsCount)
wctx.rowsCount = 0
wctx.pup.ppBase.writeBlock(0, br)
wctx.pup.ppNext.writeBlock(0, br)
br.reset()
for i := range rcs {
rcs[i].resetValues()
@ -477,6 +489,12 @@ func parsePipeUniq(lex *lexer) (*pipeUniq, error) {
pu.byFields = bfs
}
if lex.isKeyword("with") {
lex.nextToken()
if !lex.isKeyword("hits") {
return nil, fmt.Errorf("missing 'hits' after 'with'")
}
}
if lex.isKeyword("hits") {
lex.nextToken()
hitsFieldName := "hits"

View file

@ -11,15 +11,15 @@ func TestParsePipeUniqSuccess(t *testing.T) {
}
f(`uniq`)
f(`uniq hits`)
f(`uniq with hits`)
f(`uniq limit 10`)
f(`uniq hits limit 10`)
f(`uniq with hits limit 10`)
f(`uniq by (x)`)
f(`uniq by (x) limit 10`)
f(`uniq by (x, y)`)
f(`uniq by (x, y) hits`)
f(`uniq by (x, y) with hits`)
f(`uniq by (x, y) limit 10`)
f(`uniq by (x, y) hits limit 10`)
f(`uniq by (x, y) with hits limit 10`)
}
func TestParsePipeUniqFailure(t *testing.T) {
@ -33,6 +33,7 @@ func TestParsePipeUniqFailure(t *testing.T) {
f(`uniq by hits`)
f(`uniq by(x) limit`)
f(`uniq by(x) limit foo`)
f(`uniq by (x) with`)
}
func TestPipeUniq(t *testing.T) {
@ -365,10 +366,12 @@ func TestPipeUniqUpdateNeededFields(t *testing.T) {
f("uniq by()", "*", "", "*", "")
f("uniq by(*)", "*", "", "*", "")
f("uniq by(f1,f2)", "*", "", "f1,f2", "")
f("uniq by(f1,f2) with hits", "*", "", "f1,f2", "")
// all the needed fields, unneeded fields do not intersect with src
f("uniq by(s1, s2)", "*", "f1,f2", "s1,s2", "")
f("uniq", "*", "f1,f2", "*", "")
f("uniq with hits", "*", "f1,f2", "*", "")
// all the needed fields, unneeded fields intersect with src
f("uniq by(s1, s2)", "*", "s1,f1,f2", "s1,s2", "")

View file

@ -6,6 +6,49 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
func updateNeededFieldsForUnpackPipe(fromField string, outFields []string, keepOriginalFields, skipEmptyResults bool, iff *ifFilter, neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFieldsOrig := unneededFields.clone()
unneededFieldsCount := 0
if len(outFields) > 0 {
for _, f := range outFields {
if unneededFieldsOrig.contains(f) {
unneededFieldsCount++
}
if !keepOriginalFields && !skipEmptyResults {
unneededFields.add(f)
}
}
}
if len(outFields) == 0 || unneededFieldsCount < len(outFields) {
unneededFields.remove(fromField)
if iff != nil {
unneededFields.removeFields(iff.neededFields)
}
}
} else {
neededFieldsOrig := neededFields.clone()
needFromField := len(outFields) == 0
if len(outFields) > 0 {
needFromField = false
for _, f := range outFields {
if neededFieldsOrig.contains(f) {
needFromField = true
}
if !keepOriginalFields && !skipEmptyResults {
neededFields.remove(f)
}
}
}
if needFromField {
neededFields.add(fromField)
if iff != nil {
neededFields.addFields(iff.neededFields)
}
}
}
}
type fieldsUnpackerContext struct {
workerID uint
fieldPrefix string
@ -53,12 +96,12 @@ func (uctx *fieldsUnpackerContext) addField(name, value string) {
})
}
func newPipeUnpackProcessor(workersCount int, unpackFunc func(uctx *fieldsUnpackerContext, s string), ppBase pipeProcessor,
func newPipeUnpackProcessor(workersCount int, unpackFunc func(uctx *fieldsUnpackerContext, s string), ppNext pipeProcessor,
fromField string, fieldPrefix string, keepOriginalFields, skipEmptyResults bool, iff *ifFilter) *pipeUnpackProcessor {
return &pipeUnpackProcessor{
unpackFunc: unpackFunc,
ppBase: ppBase,
ppNext: ppNext,
shards: make([]pipeUnpackProcessorShard, workersCount),
@ -72,7 +115,7 @@ func newPipeUnpackProcessor(workersCount int, unpackFunc func(uctx *fieldsUnpack
type pipeUnpackProcessor struct {
unpackFunc func(uctx *fieldsUnpackerContext, s string)
ppBase pipeProcessor
ppNext pipeProcessor
shards []pipeUnpackProcessorShard
@ -104,7 +147,7 @@ func (pup *pipeUnpackProcessor) writeBlock(workerID uint, br *blockResult) {
}
shard := &pup.shards[workerID]
shard.wctx.init(workerID, pup.ppBase, pup.keepOriginalFields, pup.skipEmptyResults, br)
shard.wctx.init(workerID, pup.ppNext, pup.keepOriginalFields, pup.skipEmptyResults, br)
shard.uctx.init(workerID, pup.fieldPrefix)
bm := &shard.bm
@ -113,7 +156,7 @@ func (pup *pipeUnpackProcessor) writeBlock(workerID uint, br *blockResult) {
if pup.iff != nil {
pup.iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
pup.ppBase.writeBlock(workerID, br)
pup.ppNext.writeBlock(workerID, br)
return
}
}
@ -132,13 +175,16 @@ func (pup *pipeUnpackProcessor) writeBlock(workerID uint, br *blockResult) {
}
} else {
values := c.getValues(br)
vPrevApplied := ""
vPrev := ""
hadUnpacks := false
for i, v := range values {
if bm.isSetBit(i) {
if vPrevApplied != v {
if !hadUnpacks || vPrev != v {
vPrev = v
hadUnpacks = true
shard.uctx.resetFields()
pup.unpackFunc(&shard.uctx, v)
vPrevApplied = v
}
shard.wctx.writeRow(i, shard.uctx.fields)
} else {
@ -158,7 +204,7 @@ func (pup *pipeUnpackProcessor) flush() error {
type pipeUnpackWriteContext struct {
workerID uint
ppBase pipeProcessor
ppNext pipeProcessor
keepOriginalFields bool
skipEmptyResults bool
@ -177,7 +223,7 @@ type pipeUnpackWriteContext struct {
func (wctx *pipeUnpackWriteContext) reset() {
wctx.workerID = 0
wctx.ppBase = nil
wctx.ppNext = nil
wctx.keepOriginalFields = false
wctx.brSrc = nil
@ -193,11 +239,11 @@ func (wctx *pipeUnpackWriteContext) reset() {
wctx.valuesLen = 0
}
func (wctx *pipeUnpackWriteContext) init(workerID uint, ppBase pipeProcessor, keepOriginalFields, skipEmptyResults bool, brSrc *blockResult) {
func (wctx *pipeUnpackWriteContext) init(workerID uint, ppNext pipeProcessor, keepOriginalFields, skipEmptyResults bool, brSrc *blockResult) {
wctx.reset()
wctx.workerID = workerID
wctx.ppBase = ppBase
wctx.ppNext = ppNext
wctx.keepOriginalFields = keepOriginalFields
wctx.skipEmptyResults = skipEmptyResults
@ -219,7 +265,7 @@ func (wctx *pipeUnpackWriteContext) writeRow(rowIdx int, extraFields []Field) {
}
}
if !areEqualColumns {
// send the current block to ppBase and construct a block with new set of columns
// send the current block to ppNext and construct a block with new set of columns
wctx.flush()
rcs = wctx.rcs[:0]
@ -264,11 +310,11 @@ func (wctx *pipeUnpackWriteContext) flush() {
wctx.valuesLen = 0
// Flush rcs to ppBase
// Flush rcs to ppNext
br := &wctx.br
br.setResultColumns(rcs, wctx.rowsCount)
wctx.rowsCount = 0
wctx.ppBase.writeBlock(wctx.workerID, br)
wctx.ppNext.writeBlock(wctx.workerID, br)
br.reset()
for i := range rcs {
rcs[i].resetValues()

View file

@ -56,50 +56,25 @@ func (pu *pipeUnpackJSON) updateNeededFields(neededFields, unneededFields fields
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields)
}
func updateNeededFieldsForUnpackPipe(fromField string, outFields []string, keepOriginalFields, skipEmptyResults bool, iff *ifFilter, neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFieldsOrig := unneededFields.clone()
unneededFieldsCount := 0
if len(outFields) > 0 {
for _, f := range outFields {
if unneededFieldsOrig.contains(f) {
unneededFieldsCount++
}
if !keepOriginalFields && !skipEmptyResults {
unneededFields.add(f)
}
}
}
if len(outFields) == 0 || unneededFieldsCount < len(outFields) {
unneededFields.remove(fromField)
if iff != nil {
unneededFields.removeFields(iff.neededFields)
}
}
} else {
neededFieldsOrig := neededFields.clone()
needFromField := len(outFields) == 0
if len(outFields) > 0 {
needFromField = false
for _, f := range outFields {
if neededFieldsOrig.contains(f) {
needFromField = true
}
if !keepOriginalFields && !skipEmptyResults {
neededFields.remove(f)
}
}
}
if needFromField {
neededFields.add(fromField)
if iff != nil {
neededFields.addFields(iff.neededFields)
}
}
}
func (pu *pipeUnpackJSON) optimize() {
pu.iff.optimizeFilterIn()
}
func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (pu *pipeUnpackJSON) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}
func (pu *pipeUnpackJSON) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pu.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
puNew := *pu
puNew.iff = iffNew
return &puNew, nil
}
func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
unpackJSON := func(uctx *fieldsUnpackerContext, s string) {
if len(s) == 0 || s[0] != '{' {
// This isn't a JSON object
@ -134,7 +109,7 @@ func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{},
}
PutJSONParser(p)
}
return newPipeUnpackProcessor(workersCount, unpackJSON, ppBase, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff)
return newPipeUnpackProcessor(workersCount, unpackJSON, ppNext, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff)
}
func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) {

View file

@ -1,10 +1,6 @@
package logstorage
import (
"math/rand"
"slices"
"strings"
"sync"
"testing"
)
@ -166,7 +162,6 @@ func TestPipeUnpackJSON(t *testing.T) {
}, [][]Field{
{
{"_msg", `{"foo":"bar"}`},
{"x", ""},
},
})
@ -313,228 +308,12 @@ func TestPipeUnpackJSON(t *testing.T) {
{"y", `abc`},
},
{
{"y", ""},
{"z", `foobar`},
{"x", `{"z":["bar",123]}`},
},
})
}
func expectPipeResults(t *testing.T, pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
lex := newLexer(pipeStr)
p, err := parsePipe(lex)
if err != nil {
t.Fatalf("unexpected error when parsing %q: %s", pipeStr, err)
}
workersCount := 5
stopCh := make(chan struct{})
cancel := func() {}
ppTest := newTestPipeProcessor()
pp := p.newPipeProcessor(workersCount, stopCh, cancel, ppTest)
brw := newTestBlockResultWriter(workersCount, pp)
for _, row := range rows {
brw.writeRow(row)
}
brw.flush()
pp.flush()
ppTest.expectRows(t, rowsExpected)
}
func newTestBlockResultWriter(workersCount int, ppBase pipeProcessor) *testBlockResultWriter {
return &testBlockResultWriter{
workersCount: workersCount,
ppBase: ppBase,
}
}
type testBlockResultWriter struct {
workersCount int
ppBase pipeProcessor
rcs []resultColumn
br blockResult
rowsCount int
}
func (brw *testBlockResultWriter) writeRow(row []Field) {
if !brw.areSameFields(row) {
brw.flush()
brw.rcs = brw.rcs[:0]
for _, field := range row {
brw.rcs = appendResultColumnWithName(brw.rcs, field.Name)
}
}
for i, field := range row {
brw.rcs[i].addValue(field.Value)
}
brw.rowsCount++
if rand.Intn(5) == 0 {
brw.flush()
}
}
func (brw *testBlockResultWriter) areSameFields(row []Field) bool {
if len(brw.rcs) != len(row) {
return false
}
for i, rc := range brw.rcs {
if rc.name != row[i].Name {
return false
}
}
return true
}
func (brw *testBlockResultWriter) flush() {
brw.br.setResultColumns(brw.rcs, brw.rowsCount)
brw.rowsCount = 0
workerID := rand.Intn(brw.workersCount)
brw.ppBase.writeBlock(uint(workerID), &brw.br)
brw.br.reset()
for i := range brw.rcs {
brw.rcs[i].resetValues()
}
}
func newTestPipeProcessor() *testPipeProcessor {
return &testPipeProcessor{}
}
type testPipeProcessor struct {
resultRowsLock sync.Mutex
resultRows [][]Field
}
func (pp *testPipeProcessor) writeBlock(_ uint, br *blockResult) {
cs := br.getColumns()
var columnValues [][]string
for _, c := range cs {
values := c.getValues(br)
columnValues = append(columnValues, values)
}
for i := range br.timestamps {
row := make([]Field, len(columnValues))
for j, values := range columnValues {
r := &row[j]
r.Name = strings.Clone(cs[j].name)
r.Value = strings.Clone(values[i])
}
pp.resultRowsLock.Lock()
pp.resultRows = append(pp.resultRows, row)
pp.resultRowsLock.Unlock()
}
}
func (pp *testPipeProcessor) flush() error {
return nil
}
func (pp *testPipeProcessor) expectRows(t *testing.T, expectedRows [][]Field) {
t.Helper()
if len(pp.resultRows) != len(expectedRows) {
t.Fatalf("unexpected number of rows; got %d; want %d\nrows got\n%s\nrows expected\n%s",
len(pp.resultRows), len(expectedRows), rowsToString(pp.resultRows), rowsToString(expectedRows))
}
sortTestRows(pp.resultRows)
sortTestRows(expectedRows)
for i, resultRow := range pp.resultRows {
expectedRow := expectedRows[i]
if len(resultRow) != len(expectedRow) {
t.Fatalf("unexpected number of fields at row #%d; got %d; want %d\nrow got\n%s\nrow expected\n%s",
i, len(resultRow), len(expectedRow), rowToString(resultRow), rowToString(expectedRow))
}
for j, resultField := range resultRow {
expectedField := expectedRow[j]
if resultField.Name != expectedField.Name {
t.Fatalf("unexpected field name at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
i, resultField.Name, expectedField.Name, rowToString(resultRow), rowToString(expectedRow))
}
if resultField.Value != expectedField.Value {
t.Fatalf("unexpected value for field %q at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
resultField.Name, i, resultField.Value, expectedField.Value, rowToString(resultRow), rowToString(expectedRow))
}
}
}
}
func sortTestRows(rows [][]Field) {
for _, row := range rows {
sortTestFields(row)
}
slices.SortFunc(rows, func(a, b []Field) int {
reverse := false
if len(a) > len(b) {
reverse = true
a, b = b, a
}
for i, fA := range a {
fB := b[i]
result := cmpTestFields(fA, fB)
if result == 0 {
continue
}
if reverse {
result = -result
}
return result
}
if len(a) == len(b) {
return 0
}
if reverse {
return 1
}
return -1
})
}
func sortTestFields(fields []Field) {
slices.SortFunc(fields, cmpTestFields)
}
func cmpTestFields(a, b Field) int {
if a.Name == b.Name {
if a.Value == b.Value {
return 0
}
if a.Value < b.Value {
return -1
}
return 1
}
if a.Name < b.Name {
return -1
}
return 1
}
func rowsToString(rows [][]Field) string {
a := make([]string, len(rows))
for i, row := range rows {
a[i] = rowToString(row)
}
return strings.Join(a, "\n")
}
func rowToString(row []Field) string {
a := make([]string, len(row))
for i, f := range row {
a[i] = f.String()
}
return "{" + strings.Join(a, ",") + "}"
}
func TestPipeUnpackJSONUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()

View file

@ -54,7 +54,25 @@ func (pu *pipeUnpackLogfmt) updateNeededFields(neededFields, unneededFields fiel
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields)
}
func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
func (pu *pipeUnpackLogfmt) optimize() {
pu.iff.optimizeFilterIn()
}
func (pu *pipeUnpackLogfmt) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}
func (pu *pipeUnpackLogfmt) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pu.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
puNew := *pu
puNew.iff = iffNew
return &puNew, nil
}
func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
unpackLogfmt := func(uctx *fieldsUnpackerContext, s string) {
p := getLogfmtParser()
@ -82,8 +100,7 @@ func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}
putLogfmtParser(p)
}
return newPipeUnpackProcessor(workersCount, unpackLogfmt, ppBase, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff)
return newPipeUnpackProcessor(workersCount, unpackLogfmt, ppNext, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff)
}
func parsePipeUnpackLogfmt(lex *lexer) (*pipeUnpackLogfmt, error) {

View file

@ -151,7 +151,6 @@ func TestPipeUnpackLogfmt(t *testing.T) {
},
}, [][]Field{
{
{"foo", ""},
{"_msg", `foo=bar baz="x y=z" a=b`},
},
})
@ -291,7 +290,6 @@ func TestPipeUnpackLogfmt(t *testing.T) {
{"y", `abc`},
},
{
{"y", ""},
{"z", `foobar`},
{"x", `z=bar`},
},

View file

@ -0,0 +1,284 @@
package logstorage
import (
"fmt"
"slices"
"unsafe"
"github.com/valyala/fastjson"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
)
// pipeUnroll processes '| unroll ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#unroll-pipe
type pipeUnroll struct {
// fields to unroll
fields []string
// iff is an optional filter for skipping the unroll
iff *ifFilter
}
func (pu *pipeUnroll) String() string {
s := "unroll"
if pu.iff != nil {
s += " " + pu.iff.String()
}
s += " by (" + fieldNamesString(pu.fields) + ")"
return s
}
func (pu *pipeUnroll) optimize() {
pu.iff.optimizeFilterIn()
}
func (pu *pipeUnroll) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}
func (pu *pipeUnroll) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pu.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
puNew := *pu
puNew.iff = iffNew
return &puNew, nil
}
func (pu *pipeUnroll) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFieldsCount := 0
for _, f := range pu.fields {
if unneededFields.contains(f) {
unneededFieldsCount++
}
}
if unneededFieldsCount < len(pu.fields) && pu.iff != nil {
unneededFields.removeFields(pu.iff.neededFields)
}
} else {
needIfFields := false
for _, f := range pu.fields {
if neededFields.contains(f) {
needIfFields = true
}
}
if needIfFields && pu.iff != nil {
neededFields.addFields(pu.iff.neededFields)
}
}
}
func (pu *pipeUnroll) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeUnrollProcessor{
pu: pu,
ppNext: ppNext,
shards: make([]pipeUnrollProcessorShard, workersCount),
}
}
type pipeUnrollProcessor struct {
pu *pipeUnroll
ppNext pipeProcessor
shards []pipeUnrollProcessorShard
}
type pipeUnrollProcessorShard struct {
pipeUnrollProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeUnrollProcessorShardNopad{})%128]byte
}
type pipeUnrollProcessorShardNopad struct {
bm bitmap
wctx pipeUnpackWriteContext
a arena
columnValues [][]string
unrolledValues [][]string
valuesBuf []string
fields []Field
}
func (pup *pipeUnrollProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
pu := pup.pu
shard := &pup.shards[workerID]
shard.wctx.init(workerID, pup.ppNext, false, false, br)
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pu.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
pup.ppNext.writeBlock(workerID, br)
return
}
}
shard.columnValues = slicesutil.SetLength(shard.columnValues, len(pu.fields))
columnValues := shard.columnValues
for i, f := range pu.fields {
c := br.getColumnByName(f)
columnValues[i] = c.getValues(br)
}
fields := shard.fields
for rowIdx := range br.timestamps {
if bm.isSetBit(rowIdx) {
shard.writeUnrolledFields(br, pu.fields, columnValues, rowIdx)
} else {
fields = fields[:0]
for i, f := range pu.fields {
v := columnValues[i][rowIdx]
fields = append(fields, Field{
Name: f,
Value: v,
})
}
shard.wctx.writeRow(rowIdx, fields)
}
}
shard.wctx.flush()
shard.wctx.reset()
shard.a.reset()
}
func (shard *pipeUnrollProcessorShard) writeUnrolledFields(br *blockResult, fieldNames []string, columnValues [][]string, rowIdx int) {
// unroll values at rowIdx row
shard.unrolledValues = slicesutil.SetLength(shard.unrolledValues, len(columnValues))
unrolledValues := shard.unrolledValues
valuesBuf := shard.valuesBuf[:0]
for i, values := range columnValues {
v := values[rowIdx]
valuesBufLen := len(valuesBuf)
valuesBuf = unpackJSONArray(valuesBuf, &shard.a, v)
unrolledValues[i] = valuesBuf[valuesBufLen:]
}
shard.valuesBuf = valuesBuf
// find the number of rows across unrolled values
rows := len(unrolledValues[0])
for _, values := range unrolledValues[1:] {
if len(values) > rows {
rows = len(values)
}
}
if rows == 0 {
// Unroll to a single row with empty unrolled values.
rows = 1
}
// write unrolled values to the next pipe.
fields := shard.fields
for unrollIdx := 0; unrollIdx < rows; unrollIdx++ {
fields = fields[:0]
for i, values := range unrolledValues {
v := ""
if unrollIdx < len(values) {
v = values[unrollIdx]
}
fields = append(fields, Field{
Name: fieldNames[i],
Value: v,
})
}
shard.wctx.writeRow(rowIdx, fields)
}
}
func (pup *pipeUnrollProcessor) flush() error {
return nil
}
func parsePipeUnroll(lex *lexer) (*pipeUnroll, error) {
if !lex.isKeyword("unroll") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unroll")
}
lex.nextToken()
// parse optional if (...)
var iff *ifFilter
if lex.isKeyword("if") {
f, err := parseIfFilter(lex)
if err != nil {
return nil, err
}
iff = f
}
// parse by (...)
if lex.isKeyword("by") {
lex.nextToken()
}
fields, err := parseFieldNamesInParens(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'by(...)' at 'unroll': %w", err)
}
if len(fields) == 0 {
return nil, fmt.Errorf("'by(...)' at 'unroll' must contain at least a single field")
}
if slices.Contains(fields, "*") {
return nil, fmt.Errorf("unroll by '*' isn't supported")
}
pu := &pipeUnroll{
fields: fields,
iff: iff,
}
return pu, nil
}
func unpackJSONArray(dst []string, a *arena, s string) []string {
if s == "" || s[0] != '[' {
return dst
}
p := jspp.Get()
defer jspp.Put(p)
jsv, err := p.Parse(s)
if err != nil {
return dst
}
jsa, err := jsv.Array()
if err != nil {
return dst
}
for _, jsv := range jsa {
if jsv.Type() == fastjson.TypeString {
sb, err := jsv.StringBytes()
if err != nil {
logger.Panicf("BUG: unexpected error returned from StringBytes(): %s", err)
}
v := a.copyBytesToString(sb)
dst = append(dst, v)
} else {
bLen := len(a.b)
a.b = jsv.MarshalTo(a.b)
v := bytesutil.ToUnsafeString(a.b[bLen:])
dst = append(dst, v)
}
}
return dst
}
var jspp fastjson.ParserPool
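
As an aside, the array-unpacking behaviour above boils down to the following standalone sketch (not part of the commit; unrollArray is an invented name, and plain Go strings are used instead of the arena):

package main

import (
	"fmt"

	"github.com/valyala/fastjson"
)

// unrollArray is a simplified take on unpackJSONArray: string items are
// returned as-is, other items are re-marshaled back to their JSON text.
func unrollArray(s string) []string {
	var p fastjson.Parser
	v, err := p.Parse(s)
	if err != nil {
		return nil
	}
	items, err := v.Array()
	if err != nil {
		return nil
	}
	result := make([]string, 0, len(items))
	for _, item := range items {
		if item.Type() == fastjson.TypeString {
			b, _ := item.StringBytes()
			result = append(result, string(b))
		} else {
			result = append(result, string(item.MarshalTo(nil)))
		}
	}
	return result
}

func main() {
	fmt.Println(unrollArray(`[1,"foo",["bar",12],{"baz":"x"}]`)) // [1 foo ["bar",12] {"baz":"x"}]
}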

View file

@ -0,0 +1,261 @@
package logstorage
import (
"reflect"
"testing"
)
func TestParsePipeUnrollSuccess(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeSuccess(t, pipeStr)
}
f(`unroll by (foo)`)
f(`unroll if (x:y) by (foo, bar)`)
}
func TestParsePipeUnrollFailure(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeFailure(t, pipeStr)
}
f(`unroll`)
f(`unroll by ()`)
f(`unroll by (*)`)
f(`unroll by (f, *)`)
f(`unroll by`)
f(`unroll (`)
f(`unroll by (foo) bar`)
f(`unroll by (x) if (a:b)`)
}
func TestPipeUnroll(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// unroll by missing field
f("unroll (x)", [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", "w"},
},
}, [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", "w"},
{"x", ""},
},
})
// unroll by field without JSON array
f("unroll (q)", [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", "w"},
},
}, [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", ""},
},
})
// unroll by a single field
f("unroll (a)", [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", "w"},
},
{
{"a", "b"},
{"c", "d"},
},
}, [][]Field{
{
{"a", "foo"},
{"q", "w"},
},
{
{"a", "1"},
{"q", "w"},
},
{
{"a", `{"baz":"x"}`},
{"q", "w"},
},
{
{"a", "[1,2]"},
{"q", "w"},
},
{
{"a", "null"},
{"q", "w"},
},
{
{"a", "NaN"},
{"q", "w"},
},
{
{"a", ""},
{"c", "d"},
},
})
// unroll by multiple fields
f("unroll by (timestamp, value)", [][]Field{
{
{"timestamp", "[1,2,3]"},
{"value", `["foo","bar","baz"]`},
{"other", "abc"},
{"x", "y"},
},
{
{"timestamp", "[1]"},
{"value", `["foo","bar"]`},
},
{
{"timestamp", "[1]"},
{"value", `bar`},
{"q", "w"},
},
}, [][]Field{
{
{"timestamp", "1"},
{"value", "foo"},
{"other", "abc"},
{"x", "y"},
},
{
{"timestamp", "2"},
{"value", "bar"},
{"other", "abc"},
{"x", "y"},
},
{
{"timestamp", "3"},
{"value", "baz"},
{"other", "abc"},
{"x", "y"},
},
{
{"timestamp", "1"},
{"value", "foo"},
},
{
{"timestamp", ""},
{"value", "bar"},
},
{
{"timestamp", "1"},
{"value", ""},
{"q", "w"},
},
})
// conditional unroll (rows not matching the filter are left as is)
f("unroll if (q:abc) (a)", [][]Field{
{
{"a", `asd`},
{"q", "w"},
},
{
{"a", `["foo",123]`},
{"q", "abc"},
},
}, [][]Field{
{
{"a", `asd`},
{"q", "w"},
},
{
{"a", "foo"},
{"q", "abc"},
},
{
{"a", "123"},
{"q", "abc"},
},
})
// unroll by a field which doesn't contain a JSON array in every row
f("unroll (a)", [][]Field{
{
{"a", `asd`},
{"q", "w"},
},
{
{"a", `["foo",123]`},
{"q", "abc"},
},
}, [][]Field{
{
{"a", ``},
{"q", "w"},
},
{
{"a", "foo"},
{"q", "abc"},
},
{
{"a", "123"},
{"q", "abc"},
},
})
}
func TestPipeUnrollUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("unroll (x)", "*", "", "*", "")
f("unroll (x, y)", "*", "", "*", "")
f("unroll if (y:z) (a, b)", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("unroll (x)", "*", "f1,f2", "*", "f1,f2")
f("unroll if (a:b) (x)", "*", "f1,f2", "*", "f1,f2")
f("unroll if (f1:b) (x)", "*", "f1,f2", "*", "f2")
// all the needed fields, unneeded fields intersect with src
f("unroll (x)", "*", "f2,x", "*", "f2,x")
f("unroll if (a:b) (x)", "*", "f2,x", "*", "f2,x")
f("unroll if (f2:b) (x)", "*", "f2,x", "*", "f2,x")
// needed fields do not intersect with src
f("unroll (x)", "f1,f2", "", "f1,f2", "")
f("unroll if (a:b) (x)", "f1,f2", "", "f1,f2", "")
// needed fields intersect with src
f("unroll (x)", "f2,x", "", "f2,x", "")
f("unroll if (a:b) (x)", "f2,x", "", "a,f2,x", "")
}
func TestUnpackJSONArray(t *testing.T) {
f := func(s string, resultExpected []string) {
t.Helper()
var a arena
result := unpackJSONArray(nil, &a, s)
if !reflect.DeepEqual(result, resultExpected) {
t.Fatalf("unexpected result for unpackJSONArray(%q)\ngot\n%q\nwant\n%q", s, result, resultExpected)
}
}
f("", nil)
f("123", nil)
f("foo", nil)
f(`"foo"`, nil)
f(`{"foo":"bar"}`, nil)
f(`[foo`, nil)
f(`[]`, nil)
f(`[1]`, []string{"1"})
f(`[1,"foo",["bar",12],{"baz":"x"},NaN,null]`, []string{"1", "foo", `["bar",12]`, `{"baz":"x"}`, "NaN", "null"})
}
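
To make the multi-field unroll semantics in TestPipeUnroll easier to follow, here is a standalone sketch of the row expansion (not part of the commit; Field is redeclared locally and zipUnrolled is an invented name): shorter arrays are padded with empty strings so every output row carries a value for every unrolled field.

package main

import "fmt"

// Field mirrors the {Name, Value} pairs used in the tests above (local copy for illustration).
type Field struct {
	Name, Value string
}

// zipUnrolled expands one input row into max(len(values)) output rows,
// padding shorter columns with "".
func zipUnrolled(names []string, unrolled [][]string) [][]Field {
	rows := 1
	for _, vals := range unrolled {
		if len(vals) > rows {
			rows = len(vals)
		}
	}
	out := make([][]Field, rows)
	for i := 0; i < rows; i++ {
		row := make([]Field, len(names))
		for j, vals := range unrolled {
			v := ""
			if i < len(vals) {
				v = vals[i]
			}
			row[j] = Field{Name: names[j], Value: v}
		}
		out[i] = row
	}
	return out
}

func main() {
	rows := zipUnrolled([]string{"timestamp", "value"}, [][]string{{"1"}, {"foo", "bar"}})
	fmt.Println(rows) // [[{timestamp 1} {value foo}] [{timestamp } {value bar}]]
}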

View file

@ -0,0 +1,103 @@
package logstorage
import (
"unsafe"
)
func updateNeededFieldsForUpdatePipe(neededFields, unneededFields fieldsSet, field string, iff *ifFilter) {
if neededFields.contains("*") {
if !unneededFields.contains(field) && iff != nil {
unneededFields.removeFields(iff.neededFields)
}
} else {
if neededFields.contains(field) && iff != nil {
neededFields.addFields(iff.neededFields)
}
}
}
func newPipeUpdateProcessor(workersCount int, updateFunc func(a *arena, v string) string, ppNext pipeProcessor, field string, iff *ifFilter) pipeProcessor {
return &pipeUpdateProcessor{
updateFunc: updateFunc,
field: field,
iff: iff,
ppNext: ppNext,
shards: make([]pipeUpdateProcessorShard, workersCount),
}
}
type pipeUpdateProcessor struct {
updateFunc func(a *arena, v string) string
field string
iff *ifFilter
ppNext pipeProcessor
shards []pipeUpdateProcessorShard
}
type pipeUpdateProcessorShard struct {
pipeUpdateProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeUpdateProcessorShardNopad{})%128]byte
}
type pipeUpdateProcessorShardNopad struct {
bm bitmap
rc resultColumn
a arena
}
func (pup *pipeUpdateProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pup.shards[workerID]
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pup.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
pup.ppNext.writeBlock(workerID, br)
return
}
}
shard.rc.name = pup.field
c := br.getColumnByName(pup.field)
values := c.getValues(br)
hadUpdates := false
vPrev := ""
vNew := ""
for rowIdx, v := range values {
if bm.isSetBit(rowIdx) {
// Re-run updateFunc only when the value changes; runs of identical
// values are common for dictionary-encoded columns.
if !hadUpdates || vPrev != v {
vPrev = v
hadUpdates = true
vNew = pup.updateFunc(&shard.a, v)
}
shard.rc.addValue(vNew)
} else {
shard.rc.addValue(v)
}
}
br.addResultColumn(&shard.rc)
pup.ppNext.writeBlock(workerID, br)
shard.rc.reset()
shard.a.reset()
}
func (pup *pipeUpdateProcessor) flush() error {
return nil
}
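
The caching in writeBlock above relies on equal values arriving in runs (typical for dictionary-encoded columns), so updateFunc runs only once per run. A standalone sketch of the same idea (not part of the commit; applyCached is an invented name):

package main

import (
	"fmt"
	"strings"
)

// applyCached applies update to every value, but recomputes it only when the
// current value differs from the previous one.
func applyCached(values []string, update func(string) string) []string {
	out := make([]string, len(values))
	hadUpdates := false
	vPrev, vNew := "", ""
	for i, v := range values {
		if !hadUpdates || v != vPrev {
			vPrev = v
			hadUpdates = true
			vNew = update(v)
		}
		out[i] = vNew
	}
	return out
}

func main() {
	fmt.Println(applyCached([]string{"foo", "foo", "bar"}, strings.ToUpper)) // [FOO FOO BAR]
}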

View file

@ -0,0 +1,224 @@
package logstorage
import (
"math/rand"
"slices"
"strings"
"sync"
"testing"
)
func expectPipeResults(t *testing.T, pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
lex := newLexer(pipeStr)
p, err := parsePipe(lex)
if err != nil {
t.Fatalf("unexpected error when parsing %q: %s", pipeStr, err)
}
workersCount := 5
stopCh := make(chan struct{})
cancel := func() {}
ppTest := newTestPipeProcessor()
pp := p.newPipeProcessor(workersCount, stopCh, cancel, ppTest)
brw := newTestBlockResultWriter(workersCount, pp)
for _, row := range rows {
brw.writeRow(row)
}
brw.flush()
pp.flush()
ppTest.expectRows(t, rowsExpected)
}
func newTestBlockResultWriter(workersCount int, ppNext pipeProcessor) *testBlockResultWriter {
return &testBlockResultWriter{
workersCount: workersCount,
ppNext: ppNext,
}
}
type testBlockResultWriter struct {
workersCount int
ppNext pipeProcessor
rcs []resultColumn
br blockResult
rowsCount int
}
func (brw *testBlockResultWriter) writeRow(row []Field) {
if !brw.areSameFields(row) {
brw.flush()
brw.rcs = brw.rcs[:0]
for _, field := range row {
brw.rcs = appendResultColumnWithName(brw.rcs, field.Name)
}
}
for i, field := range row {
brw.rcs[i].addValue(field.Value)
}
brw.rowsCount++
if rand.Intn(5) == 0 {
brw.flush()
}
}
func (brw *testBlockResultWriter) areSameFields(row []Field) bool {
if len(brw.rcs) != len(row) {
return false
}
for i, rc := range brw.rcs {
if rc.name != row[i].Name {
return false
}
}
return true
}
func (brw *testBlockResultWriter) flush() {
brw.br.setResultColumns(brw.rcs, brw.rowsCount)
brw.rowsCount = 0
workerID := rand.Intn(brw.workersCount)
brw.ppNext.writeBlock(uint(workerID), &brw.br)
brw.br.reset()
for i := range brw.rcs {
brw.rcs[i].resetValues()
}
}
func newTestPipeProcessor() *testPipeProcessor {
return &testPipeProcessor{}
}
type testPipeProcessor struct {
resultRowsLock sync.Mutex
resultRows [][]Field
}
func (pp *testPipeProcessor) writeBlock(_ uint, br *blockResult) {
cs := br.getColumns()
var columnValues [][]string
for _, c := range cs {
values := c.getValues(br)
columnValues = append(columnValues, values)
}
for i := range br.timestamps {
row := make([]Field, len(columnValues))
for j, values := range columnValues {
r := &row[j]
r.Name = strings.Clone(cs[j].name)
r.Value = strings.Clone(values[i])
}
pp.resultRowsLock.Lock()
pp.resultRows = append(pp.resultRows, row)
pp.resultRowsLock.Unlock()
}
}
func (pp *testPipeProcessor) flush() error {
return nil
}
func (pp *testPipeProcessor) expectRows(t *testing.T, expectedRows [][]Field) {
t.Helper()
if len(pp.resultRows) != len(expectedRows) {
t.Fatalf("unexpected number of rows; got %d; want %d\nrows got\n%s\nrows expected\n%s",
len(pp.resultRows), len(expectedRows), rowsToString(pp.resultRows), rowsToString(expectedRows))
}
sortTestRows(pp.resultRows)
sortTestRows(expectedRows)
for i, resultRow := range pp.resultRows {
expectedRow := expectedRows[i]
if len(resultRow) != len(expectedRow) {
t.Fatalf("unexpected number of fields at row #%d; got %d; want %d\nrow got\n%s\nrow expected\n%s",
i, len(resultRow), len(expectedRow), rowToString(resultRow), rowToString(expectedRow))
}
for j, resultField := range resultRow {
expectedField := expectedRow[j]
if resultField.Name != expectedField.Name {
t.Fatalf("unexpected field name at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
i, resultField.Name, expectedField.Name, rowToString(resultRow), rowToString(expectedRow))
}
if resultField.Value != expectedField.Value {
t.Fatalf("unexpected value for field %q at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
resultField.Name, i, resultField.Value, expectedField.Value, rowToString(resultRow), rowToString(expectedRow))
}
}
}
}
func sortTestRows(rows [][]Field) {
for _, row := range rows {
sortTestFields(row)
}
slices.SortFunc(rows, func(a, b []Field) int {
reverse := false
if len(a) > len(b) {
reverse = true
a, b = b, a
}
for i, fA := range a {
fB := b[i]
result := cmpTestFields(fA, fB)
if result == 0 {
continue
}
if reverse {
result = -result
}
return result
}
if len(a) == len(b) {
return 0
}
if reverse {
return 1
}
return -1
})
}
func sortTestFields(fields []Field) {
slices.SortFunc(fields, cmpTestFields)
}
func cmpTestFields(a, b Field) int {
if a.Name == b.Name {
if a.Value == b.Value {
return 0
}
if a.Value < b.Value {
return -1
}
return 1
}
if a.Name < b.Name {
return -1
}
return 1
}
func rowsToString(rows [][]Field) string {
a := make([]string, len(rows))
for i, row := range rows {
a[i] = rowToString(row)
}
return strings.Join(a, "\n")
}
func rowToString(row []Field) string {
a := make([]string, len(row))
for i, f := range row {
a[i] = f.String()
}
return "{" + strings.Join(a, ",") + "}"
}

View file

@ -275,7 +275,7 @@ func TestStatsFieldsMax(t *testing.T) {
{
{"a", "1"},
{"b", ""},
{"x", `{"_msg":"def","a":"1","c":"foo","b":""}`},
{"x", `{"_msg":"def","a":"1","c":"foo"}`},
},
{
{"a", "3"},

View file

@ -274,7 +274,7 @@ func TestStatsFieldsMin(t *testing.T) {
{
{"a", "1"},
{"b", ""},
{"x", `{"_msg":"def","a":"1","c":"foo","b":""}`},
{"x", `{"_msg":"def","a":"1","c":"foo"}`},
},
{
{"a", "3"},

View file

@ -229,7 +229,7 @@ func (s *Storage) getFieldValuesNoHits(ctx context.Context, tenantIDs []TenantID
func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) {
pipes := append([]pipe{}, q.pipes...)
quotedFieldName := quoteTokenIfNeeded(fieldName)
pipeStr := fmt.Sprintf("uniq by (%s) hits limit %d", quotedFieldName, limit)
pipeStr := fmt.Sprintf("uniq by (%s) with hits limit %d", quotedFieldName, limit)
lex := newLexer(pipeStr)
pu, err := parsePipeUniq(lex)
@ -288,18 +288,18 @@ func sortValuesWithHits(results []ValueWithHits) {
})
}
// GetStreamLabelNames returns stream label names from q results for the given tenantIDs.
func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
// GetStreamFieldNames returns stream field names from q results for the given tenantIDs.
func (s *Storage) GetStreamFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil {
return nil, err
}
m := make(map[string]*uint64)
forEachStreamLabel(streams, func(label Field, hits uint64) {
pHits, ok := m[label.Name]
forEachStreamField(streams, func(f Field, hits uint64) {
pHits, ok := m[f.Name]
if !ok {
nameCopy := strings.Clone(label.Name)
nameCopy := strings.Clone(f.Name)
hitsLocal := uint64(0)
pHits = &hitsLocal
m[nameCopy] = pHits
@ -310,23 +310,23 @@ func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID,
return names, nil
}
// GetStreamLabelValues returns stream label values for the given labelName from q results for the given tenantIDs.
// GetStreamFieldValues returns stream field values for the given fieldName from q results for the given tenantIDs.
//
// If limit > 9, then up to limit unique label values are returned.
func (s *Storage) GetStreamLabelValues(ctx context.Context, tenantIDs []TenantID, q *Query, labelName string, limit uint64) ([]ValueWithHits, error) {
// If limit > 9, then up to limit unique values are returned.
func (s *Storage) GetStreamFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil {
return nil, err
}
m := make(map[string]*uint64)
forEachStreamLabel(streams, func(label Field, hits uint64) {
if label.Name != labelName {
forEachStreamField(streams, func(f Field, hits uint64) {
if f.Name != fieldName {
return
}
pHits, ok := m[label.Value]
pHits, ok := m[f.Value]
if !ok {
valueCopy := strings.Clone(label.Value)
valueCopy := strings.Clone(f.Value)
hitsLocal := uint64(0)
pHits = &hitsLocal
m[valueCopy] = pHits
@ -429,35 +429,10 @@ func hasFilterInWithQueryForFilter(f filter) bool {
func hasFilterInWithQueryForPipes(pipes []pipe) bool {
for _, p := range pipes {
switch t := p.(type) {
case *pipeStats:
for _, f := range t.funcs {
if f.iff.hasFilterInWithQuery() {
if p.hasFilterInWithQuery() {
return true
}
}
case *pipeReplace:
if t.iff.hasFilterInWithQuery() {
return true
}
case *pipeFormat:
if t.iff.hasFilterInWithQuery() {
return true
}
case *pipeExtract:
if t.iff.hasFilterInWithQuery() {
return true
}
case *pipeUnpackJSON:
if t.iff.hasFilterInWithQuery() {
return true
}
case *pipeUnpackLogfmt:
if t.iff.hasFilterInWithQuery() {
return true
}
}
}
return false
}
@ -514,64 +489,11 @@ func initFilterInValuesForFilter(cache map[string][]string, f filter, getFieldVa
func initFilterInValuesForPipes(cache map[string][]string, pipes []pipe, getFieldValuesFunc getFieldValuesFunc) ([]pipe, error) {
pipesNew := make([]pipe, len(pipes))
for i, p := range pipes {
switch t := p.(type) {
case *pipeStats:
funcsNew := make([]pipeStatsFunc, len(t.funcs))
for j, f := range t.funcs {
iffNew, err := f.iff.initFilterInValues(cache, getFieldValuesFunc)
pNew, err := p.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
f.iff = iffNew
funcsNew[j] = f
}
pipesNew[i] = &pipeStats{
byFields: t.byFields,
funcs: funcsNew,
}
case *pipeReplace:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pr := *t
pr.iff = iffNew
pipesNew[i] = &pr
case *pipeFormat:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pf := *t
pf.iff = iffNew
pipesNew[i] = &pf
case *pipeExtract:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pe := *t
pe.iff = iffNew
pipesNew[i] = &pe
case *pipeUnpackJSON:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pu := *t
pu.iff = iffNew
pipesNew[i] = &pu
case *pipeUnpackLogfmt:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pu := *t
pu.iff = iffNew
pipesNew[i] = &pu
default:
pipesNew[i] = p
}
pipesNew[i] = pNew
}
return pipesNew, nil
}
@ -1099,22 +1021,22 @@ func getFilterTimeRange(f filter) (int64, int64) {
return math.MinInt64, math.MaxInt64
}
func forEachStreamLabel(streams []ValueWithHits, f func(label Field, hits uint64)) {
var labels []Field
func forEachStreamField(streams []ValueWithHits, f func(f Field, hits uint64)) {
var fields []Field
for i := range streams {
var err error
labels, err = parseStreamLabels(labels[:0], streams[i].Value)
fields, err = parseStreamFields(fields[:0], streams[i].Value)
if err != nil {
continue
}
hits := streams[i].Hits
for j := range labels {
f(labels[j], hits)
for j := range fields {
f(fields[j], hits)
}
}
}
func parseStreamLabels(dst []Field, s string) ([]Field, error) {
func parseStreamFields(dst []Field, s string) ([]Field, error) {
if len(s) == 0 || s[0] != '{' {
return dst, fmt.Errorf("missing '{' at the beginning of stream name")
}
@ -1130,14 +1052,14 @@ func parseStreamLabels(dst []Field, s string) ([]Field, error) {
for {
n := strings.Index(s, `="`)
if n < 0 {
return dst, fmt.Errorf("cannot find label value in double quotes at [%s]", s)
return dst, fmt.Errorf("cannot find field value in double quotes at [%s]", s)
}
name := s[:n]
s = s[n+1:]
value, nOffset := tryUnquoteString(s, "")
if nOffset < 0 {
return dst, fmt.Errorf("cannot find parse label value in double quotes at [%s]", s)
return dst, fmt.Errorf("cannot find parse field value in double quotes at [%s]", s)
}
s = s[nOffset:]

View file

@ -650,11 +650,11 @@ func TestStorageSearch(t *testing.T) {
fs.MustRemoveAll(path)
}
func TestParseStreamLabelsSuccess(t *testing.T) {
func TestParseStreamFieldsSuccess(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
labels, err := parseStreamLabels(nil, s)
labels, err := parseStreamFields(nil, s)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}