lib/logstorage: work-in-progress

This commit is contained in:
Aliaksandr Valialkin 2024-05-25 00:30:58 +02:00
parent 7a5000656e
commit e2590f0485
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
58 changed files with 1561 additions and 457 deletions

View file

@ -122,7 +122,7 @@ VictoriaMetrics ecosystem contains the following components additionally to [sin
- [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/) and [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/) -
tools for creating backups and restoring from backups for VictoriaMetrics data.
- `vminsert`, `vmselect` and `vmstorage` - components of [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/).
- [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/) - user-friendly cost-efficient database for logs.
- [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) - user-friendly cost-efficient database for logs.
## Operation

View file

@ -47,7 +47,7 @@ func main() {
vlinsert.Init()
go httpserver.Serve(listenAddrs, useProxyProtocol, requestHandler)
logger.Infof("started VictoriaLogs in %.3f seconds; see https://docs.victoriametrics.com/VictoriaLogs/", time.Since(startTime).Seconds())
logger.Infof("started VictoriaLogs in %.3f seconds; see https://docs.victoriametrics.com/victorialogs/", time.Since(startTime).Seconds())
pushmetrics.Init()
sig := procutil.WaitForSigterm()
@ -77,7 +77,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>Single-node VictoriaLogs</h2></br>")
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/VictoriaLogs/'>https://docs.victoriametrics.com/VictoriaLogs/</a></br>")
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victorialogs/'>https://docs.victoriametrics.com/victorialogs/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{
{"select/vmui", "Web UI for VictoriaLogs"},
@ -99,7 +99,7 @@ func usage() {
const s = `
victoria-logs is a log management and analytics service.
See the docs at https://docs.victoriametrics.com/VictoriaLogs/
See the docs at https://docs.victoriametrics.com/victorialogs/
`
flagutil.Usage(s)
}

View file

@ -14,7 +14,7 @@ import (
// CommonParams contains common HTTP parameters used by log ingestion APIs.
//
// See https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters
// See https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters
type CommonParams struct {
TenantID logstorage.TenantID
TimeField string

View file

@ -94,13 +94,13 @@ These flags can be inspected by running `vlogsgenerator -help`. Below are the mo
* `-start` - starting timestamp for generating logs. Logs are evenly generated on the [`-start` ... `-end`] interval.
* `-end` - ending timestamp for generating logs. Logs are evenly generated on the [`-start` ... `-end`] interval.
* `-activeStreams` - the number of active [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) to generate.
* `-activeStreams` - the number of active [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) to generate.
* `-logsPerStream` - the number of log entries to generate per each log stream. Log entries are evenly distributed on the [`-start` ... `-end`] interval.
The total number of generated logs can be calculated as `-activeStreams` * `-logsPerStream`.
For example, the following command generates `1_000_000` log entries on the time range `[2024-01-01 - 2024-02-01]` across `100`
[log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields), where every logs stream contains `10_000` log entries,
[log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), where every logs stream contains `10_000` log entries,
and writes them to `http://localhost:9428/insert/jsonline`:
```
@ -113,7 +113,7 @@ bin/vlogsgenerator \
### Churn rate
It is possible to generate churn rate for active [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
It is possible to generate churn rate for active [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
by specifying `-totalStreams` command-line flag bigger than `-activeStreams`. For example, the following command generates
logs for `1000` total streams, while the number of active streams equals to `100`. This means that at every time there are logs for `100` streams,
but these streams change over the given [`-start` ... `-end`] time range, so the total number of streams on the given time range becomes `1000`:

View file

@ -27,32 +27,32 @@ var (
start = newTimeFlag("start", "-1d", "Generated logs start from this time; see https://docs.victoriametrics.com/#timestamp-formats")
end = newTimeFlag("end", "0s", "Generated logs end at this time; see https://docs.victoriametrics.com/#timestamp-formats")
activeStreams = flag.Int("activeStreams", 100, "The number of active log streams to generate; see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields")
activeStreams = flag.Int("activeStreams", 100, "The number of active log streams to generate; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields")
totalStreams = flag.Int("totalStreams", 0, "The number of total log streams; if -totalStreams > -activeStreams, then some active streams are substituted with new streams "+
"during data generation")
logsPerStream = flag.Int64("logsPerStream", 1_000, "The number of log entries to generate per each log stream. Log entries are evenly distributed between -start and -end")
constFieldsPerLog = flag.Int("constFieldsPerLog", 3, "The number of fields with constaint values to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
varFieldsPerLog = flag.Int("varFieldsPerLog", 1, "The number of fields with variable values to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
dictFieldsPerLog = flag.Int("dictFieldsPerLog", 2, "The number of fields with up to 8 different values to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
u8FieldsPerLog = flag.Int("u8FieldsPerLog", 1, "The number of fields with uint8 values to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
u16FieldsPerLog = flag.Int("u16FieldsPerLog", 1, "The number of fields with uint16 values to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
u32FieldsPerLog = flag.Int("u32FieldsPerLog", 1, "The number of fields with uint32 values to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
u64FieldsPerLog = flag.Int("u64FieldsPerLog", 1, "The number of fields with uint64 values to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
floatFieldsPerLog = flag.Int("floatFieldsPerLog", 1, "The number of fields with float64 values to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
ipFieldsPerLog = flag.Int("ipFieldsPerLog", 1, "The number of fields with IPv4 values to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
timestampFieldsPerLog = flag.Int("timestampFieldsPerLog", 1, "The number of fields with ISO8601 timestamps per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
jsonFieldsPerLog = flag.Int("jsonFieldsPerLog", 1, "The number of JSON fields to generate per each log entry; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
statInterval = flag.Duration("statInterval", 10*time.Second, "The interval between publishing the stats")
)

View file

@ -20,19 +20,19 @@ import (
var (
retentionPeriod = flagutil.NewDuration("retentionPeriod", "7d", "Log entries with timestamps older than now-retentionPeriod are automatically deleted; "+
"log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); "+
"see https://docs.victoriametrics.com/VictoriaLogs/#retention")
"see https://docs.victoriametrics.com/victorialogs/#retention")
futureRetention = flagutil.NewDuration("futureRetention", "2d", "Log entries with timestamps bigger than now+futureRetention are rejected during data ingestion; "+
"see https://docs.victoriametrics.com/VictoriaLogs/#retention")
"see https://docs.victoriametrics.com/victorialogs/#retention")
storageDataPath = flag.String("storageDataPath", "victoria-logs-data", "Path to directory with the VictoriaLogs data; "+
"see https://docs.victoriametrics.com/VictoriaLogs/#storage")
"see https://docs.victoriametrics.com/victorialogs/#storage")
inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
"The saved data survives unclean shutdowns such as OOM crash, hardware reset, SIGKILL, etc. "+
"Bigger intervals may help increase the lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
"Smaller intervals increase disk IO load. Minimum supported value is 1s")
logNewStreams = flag.Bool("logNewStreams", false, "Whether to log creation of new streams; this can be useful for debugging of high cardinality issues with log streams; "+
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields ; see also -logIngestedRows")
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields ; see also -logIngestedRows")
logIngestedRows = flag.Bool("logIngestedRows", false, "Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; "+
"see https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/ ; see also -logNewStreams")
"see https://docs.victoriametrics.com/victorialogs/data-ingestion/ ; see also -logNewStreams")
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which "+
"the storage stops accepting new data")
)

View file

@ -70,7 +70,7 @@ const ExploreLogsHeader: FC<ExploreLogHeaderProps> = ({ query, limit, error, onC
<a
className="vm-link vm-link_with-icon"
target="_blank"
href="https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html"
href="https://docs.victoriametrics.com/victorialogs/logsql/"
rel="help noreferrer"
>
<InfoIcon/>
@ -79,7 +79,7 @@ const ExploreLogsHeader: FC<ExploreLogHeaderProps> = ({ query, limit, error, onC
<a
className="vm-link vm-link_with-icon"
target="_blank"
href="https://docs.victoriametrics.com/VictoriaLogs/"
href="https://docs.victoriametrics.com/victorialogs/"
rel="help noreferrer"
>
<WikiIcon/>

View file

@ -185,7 +185,7 @@ To access Grafana use link [http://localhost:3000](http://localhost:3000).
To access [VictoriaLogs UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui)
use link [http://localhost:9428/select/vmui](http://localhost:9428/select/vmui).
Please, also see [how to monitor](https://docs.victoriametrics.com/VictoriaLogs/#monitoring)
Please, also see [how to monitor](https://docs.victoriametrics.com/victorialogs/#monitoring)
VictoriaLogs installations.
To shutdown environment execute the following command:

View file

@ -43,7 +43,7 @@ services:
# storing logs and serving read queries.
victorialogs:
container_name: victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.10.0-victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.11.0-victorialogs
command:
- "--storageDataPath=/vlogs"
- "--httpListenAddr=:9428"

View file

@ -23,8 +23,8 @@ The docker compose file contains the following components:
Querying the data
* [vmui](https://docs.victoriametrics.com/VictoriaLogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line)
* [vmui](https://docs.victoriametrics.com/victorialogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line)
the example of filebeat configuration(`filebeat.yml`)
@ -51,4 +51,4 @@ http:
port: 5066
```
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) to achieve better performance.
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) to achieve better performance.

View file

@ -22,7 +22,7 @@ services:
- -beat.uri=http://filebeat-victorialogs:5066
victorialogs:
image: docker.io/victoriametrics/victoria-logs:v0.10.0-victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.11.0-victorialogs
volumes:
- victorialogs-filebeat-docker-vl:/vlogs
ports:

View file

@ -20,8 +20,8 @@ The docker compose file contains the following components:
Querying the data
* [vmui](https://docs.victoriametrics.com/VictoriaLogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line)
* [vmui](https://docs.victoriametrics.com/victorialogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line)
the example of filebeat configuration(`filebeat.yml`)
@ -42,4 +42,4 @@ output.elasticsearch:
_stream_fields: "host.name,process.program,process.pid,container.name"
```
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) to achieve better performance.
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) to achieve better performance.

View file

@ -13,7 +13,7 @@ services:
- "5140:5140"
victorialogs:
image: docker.io/victoriametrics/victoria-logs:v0.10.0-victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.11.0-victorialogs
volumes:
- victorialogs-filebeat-syslog-vl:/vlogs
ports:

View file

@ -20,8 +20,8 @@ The docker compose file contains the following components:
Querying the data
* [vmui](https://docs.victoriametrics.com/VictoriaLogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line)
* [vmui](https://docs.victoriametrics.com/victorialogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line)
the example of fluentbit configuration(`filebeat.yml`)
@ -58,4 +58,4 @@ the example of fluentbit configuration(`filebeat.yml`)
header ProjectID 0
```
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) to achieve better performance.
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) to achieve better performance.

View file

@ -11,7 +11,7 @@ services:
- "5140:5140"
victorialogs:
image: docker.io/victoriametrics/victoria-logs:v0.10.0-victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.11.0-victorialogs
volumes:
- victorialogs-fluentbit-vl:/vlogs
ports:

View file

@ -25,8 +25,8 @@ The docker compose file contains the following components:
Querying the data
* [vmui](https://docs.victoriametrics.com/VictoriaLogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line)
* [vmui](https://docs.victoriametrics.com/victorialogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line)
Here is an example of logstash configuration(`pipeline.conf`):
@ -53,4 +53,4 @@ output {
}
```
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) to achieve better performance.
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) to achieve better performance.

View file

@ -14,7 +14,7 @@ services:
- "5140:5140"
victorialogs:
image: docker.io/victoriametrics/victoria-logs:v0.10.0-victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.11.0-victorialogs
volumes:
- victorialogs-logstash-vl:/vlogs
ports:

View file

@ -12,7 +12,7 @@ services:
- "5140:5140"
vlogs:
image: docker.io/victoriametrics/victoria-logs:v0.10.0-victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.11.0-victorialogs
volumes:
- victorialogs-promtail-docker:/vlogs
ports:

View file

@ -21,8 +21,8 @@ The docker compose file contains the following components:
Querying the data
* [vmui](https://docs.victoriametrics.com/VictoriaLogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line)
* [vmui](https://docs.victoriametrics.com/victorialogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line)
the example of vector configuration(`vector.toml`)
@ -58,4 +58,4 @@ the example of vector configuration(`vector.toml`)
ProjectID = "0"
```
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) to achieve better performance.
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) to achieve better performance.

View file

@ -22,7 +22,7 @@ services:
condition: service_healthy
victorialogs:
image: docker.io/victoriametrics/victoria-logs:v0.10.0-victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.11.0-victorialogs
volumes:
- victorialogs-vector-docker-vl:/vlogs
ports:

View file

@ -3,7 +3,7 @@ version: '3'
services:
# Run `make package-victoria-logs` to build victoria-logs image
vlogs:
image: docker.io/victoriametrics/victoria-logs:v0.10.0-victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.11.0-victorialogs
volumes:
- vlogs:/vlogs
ports:

View file

@ -22,7 +22,7 @@ with the number of CPU cores, RAM and available storage space.
The single-node version is easier to configure and operate compared to the cluster version, so think twice before choosing the cluster version.
See [this question](https://docs.victoriametrics.com/faq/#which-victoriametrics-type-is-recommended-for-use-in-production---single-node-or-cluster) for more details.
There is also user-friendly database for logs - [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/).
There is also user-friendly database for logs - [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/).
If you have questions about VictoriaMetrics, then feel free asking them at [VictoriaMetrics community Slack chat](https://victoriametrics.slack.com/),
you can join it via [Slack Inviter](https://slack.victoriametrics.com/).

View file

@ -125,7 +125,7 @@ VictoriaMetrics ecosystem contains the following components additionally to [sin
- [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/) and [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/) -
tools for creating backups and restoring from backups for VictoriaMetrics data.
- `vminsert`, `vmselect` and `vmstorage` - components of [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/).
- [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/) - user-friendly cost-efficient database for logs.
- [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) - user-friendly cost-efficient database for logs.
## Operation

View file

@ -133,7 +133,7 @@ VictoriaMetrics ecosystem contains the following components additionally to [sin
- [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/) and [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/) -
tools for creating backups and restoring from backups for VictoriaMetrics data.
- `vminsert`, `vmselect` and `vmstorage` - components of [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/).
- [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/) - user-friendly cost-efficient database for logs.
- [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) - user-friendly cost-efficient database for logs.
## Operation

View file

@ -15,10 +15,20 @@ aliases:
# VictoriaLogs changelog
The following `tip` changes can be tested by building VictoriaLogs from the latest commit of [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/) repository
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickStart.html#building-from-source-code)
according to [these docs](https://docs.victoriametrics.com/victorialogs/quickstart/#building-from-source-code)
## tip
## [v0.11.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.11.0-victorialogs)
Released at 2024-05-25
* FEATURE: add [`replace` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#replace-pipe), which allows replacing substrings in [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
* FEATURE: support [comparing](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter) log field values with [special numeric values](https://docs.victoriametrics.com/victorialogs/logsql/#numeric-values). For example, `duration:>1.5s` and `response_size:<15KiB` are valid filters now.
* FEATURE: properly sort [durations](https://docs.victoriametrics.com/victorialogs/logsql/#duration-values) and [short numeric values](https://docs.victoriametrics.com/victorialogs/logsql/#short-numeric-values) in [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe). For example, `10s` goes in front of `1h`, while `10KB` goes in front of `1GB`.
* FEATURE: add an ability to preserve the original non-empty field values when executing [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe), [`unpack_json`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe), [`unpack_logfmt`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) and [`format`](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe) pipes.
* FEATURE: add an ability to preserve the original field values if the corresponding unpacked values are empty when executing [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe), [`unpack_json`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe), [`unpack_logfmt`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) and [`format`](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe) pipes.
## [v0.10.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.10.0-victorialogs)
Released at 2024-05-24
@ -38,7 +48,7 @@ Released at 2024-05-24
Released at 2024-05-22
* BUGFIX: [web UI](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui): fix loading web UI, which has been broken in [v0.9.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.9.0-victorialogs).
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix loading web UI, which has been broken in [v0.9.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.9.0-victorialogs).
## [v0.9.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.9.0-victorialogs)
@ -55,10 +65,10 @@ Released at 2024-05-22
* FEATURE: add `/select/logsql/streams` HTTP endpoint for returning [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-streams) for details.
* FEATURE: add `/select/logsql/stream_label_names` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-names) for details.
* FEATURE: add `/select/logsql/stream_label_values` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the given label from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-values) for details.
* FEATURE: [web UI](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui): change time range limitation from `_time` in the expression to `start` and `end` query args.
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): change time range limitation from `_time` in the expression to `start` and `end` query args.
* BUGFIX: fix `invalid memory address or nil pointer dereference` panic when using [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe), [`unpack_json`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) or [`unpack_logfmt`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) pipes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6306).
* BUGFIX: [web UI](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui): fix an issue where logs with long `_msg` values might not display. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6281).
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix an issue where logs with long `_msg` values might not display. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6281).
* BUGFIX: properly handle time range boundaries with millisecond precision. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6293).
## [v0.8.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.8.0-victorialogs)
@ -148,8 +158,8 @@ Released at 2024-03-01
Released at 2023-11-15
* BUGFIX: properly locate logs for the [requested streams](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter). Previously logs for some streams may be missing in query results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4856). Thanks to @XLONG96 for [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5295)!
* BUGFIX: [web UI](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui): properly sort found logs by time. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5300).
* BUGFIX: properly locate logs for the [requested streams](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter). Previously logs for some streams may be missing in query results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4856). Thanks to @XLONG96 for [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5295)!
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): properly sort found logs by time. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5300).
## [v0.4.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.4.1-victorialogs)
@ -161,12 +171,12 @@ Released at 2023-10-04
Released at 2023-10-03
* FEATURE: add `-elasticsearch.version` command-line flag, which can be used for specifying Elasticsearch version returned by VictoriaLogs to Filebeat at [elasticsearch bulk API](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#elasticsearch-bulk-api). This helps resolving [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4777).
* FEATURE: expose the following metrics at [/metrics](https://docs.victoriametrics.com/VictoriaLogs/#monitoring) page:
* `vl_data_size_bytes{type="storage"}` - on-disk size for data excluding [log stream](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) indexes.
* `vl_data_size_bytes{type="indexdb"}` - on-disk size for [log stream](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) indexes.
* FEATURE: add `-elasticsearch.version` command-line flag, which can be used for specifying Elasticsearch version returned by VictoriaLogs to Filebeat at [elasticsearch bulk API](https://docs.victoriametrics.com/victorialogs/data-ingestion/#elasticsearch-bulk-api). This helps resolving [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4777).
* FEATURE: expose the following metrics at [/metrics](https://docs.victoriametrics.com/victorialogs/#monitoring) page:
* `vl_data_size_bytes{type="storage"}` - on-disk size for data excluding [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) indexes.
* `vl_data_size_bytes{type="indexdb"}` - on-disk size for [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) indexes.
* FEATURE: add `-insert.maxFieldsPerLine` command-line flag, which can be used for limiting the number of fields per line in logs sent to VictoriaLogs via ingestion protocols. This helps to avoid issues like [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4762).
* FEATURE: expose `vl_http_request_duration_seconds` histogram at the [/metrics](https://docs.victoriametrics.com/VictoriaLogs/#monitoring) page. Thanks to @crossoverJie for [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4934).
* FEATURE: expose `vl_http_request_duration_seconds` histogram at the [/metrics](https://docs.victoriametrics.com/victorialogs/#monitoring) page. Thanks to @crossoverJie for [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4934).
* FEATURE: add support of `-storage.minFreeDiskSpaceBytes` command-line flag to allow switching to read-only mode when running out of disk space at `-storageDataPath`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4737).
* BUGFIX: fix possible panic when no data is written to VictoriaLogs for a long time. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4895). Thanks to @crossoverJie for filing and fixing the issue.
@ -177,15 +187,15 @@ Released at 2023-10-03
Released at 2023-07-20
* FEATURE: add support for data ingestion via Promtail (aka default log shipper for Grafana Loki). See [these](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Promtail.html) and [these](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#loki-json-api) docs.
* FEATURE: add support for data ingestion via Promtail (aka default log shipper for Grafana Loki). See [these](https://docs.victoriametrics.com/victorialogs/data-ingestion/Promtail.html) and [these](https://docs.victoriametrics.com/victorialogs/data-ingestion/#loki-json-api) docs.
## [v0.2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.2.0-victorialogs)
Released at 2023-07-17
* FEATURE: support short form of `_time` filters over the last X minutes/hours/days/etc. For example, `_time:5m` is a short form for `_time:(now-5m, now]`, which matches logs with [timestamps](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) for the last 5 minutes. See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) for details.
* FEATURE: add ability to specify offset for the selected time range. For example, `_time:5m offset 1h` is equivalent to `_time:(now-5m-1h, now-1h]`. See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) for details.
* FEATURE: [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html): replace `exact_prefix("...")` with `exact("..."*)`. This makes it consistent with [i()](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#case-insensitive-filter) filter, which can accept phrases and prefixes, e.g. `i("phrase")` and `i("phrase"*)`. See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#exact-prefix-filter).
* FEATURE: support short form of `_time` filters over the last X minutes/hours/days/etc. For example, `_time:5m` is a short form for `_time:(now-5m, now]`, which matches logs with [timestamps](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) for the last 5 minutes. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for details.
* FEATURE: add ability to specify offset for the selected time range. For example, `_time:5m offset 1h` is equivalent to `_time:(now-5m-1h, now-1h]`. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for details.
* FEATURE: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): replace `exact_prefix("...")` with `exact("..."*)`. This makes it consistent with [i()](https://docs.victoriametrics.com/victorialogs/logsql/#case-insensitive-filter) filter, which can accept phrases and prefixes, e.g. `i("phrase")` and `i("phrase"*)`. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#exact-prefix-filter).
## [v0.1.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.1.0-victorialogs)

View file

@ -35,15 +35,15 @@ VictoriaLogs is optimized specifically for logs. So it provides the following fe
- Up to 30x less RAM usage than Elasticsearch for the same workload.
- Up to 15x less disk space usage than Elasticsearch for the same amounts of stored logs.
- Ability to work with hundreds of terabytes of logs on a single node.
- Very easy to use query language optimized for typical log analysis tasks - [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html).
- Fast full-text search over all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) out of the box.
- Good integration with traditional command-line tools for log analysis. See [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
- Very easy to use query language optimized for typical log analysis tasks - [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/).
- Fast full-text search over all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) out of the box.
- Good integration with traditional command-line tools for log analysis. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line).
## What is the difference between VictoriaLogs and Grafana Loki?
Both Grafana Loki and VictoriaLogs are designed for log management and processing.
Both systems support [log stream](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) concept.
Both systems support [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) concept.
VictoriaLogs and Grafana Loki have the following differences:
@ -51,13 +51,13 @@ VictoriaLogs and Grafana Loki have the following differences:
It starts consuming huge amounts of RAM and working very slow when logs with high-cardinality fields are ingested into it.
See [these docs](https://grafana.com/docs/loki/latest/best-practices/) for details.
VictoriaMetrics supports high-cardinality [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
VictoriaMetrics supports high-cardinality [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
It automatically indexes all the ingested log fields and allows performing fast full-text search over any field.
- Grafana Loki provides very inconvenient query language - [LogQL](https://grafana.com/docs/loki/latest/logql/).
This query language is hard to use for typical log analysis tasks.
VictoriaMetrics provides easy to use query language for typical log analysis tasks - [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html).
VictoriaMetrics provides easy to use query language for typical log analysis tasks - [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/).
- VictoriaLogs performs typical full-text queries up to 1000x faster than Grafana Loki.
@ -71,7 +71,7 @@ VictoriaLogs and Grafana Loki have the following differences:
ClickHouse is an extremely fast and efficient analytical database. It can be used for logs storage, analysis and processing.
VictoriaLogs is designed solely for logs. VictoriaLogs uses [similar design ideas as ClickHouse](#how-does-victorialogs-work) for achieving high performance.
- ClickHouse is good for logs if you know the set of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) beforehand.
- ClickHouse is good for logs if you know the set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) beforehand.
Then you can create a table with a column per each log field and achieve the maximum possible query performance.
If the set of log fields isn't known beforehand, or if it can change at any time, then ClickHouse can still be used,
@ -81,18 +81,18 @@ VictoriaLogs is designed solely for logs. VictoriaLogs uses [similar design idea
for achieving high efficiency and query performance.
VictoriaLogs works optimally with any log types out of the box - structured, unstructured and mixed.
It works optimally with any sets of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model),
It works optimally with any sets of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
which can change in any way across different log sources.
- ClickHouse provides SQL dialect with additional analytical functionality. It allows performing arbitrary complex analytical queries
over the stored logs.
VictoriaLogs provides easy to use query language with full-text search specifically optimized
for log analysis - [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html).
for log analysis - [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/).
LogsQL is usually much easier to use than SQL for typical log analysis tasks, while some
non-trivial analytics may require SQL power.
- VictoriaLogs accepts logs from popular log shippers out of the box - see [these docs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
- VictoriaLogs accepts logs from popular log shippers out of the box - see [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
ClickHouse needs an intermediate applications for converting the ingested logs into `INSERT` SQL statements for the particular database schema.
This may increase the complexity of the system and, subsequently, increase its' maintenance costs.
@ -100,7 +100,7 @@ VictoriaLogs is designed solely for logs. VictoriaLogs uses [similar design idea
## How does VictoriaLogs work?
VictoriaLogs accepts logs as [JSON entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
VictoriaLogs accepts logs as [JSON entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
It then stores every field value into a distinct data block. E.g. values for the same field across multiple log entries
are stored in a single data block. This allow reading data blocks only for the needed fields during querying.
@ -119,11 +119,11 @@ This architecture is inspired by [ClickHouse architecture](https://clickhouse.co
On top of this, VictoriaLogs employs additional optimizations for achieving high query performance:
- It uses [bloom filters](https://en.wikipedia.org/wiki/Bloom_filter) for skipping blocks without the given
[word](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#word-filter) or [phrase](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#phrase-filter).
[word](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrase](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter).
- It uses custom encoding and compression for fields with different data types.
For example, it encodes IP addresses as 4-byte tuples. Custom fields' encoding reduces data size on disk and improves query performance.
- It physically groups logs for the same [log stream](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
- It physically groups logs for the same [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
close to each other. This improves compression ratio, which helps reducing disk space usage. This also improves query performance
by skipping blocks for unneeded streams when [stream filter](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter) is used.
- It maintains sparse index for [log timestamps](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field),
which allow improving query performance when [time filter](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) is used.
by skipping blocks for unneeded streams when [stream filter](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter) is used.
- It maintains sparse index for [log timestamps](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field),
which allow improving query performance when [time filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) is used.

View file

@ -12,10 +12,10 @@ aliases:
# LogsQL
LogsQL is a simple yet powerful query language for [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/).
LogsQL is a simple yet powerful query language for [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/).
It provides the following features:
- Full-text search across [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- Full-text search across [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See [word filter](#word-filter), [phrase filter](#phrase-filter) and [prefix filter](#prefix-filter).
- Ability to combine filters into arbitrary complex [logical filters](#logical-filter).
- Ability to extract structured fields from unstructured logs at query time. See [these docs](#transformations).
@ -23,14 +23,14 @@ It provides the following features:
## LogsQL tutorial
If you aren't familiar with VictoriaLogs, then start with [key concepts docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html).
If you aren't familiar with VictoriaLogs, then start with [key concepts docs](https://docs.victoriametrics.com/victorialogs/keyconcepts/).
Then follow these docs:
- [How to run VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/QuickStart.html).
- [how to ingest data into VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
- [How to run VictoriaLogs](https://docs.victoriametrics.com/victorialogs/quickstart/).
- [how to ingest data into VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
The simplest LogsQL query is just a [word](#word), which must be found in the [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
The simplest LogsQL query is just a [word](#word), which must be found in the [log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
For example, the following query finds all the logs with `error` word:
```logsql
@ -57,7 +57,7 @@ finds log messages with the `error: cannot find file` phrase:
"error: cannot find file"
```
Queries above match logs with any [timestamp](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field),
Queries above match logs with any [timestamp](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field),
e.g. they may return logs from the previous year alongside recently ingested logs.
Usually logs from the previous year aren't so interesting comparing to the recently ingested logs.
@ -72,9 +72,9 @@ error AND _time:5m
This query consists of two [filters](#filters) joined with `AND` [operator](#logical-filter):
- The filter on the `error` [word](#word).
- The filter on the [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field).
- The filter on the [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field).
The `AND` operator means that the [log entry](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must match both filters in order to be selected.
The `AND` operator means that the [log entry](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must match both filters in order to be selected.
Typical LogsQL query constists of multiple [filters](#filters) joined with `AND` operator. It may be tiresome typing and then reading all these `AND` words.
So LogsQL allows omitting `AND` words. For example, the following query is equivalent to the query above:
@ -83,7 +83,7 @@ So LogsQL allows omitting `AND` words. For example, the following query is equiv
error _time:5m
```
The query returns all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) by default.
The query returns all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) by default.
See [how to query specific fields](#querying-specific-fields).
Suppose the query above selects too many rows because some buggy app pushes invalid error logs to VictoriaLogs. Suppose the app adds `buggy_app` [word](#word) to every log line.
@ -121,8 +121,8 @@ Read [these docs](#logical-filter) for more details. There is no need in remembe
just wrap the needed query parts into explicit parentheses if you aren't sure in priority rules.
As an additional bonus, explicit parentheses make queries easier to read and maintain.
Queries above assume that the `error` [word](#word) is stored in the [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
This word can be stored in other [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) such as `log.level`.
Queries above assume that the `error` [word](#word) is stored in the [log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
This word can be stored in other [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) such as `log.level`.
How to select error logs in this case? Just add the `log.level:` prefix in front of the `error` word:
```logsq
@ -149,8 +149,8 @@ _time:5m log.level:error !app:(buggy_app OR foobar)
```
The `app` field uniquely identifies the application instance if a single instance runs per each unique `app`.
In this case it is recommended associating the `app` field with [log stream fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/). This usually improves both compression rate
In this case it is recommended associating the `app` field with [log stream fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/). This usually improves both compression rate
and query performance when querying the needed streams via [`_stream` filter](#stream-filter).
If the `app` field is associated with the log stream, then the query above can be rewritten to more performant one:
@ -169,7 +169,7 @@ Now you are familiar with LogsQL basics. Read [query syntax](#query-syntax) if y
#### Word
LogsQL splits all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) into words
LogsQL splits all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into words
delimited by non-word chars such as whitespace, parens, punctuation chars, etc. For example, the `foo: (bar,"тест")!` string
is split into `foo`, `bar` and `тест` words. Words can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8) chars.
These words are taken into account by full-text search filters such as
@ -199,8 +199,8 @@ See [the list of supported pipes in LogsQL](#pipes).
LogsQL supports various filters for searching for log messages (see below).
They can be combined into arbitrary complex queries via [logical filters](#logical-filter).
Filters are applied to [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) by default.
If the filter must be applied to other [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model),
Filters are applied to [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) by default.
If the filter must be applied to other [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
then its' name followed by the colon must be put in front of the filter. For example, if `error` [word filter](#word-filter) must be applied
to the `log.level` field, then use `log.level:error` query.
@ -215,34 +215,34 @@ If doubt, it is recommended quoting field names and filter args.
The list of LogsQL filters:
- [Time filter](#time-filter) - matches logs with [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) in the given time range
- [Stream filter](#stream-filter) - matches logs, which belong to the given [streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
- [Time filter](#time-filter) - matches logs with [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) in the given time range
- [Stream filter](#stream-filter) - matches logs, which belong to the given [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
- [Word filter](#word-filter) - matches logs with the given [word](#word)
- [Phrase filter](#phrase-filter) - matches logs with the given phrase
- [Prefix filter](#prefix-filter) - matches logs with the given word prefix or phrase prefix
- [Empty value filter](#empty-value-filter) - matches logs without the given [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
- [Any value filter](#any-value-filter) - matches logs with the given non-empty [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
- [Empty value filter](#empty-value-filter) - matches logs without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
- [Any value filter](#any-value-filter) - matches logs with the given non-empty [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
- [Exact filter](#exact-filter) - matches logs with the exact value
- [Exact prefix filter](#exact-prefix-filter) - matches logs starting with the given prefix
- [Multi-exact filter](#multi-exact-filter) - matches logs with one of the specified exact values
- [Case-insensitive filter](#case-insensitive-filter) - matches logs with the given case-insensitive word, phrase or prefix
- [Sequence filter](#sequence-filter) - matches logs with the given sequence of words or phrases
- [Regexp filter](#regexp-filter) - matches logs for the given regexp
- [Range filter](#range-filter) - matches logs with numeric [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in the given range
- [IPv4 range filter](#ipv4-range-filter) - matches logs with ip address [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in the given range
- [String range filter](#string-range-filter) - matches logs with [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in the given string range
- [Length range filter](#length-range-filter) - matches logs with [field values](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) of the given length range
- [Range filter](#range-filter) - matches logs with numeric [field values](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in the given range
- [IPv4 range filter](#ipv4-range-filter) - matches logs with ip address [field values](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in the given range
- [String range filter](#string-range-filter) - matches logs with [field values](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in the given string range
- [Length range filter](#length-range-filter) - matches logs with [field values](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) of the given length range
- [Logical filter](#logical-filter) - allows combining other filters
### Time filter
VictoriaLogs scans all the logs per each query if it doesn't contain the filter on [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field).
VictoriaLogs scans all the logs per each query if it doesn't contain the filter on [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field).
It uses various optimizations in order to accelerate full scan queries without the `_time` filter,
but such queries can be slow if the storage contains large number of logs over long time range. The easiest way to optimize queries
is to narrow down the search with the filter on [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field).
is to narrow down the search with the filter on [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field).
For example, the following query returns [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
For example, the following query returns [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
ingested into VictoriaLogs during the last hour, which contain the `error` [word](#word):
```logsql
@ -295,11 +295,11 @@ See also:
### Stream filter
VictoriaLogs provides an optimized way to select log entries, which belong to particular [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
VictoriaLogs provides an optimized way to select log entries, which belong to particular [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
This can be done via `_stream:{...}` filter. The `{...}` may contain arbitrary
[Prometheus-compatible label selector](https://docs.victoriametrics.com/keyconcepts/#filtering)
over fields associated with [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
For example, the following query selects [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
over fields associated with [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
For example, the following query selects [log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
with `app` field equal to `nginx`:
```logsql
@ -330,13 +330,13 @@ See also:
### Word filter
The simplest LogsQL query consists of a single [word](#word) to search in log messages. For example, the following query matches
[log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with `error` [word](#word) inside them:
[log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `error` [word](#word) inside them:
```logsql
error
```
This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
This query matches the following [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
- `error`
- `an error happened`
@ -347,8 +347,8 @@ This query doesn't match the following log messages:
- `ERROR`, since the filter is case-sensitive by default. Use `i(error)` for this case. See [these docs](#case-insensitive-filter) for details.
- `multiple errors occurred`, since the `errors` word doesn't match `error` word. Use `error*` for this case. See [these docs](#prefix-filter) for details.
By default the given [word](#word) is searched in the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the word and put a colon after it
By default the given [word](#word) is searched in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
Specify the [field name](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in front of the word and put a colon after it
if it must be searched in the given field. For example, the following query returns log entries containing the `error` [word](#word) in the `log.level` field:
```logsql
@ -380,14 +380,14 @@ See also:
Is you need to search for log messages with the specific phrase inside them, then just wrap the phrase in quotes.
The phrase can contain any chars, including whitespace, punctuation, parens, etc. They are taken into account during the search.
For example, the following query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
For example, the following query matches [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with `ssh: login fail` phrase inside them:
```logsql
"ssh: login fail"
```
This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
This query matches the following [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
- `ERROR: ssh: login fail for user "foobar"`
- `ssh: login fail!`
@ -410,8 +410,8 @@ logs with `"foo":"bar"` phrase:
'"foo":"bar"'
```
By default the given phrase is searched in the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the phrase and put a colon after it
By default the given phrase is searched in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
Specify the [field name](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in front of the phrase and put a colon after it
if it must be searched in the given field. For example, the following query returns log entries containing the `cannot open file` phrase in the `event.original` field:
```logsql
@ -442,13 +442,13 @@ See also:
### Prefix filter
If you need to search for log messages with [words](#word) / phrases containing some prefix, then just add `*` char to the end of the [word](#word) / phrase in the query.
For example, the following query returns [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), which contain [words](#word) with `err` prefix:
For example, the following query returns [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field), which contain [words](#word) with `err` prefix:
```logsql
err*
```
This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
This query matches the following [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
- `err: foobar`
- `cannot open file: error occurred`
@ -459,13 +459,13 @@ This query doesn't match the following log messages:
- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `~"err"` for this case. See [these docs](#regexp-filter) for details.
Prefix filter can be applied to [phrases](#phrase-filter). For example, the following query matches
[log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing phrases with `unexpected fail` prefix:
[log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) containing phrases with `unexpected fail` prefix:
```logsql
"unexpected fail"*
```
This query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
This query matches the following [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
- `unexpected fail: IO error`
- `error:unexpected failure`
@ -484,8 +484,8 @@ logs with `"foo":"bar` prefix:
'"foo":"bar'*
```
By default the prefix filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the prefix filter
By default the prefix filter is applied to the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in front of the prefix filter
in order to apply it to the given field. For example, the following query matches `log.level` field containing any word with the `err` prefix:
```logsql
@ -517,7 +517,7 @@ See also:
### Empty value filter
Sometimes it is needed to find log entries without the given [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
Sometimes it is needed to find log entries without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
This can be performed with `log_field:""` syntax. For example, the following query matches log entries without `host.hostname` field:
```logsql
@ -533,7 +533,7 @@ See also:
### Any value filter
Sometimes it is needed to find log entries containing any non-empty value for the given [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
Sometimes it is needed to find log entries containing any non-empty value for the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
This can be performed with `log_field:*` syntax. For example, the following query matches log entries with non-empty `host.hostname` field:
```logsql
@ -549,9 +549,9 @@ See also:
### Exact filter
The [word filter](#word-filter) and [phrase filter](#phrase-filter) return [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
The [word filter](#word-filter) and [phrase filter](#phrase-filter) return [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field),
which contain the given word or phrase inside them. The message may contain additional text other than the requested word or phrase. If you need searching for log messages
or [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the exact value, then use the `exact` filter.
or [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the exact value, then use the `exact` filter.
For example, the following query returns log messages wih the exact value `fatal error: cannot find /foo/bar`:
```logsql
@ -566,8 +566,8 @@ The query doesn't match the following log messages:
- `FATAL ERROR: cannot find /foo/bar`, since the `exact` filter is case-sensitive. Use `i("fatal error: cannot find /foo/bar")` in this case.
See [these docs](#case-insensitive-filter) for details.
By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact` filter and put a colon after it
By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
Specify the [field name](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in front of the `exact` filter and put a colon after it
if it must be searched in the given field. For example, the following query returns log entries with the exact `error` value at `log.level` field:
```logsql
@ -606,7 +606,7 @@ For example, the following query matches log messages, which start from `Process
="Processing request"*
```
This filter matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
This filter matches the following [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
- `Processing request foobar`
- `Processing requests from ...`
@ -618,8 +618,8 @@ It doesn't match the following log messages:
- `start: Processing request`, since the log message doesn't start with `Processing request`. Use `"Processing request"` query in this case.
See [these docs](#phrase-filter) for details.
By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact` filter and put a colon after it
By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
Specify the [field name](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in front of the `exact` filter and put a colon after it
if it must be searched in the given field. For example, the following query returns log entries with `log.level` field, which starts with `err` prefix:
```logsql
@ -695,7 +695,7 @@ log messages with `error` word in any case:
i(error)
```
The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
The query matches the following [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
- `unknown error happened`
- `ERROR: cannot read file`
@ -707,8 +707,8 @@ The query doesn't match the following log messages:
- `FooError`, since the `FooError` [word](#word) has superflouos prefix `Foo`. Use `~"(?i)error"` for this case. See [these docs](#regexp-filter) for details.
- `too many Errors`, since the `Errors` [word](#word) has superflouos suffix `s`. Use `i(error*)` for this case.
By default the `i()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter
By default the `i()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in front of the filter
in order to apply it to the given field. For example, the following query matches `log.level` field containing `error` [word](#word) in any case:
```logsql
@ -740,7 +740,7 @@ See also:
### Sequence filter
Sometimes it is needed to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
Sometimes it is needed to find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with [words](#word) or phrases in a particular order. For example, if log messages with `error` word followed by `open file` phrase
must be found, then the following LogsQL query can be used:
@ -753,8 +753,8 @@ The query doesn't match the `cannot open file: error` message, since the `open f
If you need matching log messages with both `error` word and `open file` phrase, then use `error AND "open file"` query. See [these docs](#logical-filter)
for details.
By default the `seq()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter
By default the `seq()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in front of the filter
in order to apply it to the given field. For example, the following query matches `event.original` field containing `(error, "open file")` sequence:
```logsql
@ -785,7 +785,7 @@ For example, the following query returns all the log messages containing `err` o
~"err|warn"
```
The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), which contain either `err` or `warn` substrings:
The query matches the following [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field), which contain either `err` or `warn` substrings:
- `error: cannot read data`
- `2 warnings have been raised`
@ -804,8 +804,8 @@ logs matching `"foo":"(bar|baz)"` regexp:
'"foo":"(bar|baz)"'
```
By default the regexp filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter
By default the regexp filter is applied to the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in front of the filter
in order to apply it to the given field. For example, the following query matches `event.original` field containing either `err` or `warn` substrings:
```logsql
@ -857,7 +857,7 @@ The following query returns logs with request durations smaller or equal to 1.5
request.duration:<=1.5
```
The lower and the upper bounds of the range are excluded by default. If they must be included, then substitute the corresponding
The lower and the upper bounds of the `range(lower, upper)` are excluded by default. If they must be included, then substitute the corresponding
parentheses with square brackets. For example:
- `range[1, 10)` includes `1` in the matching range
@ -866,15 +866,15 @@ parentheses with square brackets. For example:
The range boundaries can contain any [supported numeric values](#numeric-values).
Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds`
[log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text.
[log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field), since the `4.2` number is surrounded by other text.
Extract the numeric value from the message with `parse(_msg, "the request took <request_duration> seconds")` [transformation](#transformations)
and then apply the `range()` [filter pipe](#filter-pipe) to the extracted `request_duration` field.
Performance tips:
- It is better to query pure numeric [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
- It is better to query pure numeric [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
instead of extracting numeric field from text field via [transformations](#transformations) at query time.
- See [other performance tips](#performance-tips).
@ -903,29 +903,29 @@ user.ip:ipv4_range("127.0.0.0/8")
```
If you need matching a single IPv4 address, then just put it inside `ipv4_range()`. For example, the following query matches `1.2.3.4` IP
at `user.ip` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model):
at `user.ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model):
```logsql
user.ip:ipv4_range("1.2.3.4")
```
Note that the `ipv4_range()` doesn't match a string with IPv4 address if this string contains other text. For example, `ipv4_range("127.0.0.0/24")`
doesn't match `request from 127.0.0.1: done` [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
doesn't match `request from 127.0.0.1: done` [log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field),
since the `127.0.0.1` ip is surrounded by other text. Extract the IP from the message with `parse(_msg, "request from <ip>: done")` [transformation](#transformations)
and then apply the `ipv4_range()` [filter pipe](#filter-pipe) to the extracted `ip` field.
Hints:
- If you need searching for [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing the given `X.Y.Z.Q` IPv4 address,
- If you need searching for [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) containing the given `X.Y.Z.Q` IPv4 address,
then `"X.Y.Z.Q"` query can be used. See [these docs](#phrase-filter) for details.
- If you need searching for [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing
- If you need searching for [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) containing
at least a single IPv4 address out of the given list, then `"ip1" OR "ip2" ... OR "ipN"` query can be used. See [these docs](#logical-filter) for details.
- If you need finding log entries with `ip` field in multiple ranges, then use `ip:(ipv4_range(range1) OR ipv4_range(range2) ... OR ipv4_range(rangeN))` query.
See [these docs](#logical-filter) for details.
Performance tips:
- It is better querying pure IPv4 [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
- It is better querying pure IPv4 [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
instead of extracting IPv4 from text field via [transformations](#transformations) at query time.
- See [other performance tips](#performance-tips).
@ -960,7 +960,7 @@ See also:
### Length range filter
If you need to filter log message by its length, then `len_range()` filter can be used.
For example, the following LogsQL query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
For example, the following LogsQL query matches [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with lengths in the range `[5, 10]` chars:
```logsql
@ -977,7 +977,7 @@ This query doesn't match the following log messages:
- `foo`, since it is too short
- `foo bar baz abc`, sinc it is too long
It is possible to use `inf` as the upper bound. For example, the following query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
It is possible to use `inf` as the upper bound. For example, the following query matches [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with the length bigger or equal to 5 chars:
```logsql
@ -990,8 +990,8 @@ The range boundaries can be expressed in the following forms:
- Binary form. Form example, `len_range(0b100110, 0b11111101)`
- Integer form with `_` delimiters for better readability. For example, `len_range(1_000, 2_345_678)`.
By default the `len_range()` is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Put the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `len_range()` in order to apply
By default the `len_range()` is applied to the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
Put the [field name](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in front of the `len_range()` in order to apply
the filter to the needed field. For example, the following query matches log entries with the `foo` field length in the range `[10, 20]` chars:
```logsql
@ -1009,34 +1009,34 @@ See also:
Simpler LogsQL [filters](#filters) can be combined into more complex filters with the following logical operations:
- `q1 AND q2` - matches common log entries returned by both `q1` and `q2`. Arbitrary number of [filters](#filters) can be combined with `AND` operation.
For example, `error AND file AND app` matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
For example, `error AND file AND app` matches [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field),
which simultaneously contain `error`, `file` and `app` [words](#word).
The `AND` operation is frequently used in LogsQL queries, so it is allowed to skip the `AND` word.
For example, `error file app` is equivalent to `error AND file AND app`.
- `q1 OR q2` - merges log entries returned by both `q1` and `q2`. Aribtrary number of [filters](#filters) can be combined with `OR` operation.
For example, `error OR warning OR info` matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
For example, `error OR warning OR info` matches [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field),
which contain at least one of `error`, `warning` or `info` [words](#word).
- `NOT q` - returns all the log entries except of those which match `q`. For example, `NOT info` returns all the
[log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
[log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field),
which do not contain `info` [word](#word). The `NOT` operation is frequently used in LogsQL queries, so it is allowed substituting `NOT` with `!` in queries.
For example, `!info` is equivalent to `NOT info`.
The `NOT` operation has the highest priority, `AND` has the middle priority and `OR` has the lowest priority.
The priority order can be changed with parentheses. For example, `NOT info OR debug` is interpreted as `(NOT info) OR debug`,
so it matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
so it matches [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field),
which do not contain `info` [word](#word), while it also matches messages with `debug` word (which may contain the `info` word).
This is not what most users expect. In this case the query can be rewritten to `NOT (info OR debug)`,
which correctly returns log messages without `info` and `debug` [words](#word).
LogsQL supports arbitrary complex logical queries with arbitrary mix of `AND`, `OR` and `NOT` operations and parentheses.
By default logical filters apply to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
unless the inner filters explicitly specify the needed [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) via `field_name:filter` syntax.
By default logical filters apply to the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
unless the inner filters explicitly specify the needed [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) via `field_name:filter` syntax.
For example, `(error OR warn) AND host.hostname:host123` is interpreted as `(_msg:error OR _msg:warn) AND host.hostname:host123`.
It is possible to specify a single [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) for multiple filters
It is possible to specify a single [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for multiple filters
with the following syntax:
```logsql
@ -1050,7 +1050,7 @@ Performance tips:
- VictoriaLogs executes logical operations from the left to the right, so it is recommended moving the most specific
and the fastest filters (such as [word filter](#word-filter) and [phrase filter](#phrase-filter)) to the left,
while moving less specific and the slowest filters (such as [regexp filter](#regexp-filter) and [case-insensitive filter](#case-insensitive-filter))
to the right. For example, if you need to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
to the right. For example, if you need to find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with the `error` word, which match some `/foo/(bar|baz)` regexp,
it is better from performance PoV to use the query `error ~"/foo/(bar|baz)"` instead of `~"/foo/(bar|baz)" error`.
@ -1071,25 +1071,26 @@ _time:5m | stats by (_stream) count() per_stream_logs | sort by (per_stream_logs
LogsQL supports the following pipes:
- [`copy`](#copy-pipe) copies [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`delete`](#delete-pipe) deletes [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`copy`](#copy-pipe) copies [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`delete`](#delete-pipe) deletes [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`extract`](#extract-pipe) extracts the sepcified text into the given log fields.
- [`field_names`](#field_names-pipe) returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`fields`](#fields-pipe) selects the given set of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`field_names`](#field_names-pipe) returns all the names of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`fields`](#fields-pipe) selects the given set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`filter`](#filter-pipe) applies additional [filters](#filters) to results.
- [`format`](#format-pipe) formats ouptut field from input [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`format`](#format-pipe) formats ouptut field from input [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`limit`](#limit-pipe) limits the number selected logs.
- [`offset`](#offset-pipe) skips the given number of selected logs.
- [`rename`](#rename-pipe) renames [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`sort`](#sort-pipe) sorts logs by the given [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`rename`](#rename-pipe) renames [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`replace`](#replace-pipe) replaces substrings in the specified [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`sort`](#sort-pipe) sorts logs by the given [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`stats`](#stats-pipe) calculates various stats over the selected logs.
- [`uniq`](#uniq-pipe) returns unique log entires.
- [`unpack_json`](#unpack_json-pipe) unpacks JSON fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`unpack_logfmt`](#unpack_logfmt-pipe) unpacks [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- [`unpack_json`](#unpack_json-pipe) unpacks JSON fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`unpack_logfmt`](#unpack_logfmt-pipe) unpacks [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
### copy pipe
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must be copied, then `| copy src1 as dst1, ..., srcN as dstN` [pipe](#pipes) can be used.
If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be copied, then `| copy src1 as dst1, ..., srcN as dstN` [pipe](#pipes) can be used.
For example, the following query copies `host` field to `server` for logs over the last 5 minutes, so the output contains both `host` and `server` fields:
```logsq
@ -1116,7 +1117,7 @@ See also:
### delete pipe
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must be deleted, then `| delete field1, ..., fieldN` [pipe](#pipes) can be used.
If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be deleted, then `| delete field1, ..., fieldN` [pipe](#pipes) can be used.
For example, the following query deletes `host` and `app` fields from the logs over the last 5 minutes:
```logsql
@ -1133,7 +1134,7 @@ See also:
### extract pipe
`| extract "pattern" from field_name` [pipe](#pipes) allows extracting abitrary text into output fields according to the [`pattern`](#format-for-extract-pipe-pattern) from the given
[`field_name`](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). Existing log fields remain unchanged after the `| extract ...` pipe.
[`field_name`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Existing log fields remain unchanged after the `| extract ...` pipe.
`| extract ...` can be useful for extracting additional fields needed for further data processing with other pipes such as [`stats` pipe](#stats-pipe) or [`sort` pipe](#sort-pipe).
@ -1162,6 +1163,21 @@ For example, the following query extracts `ip` from the corresponding JSON field
_time:5m | extract '"ip":"<ip>"'
```
Add `keep_original_fields` to the end of `extract ...` when the original non-empty values of the fields mentioned in the pattern must be preserved
instead of overwriting it with the extracted values. For example, the following query extracts `<ip>` only if the original value for `ip` field is missing or is empty:
```logsql
_time:5m | extract 'ip=<ip> ' keep_original_fields
```
By default `extract` writes empty matching fields to the output, which may overwrite existing values. Add `skip_empty_results` to the end of `extract ...`
in order to prevent from overwriting the existing values for the corresponding fields with empty values.
For example, the following query preserves the original `ip` field value if `foo` field doesn't contain the matching ip:
```logsql
_time:5m | extract 'ip=<ip> ' from foo skip_empty_results
```
See also:
- [Format for extract pipe pattern](#format-for-extract-pipe-pattern)
@ -1238,15 +1254,22 @@ For example, the following `pattern` properly matches `a < b` text by extracting
If some log entries must be skipped from [`extract` pipe](#extract-pipe), then add `if (<filters>)` filter after the `extract` word.
The `<filters>` can contain arbitrary [filters](#filters). For example, the following query extracts `ip` field
from [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) only
if the input [log entry](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) doesn't contain `ip` field or this field is empty:
if the input [log entry](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) doesn't contain `ip` field or this field is empty:
```logsql
_time:5m | extract if (ip:"") "ip=<ip> "
```
An alternative approach is to add `keep_original_fields` to the end of `extract`, in order to keep the original non-empty values for the extracted fields.
For example, the following query is equivalent to the previous one:
```logsql
_time:5m | extract "ip=<ip> " keep_original_fields
```
### field_names pipe
`| field_names` [pipe](#pipes) returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
`| field_names` [pipe](#pipes) returns all the names of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
with an estimated number of logs per each field name.
For example, the following query returns all the field names with the number of matching logs over the last 5 minutes:
@ -1262,7 +1285,7 @@ See also:
### fields pipe
By default all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) are returned in the response.
By default all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) are returned in the response.
It is possible to select the given set of log fields with `| fields field1, ..., fieldN` [pipe](#pipes). For example, the following query selects only `host`
and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields from logs for the last 5 minutes:
@ -1287,7 +1310,7 @@ See also:
Sometimes it is needed to apply additional filters on the calculated results. This can be done with `| filter ...` [pipe](#pipes).
The `filter` pipe can contain arbitrary [filters](#filters).
For example, the following query returns `host` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) values
For example, the following query returns `host` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values
if the number of log messages with the `error` [word](#word) for them over the last hour exceeds `1_000`:
```logsql
@ -1302,7 +1325,7 @@ See also:
### format pipe
`| format "pattern" as result_field` [pipe](#pipe) combines [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
according to the `pattern` and stores it to the `result_field`. All the other fields remain unchanged after the `| format ...` pipe.
according to the `pattern` and stores it to the `result_field`.
For example, the following query stores `request from <ip>:<port>` text into [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field),
by substituting `<ip>` and `<port>` with the corresponding [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values:
@ -1326,9 +1349,24 @@ and stores it into `my_json` output field:
_time:5m | format '{"_msg":<q:_msg>,"stacktrace":<q:stacktrace>}' as my_json
```
Add `keep_original_fields` to the end of `format ... as result_field` when the original non-empty value of the `result_field` must be preserved
instead of overwriting it with the `format` results. For example, the following query adds formatted result to `foo` field only if it was missing or empty:
```logsql
_time:5m | format 'some_text' as foo keep_original_fields
```
Add `skip_empty_results` to the end of `format ...` if emty results shouldn't be written to the output. For example, the following query adds formatted result to `foo` field
when at least `field1` or `field2` aren't empty, while preserving the original `foo` value:
```logsql
_time:5m | format "<field1><field2>" as foo skip_empty_results
```
See also:
- [Conditional format](#conditional-format)
- [`replace` pipe](#replace-pipe)
- [`extract` pipe](#extract-pipe)
@ -1383,7 +1421,7 @@ See also:
### rename pipe
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must be renamed, then `| rename src1 as dst1, ..., srcN as dstN` [pipe](#pipes) can be used.
If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be renamed, then `| rename src1 as dst1, ..., srcN as dstN` [pipe](#pipes) can be used.
For example, the following query renames `host` field to `server` for logs over the last 5 minutes, so the output contains `server` field instead of `host` field:
```logsql
@ -1406,10 +1444,53 @@ See also:
- [`fields` pipe](#fields-pipe)
- [`delete` pipe](#delete-pipe)
### replace pipe
`| replace ("old", "new") at field` [pipe](#pipes) replaces all the occurences of the `old` substring with the `new` substring
in the given [`field`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
For example, the following query replaces all the `secret-password` substrings with `***` in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
for logs over the last 5 minutes:
```logsql
_time:5m | replace ("secret-password", "***") at _msg
```
The `at _msg` part can be omitted if the replacement occurs in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
The following query is equivalent to the previous one:
```logsql
_time:5m | replace ("secret-password", "***")
```
The number of replacements can be limited with `limit N` at the end of `replace`. For example, the following query replaces only the first `foo` substring with `bar`
at the [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) `baz`:
```logsql
_time:5m | replace ('foo', 'bar') at baz limit 1
```
See also:
- [Conditional replace](#conditional-replace)
- [`format` pipe](#format-pipe)
- [`extract` pipe](#extract-pipe)
#### Conditional replace
If the [`replace` pipe](#replace-pipe) musn't be applied to every [log entry](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
then add `if (<filters>)` after `replace`.
The `<filters>` can contain arbitrary [filters](#filters). For example, the following query replaces `secret` with `***` in the `password` field
only if `user_type` field equals to `admin`:
```logsql
_time:5m | replace if (user_type:=admin) replace ("secret", "***") at password
```
### sort pipe
By default logs are selected in arbitrary order because of performance reasons. If logs must be sorted, then `| sort by (field1, ..., fieldN)` [pipe](#pipes) can be used.
The returned logs are sorted by the given [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
The returned logs are sorted by the given [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
using [natural sorting](https://en.wikipedia.org/wiki/Natural_sort_order).
For example, the following query returns logs for the last 5 minutes sorted by [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
@ -1441,14 +1522,14 @@ Sorting of big number of logs can consume a lot of CPU time and memory. Sometime
or the smallest values. This can be done by adding `limit N` to the end of `sort ...` pipe.
Such a query consumes lower amounts of memory when sorting big number of logs, since it keeps in memory only `N` log entries.
For example, the following query returns top 10 log entries with the biggest values
for the `request_duration` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) during the last hour:
for the `request_duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) during the last hour:
```logsql
_time:1h | sort by (request_duration desc) limit 10
```
If the first `N` sorted results must be skipped, then `offset N` can be added to `sort` pipe. For example,
the following query skips the first 10 logs with the biggest `request_duration` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model),
the following query skips the first 10 logs with the biggest `request_duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
and then returns the next 20 sorted logs for the last 5 minutes:
```logsql
@ -1461,7 +1542,7 @@ It is recommended limiting the number of logs before sorting with the following
- Adding `limit N` to the end of `sort ...` pipe.
- Reducing the selected time range with [time filter](#time-filter).
- Using more specific [filters](#filters), so they select less logs.
- Limiting the number of selected [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) via [`fields` pipe](#fields-pipe).
- Limiting the number of selected [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) via [`fields` pipe](#fields-pipe).
See also:
@ -1692,6 +1773,20 @@ fields from JSON value stored in `my_json` [log field](https://docs.victoriametr
_time:5m | unpack_json from my_json fields (foo, bar)
```
If it is needed to preserve the original non-empty field values, then add `keep_original_fields` to the end of `unpack_json ...`. For example,
the following query preserves the original non-empty values for `ip` and `host` fields instead of overwriting them with the unpacked values:
```logsql
_time:5m | unpack_json from foo fields (ip, host) keep_original_fields
```
Add `skip_empty_results` to the end of `unpack_json ...` if the original field values must be preserved when the corresponding unpacked values are empty.
For example, the following query preserves the original `ip` and `host` field values for empty unpacked values:
```logsql
_time:5m | unpack_json fields (ip, host) skip_empty_results
```
Performance tip: if you need extracting a single field from long JSON, it is faster to use [`extract` pipe](#extract-pipe). For example, the following query extracts `"ip"` field from JSON
stored in [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) at the maximum speed:
@ -1754,6 +1849,20 @@ from logfmt stored in the `my_logfmt` field:
_time:5m | unpack_logfmt from my_logfmt fields (foo, bar)
```
If it is needed to preserve the original non-empty field values, then add `keep_original_fields` to the end of `unpack_logfmt ...`. For example,
the following query preserves the original non-empty values for `ip` and `host` fields instead of overwriting them with the unpacked values:
```logsql
_time:5m | unpack_logfmt from foo fields (ip, host) keep_original_fields
```
Add `skip_empty_results` to the end of `unpack_logfmt ...` if the original field values must be preserved when the corresponding unpacked values are empty.
For example, the following query preserves the original `ip` and `host` field values for empty unpacked values:
```logsql
_time:5m | unpack_logfmt fields (ip, host) skip_empty_results
```
Performance tip: if you need extracting a single field from long [logfmt](https://brandur.org/logfmt) line, it is faster to use [`extract` pipe](#extract-pipe).
For example, the following query extracts `"ip"` field from [logfmt](https://brandur.org/logfmt) line stored
in [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
@ -2136,43 +2245,43 @@ See also:
## Stream context
LogsQL will support the ability to select the given number of surrounding log lines for the selected log lines
on a [per-stream](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) basis.
on a [per-stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) basis.
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
See the [Roadmap](https://docs.victoriametrics.com/victorialogs/roadmap/) for details.
## Transformations
LogsQL supports the following transformations on the log entries selected with [filters](#filters):
- Extracting arbitrary text from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided pattern.
- Extracting arbitrary text from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) according to the provided pattern.
See [these docs](#extract-pipe) for details.
- Unpacking JSON fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). See [these docs](#unpack_json-pipe).
- Unpacking [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). See [these docs](#unpack_logfmt-pipe).
- Creating a new field from existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided format. See [these docs](#format-pipe).
- Unpacking JSON fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](#unpack_json-pipe).
- Unpacking [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](#unpack_logfmt-pipe).
- Creating a new field from existing [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) according to the provided format. See [these docs](#format-pipe).
- Replacing substrings in the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](#replace-pipe).
LogsQL will support the following transformations in the future:
- Creating a new field according to math calculations over existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
- Parsing duration strings into floating-point seconds for further [stats calculations](#stats-pipe).
- Creating a new field according to math calculations over existing [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
See the [Roadmap](https://docs.victoriametrics.com/victorialogs/roadmap/) for details.
It is also possible to perform various transformations on the [selected log entries](#filters) at client side
with `jq`, `awk`, `cut`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
with `jq`, `awk`, `cut`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line).
## Post-filters
Post-filtering of query results can be performed at any step by using [`filter` pipe](#filter-pipe).
It is also possible to perform post-filtering of the [selected log entries](#filters) at client side with `grep` and similar Unix commands
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
according to [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line).
## Stats
Stats over the selected logs can be calculated via [`stats` pipe](#stats-pipe).
It is also possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
according to [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line).
## Sorting
@ -2182,7 +2291,7 @@ By default VictoriaLogs doesn't sort the returned results because of performance
LogsQL provides the following [pipes](#pipes) for limiting the number of returned log entries:
- [`fields`](#fields-pipe) and [`delete`](#delete-pipe) pipes allow limiting the set of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) to return.
- [`fields`](#fields-pipe) and [`delete`](#delete-pipe) pipes allow limiting the set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) to return.
- [`limit` pipe](#limit-pipe) allows limiting the number of log entries to return.
## Querying specific fields
@ -2236,7 +2345,7 @@ Internally duration values are converted into nanoseconds.
- It is highly recommended specifying [time filter](#time-filter) in order to narrow down the search to specific time range.
- It is highly recommended specifying [stream filter](#stream-filter) in order to narrow down the search
to specific [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
to specific [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
- Move faster filters such as [word filter](#word-filter) and [phrase filter](#phrase-filter) to the beginning of the query.
This rule doesn't apply to [time filter](#time-filter) and [stream filter](#stream-filter), which can be put at any place of the query.
- Move more specific filters, which match lower number of log entries, to the beginning of the query.

View file

@ -13,8 +13,8 @@ aliases:
# VictoriaLogs Quick Start
It is recommended to read [README](https://docs.victoriametrics.com/VictoriaLogs/)
and [Key Concepts](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html)
It is recommended to read [README](https://docs.victoriametrics.com/victorialogs/)
and [Key Concepts](https://docs.victoriametrics.com/victorialogs/keyconcepts/)
before you start working with VictoriaLogs.
## How to install and run VictoriaLogs
@ -34,22 +34,22 @@ Just download archive for the needed Operating system and architecture, unpack i
For example, the following commands download VictoriaLogs archive for Linux/amd64, unpack and run it:
```sh
curl -L -O https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v0.10.0-victorialogs/victoria-logs-linux-amd64-v0.10.0-victorialogs.tar.gz
tar xzf victoria-logs-linux-amd64-v0.10.0-victorialogs.tar.gz
curl -L -O https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v0.11.0-victorialogs/victoria-logs-linux-amd64-v0.11.0-victorialogs.tar.gz
tar xzf victoria-logs-linux-amd64-v0.11.0-victorialogs.tar.gz
./victoria-logs-prod
```
VictoriaLogs is ready for [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/VictoriaLogs/querying/) at the TCP port `9428` now!
VictoriaLogs is ready for [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/victorialogs/querying/) at the TCP port `9428` now!
It has no any external dependencies, so it may run in various environments without additional setup and configuration.
VictoriaLogs automatically adapts to the available CPU and RAM resources. It also automatically setups and creates
the needed indexes during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
the needed indexes during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
See also:
- [How to configure VictoriaLogs](#how-to-configure-victorialogs)
- [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/)
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/)
- [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/)
### Docker image
@ -59,14 +59,14 @@ Here is the command to run VictoriaLogs in a Docker container:
```sh
docker run --rm -it -p 9428:9428 -v ./victoria-logs-data:/victoria-logs-data \
docker.io/victoriametrics/victoria-logs:v0.10.0-victorialogs
docker.io/victoriametrics/victoria-logs:v0.11.0-victorialogs
```
See also:
- [How to configure VictoriaLogs](#how-to-configure-victorialogs)
- [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/)
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/)
- [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/)
### Helm charts
@ -96,17 +96,17 @@ Follow the following steps in order to build VictoriaLogs from source code:
bin/victoria-logs
```
VictoriaLogs is ready for [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/VictoriaLogs/querying/) at the TCP port `9428` now!
VictoriaLogs is ready for [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/victorialogs/querying/) at the TCP port `9428` now!
It has no any external dependencies, so it may run in various environments without additional setup and configuration.
VictoriaLogs automatically adapts to the available CPU and RAM resources. It also automatically setups and creates
the needed indexes during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
the needed indexes during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
See also:
- [How to configure VictoriaLogs](#how-to-configure-victorialogs)
- [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/)
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/)
- [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/)
## How to configure VictoriaLogs
@ -122,19 +122,19 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
```
VictoriaLogs stores the ingested data to the `victoria-logs-data` directory by default. The directory can be changed
via `-storageDataPath` command-line flag. See [these docs](https://docs.victoriametrics.com/VictoriaLogs/#storage) for details.
via `-storageDataPath` command-line flag. See [these docs](https://docs.victoriametrics.com/victorialogs/#storage) for details.
By default VictoriaLogs stores [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html) with timestamps
By default VictoriaLogs stores [log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/) with timestamps
in the time range `[now-7d, now]`, while dropping logs outside the given time range.
E.g. it uses the retention of 7 days. Read [these docs](https://docs.victoriametrics.com/VictoriaLogs/#retention) on how to control the retention
for the [ingested](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/) logs.
E.g. it uses the retention of 7 days. Read [these docs](https://docs.victoriametrics.com/victorialogs/#retention) on how to control the retention
for the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs.
It is recommended setting up monitoring of VictoriaLogs according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/#monitoring).
It is recommended setting up monitoring of VictoriaLogs according to [these docs](https://docs.victoriametrics.com/victorialogs/#monitoring).
See also:
- [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/)
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/)
- [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/)
## Docker demos

View file

@ -12,33 +12,33 @@ from [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/).
VictoriaLogs provides the following key features:
- VictoriaLogs can accept logs from popular log collectors. See [these docs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
- VictoriaLogs can accept logs from popular log collectors. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
- VictoriaLogs is much easier to set up and operate compared to Elasticsearch and Grafana Loki.
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickStart.html).
See [these docs](https://docs.victoriametrics.com/victorialogs/quickstart/).
- VictoriaLogs provides easy yet powerful query language with full-text search capabilities across
all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) -
see [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html).
all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) -
see [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/).
- VictoriaLogs can be seamlessly combined with good old Unix tools for log analysis such as `grep`, `less`, `sort`, `jq`, etc.
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line) for details.
See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line) for details.
- VictoriaLogs capacity and performance scales linearly with the available resources (CPU, RAM, disk IO, disk space).
It runs smoothly on both Raspberry PI and a server with hundreds of CPU cores and terabytes of RAM.
- VictoriaLogs can handle up to 30x bigger data volumes than Elasticsearch and Grafana Loki when running on the same hardware.
See [these docs](#benchmarks).
- VictoriaLogs supports fast full-text search over high-cardinality [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
- VictoriaLogs supports fast full-text search over high-cardinality [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
such as `trace_id`, `user_id` and `ip`.
- VictoriaLogs supports multitenancy - see [these docs](#multitenancy).
- VictoriaLogs supports out-of-order logs' ingestion aka backfilling.
- VictoriaLogs provides a simple web UI for querying logs - see [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui).
- VictoriaLogs provides a simple web UI for querying logs - see [these docs](https://docs.victoriametrics.com/victorialogs/querying/#web-ui).
VictoriaLogs is at the Preview stage now. It is ready for evaluation in production and verifying the claims given above.
It isn't recommended to migrate from existing logging solutions to VictoriaLogs Preview in general cases yet.
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
See the [Roadmap](https://docs.victoriametrics.com/victorialogs/roadmap/) for details.
If you have questions about VictoriaLogs, then read [this FAQ](https://docs.victoriametrics.com/VictoriaLogs/FAQ.html).
If you have questions about VictoriaLogs, then read [this FAQ](https://docs.victoriametrics.com/victorialogs/faq/).
Also feel free asking any questions at [VictoriaMetrics community Slack chat](https://victoriametrics.slack.com/),
you can join it via [Slack Inviter](https://slack.victoriametrics.com/).
See [Quick start docs](https://docs.victoriametrics.com/VictoriaLogs/QuickStart.html) for start working with VictoriaLogs.
See [Quick start docs](https://docs.victoriametrics.com/victorialogs/quickstart/) for start working with VictoriaLogs.
## Monitoring
@ -51,11 +51,11 @@ VictoriaLogs emits its own logs to stdout. It is recommended to investigate thes
## Upgrading
It is safe upgrading VictoriaLogs to new versions unless [release notes](https://docs.victoriametrics.com/VictoriaLogs/CHANGELOG.html) say otherwise.
It is safe to skip multiple versions during the upgrade unless [release notes](https://docs.victoriametrics.com/VictoriaLogs/CHANGELOG.html) say otherwise.
It is safe upgrading VictoriaLogs to new versions unless [release notes](https://docs.victoriametrics.com/victorialogs/changelog/) say otherwise.
It is safe to skip multiple versions during the upgrade unless [release notes](https://docs.victoriametrics.com/victorialogs/changelog/) say otherwise.
It is recommended to perform regular upgrades to the latest version, since it may contain important bug fixes, performance optimizations or new features.
It is also safe to downgrade to older versions unless [release notes](https://docs.victoriametrics.com/VictoriaLogs/CHANGELOG.html) say otherwise.
It is also safe to downgrade to older versions unless [release notes](https://docs.victoriametrics.com/victorialogs/changelog/) say otherwise.
The following steps must be performed during the upgrade / downgrade procedure:
@ -77,10 +77,10 @@ For example, the following command starts VictoriaLogs with the retention of 8 w
/path/to/victoria-logs -retentionPeriod=8w
```
VictoriaLogs stores the [ingested](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/) logs in per-day partition directories.
VictoriaLogs stores the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs in per-day partition directories.
It automatically drops partition directories outside the configured retention.
VictoriaLogs automatically drops logs at [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/) stage
VictoriaLogs automatically drops logs at [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) stage
if they have timestamps outside the configured retention. A sample of dropped logs is logged with `WARN` message in order to simplify troubleshooting.
The `vl_rows_dropped_total` [metric](#monitoring) is incremented each time an ingested log entry is dropped because of timestamp outside the retention.
It is recommended to set up the following alerting rule at [vmalert](https://docs.victoriametrics.com/vmalert/) in order to be notified
@ -115,8 +115,8 @@ VictoriaLogs automatically creates the `-storageDataPath` directory on the first
## Multitenancy
VictoriaLogs supports multitenancy. A tenant is identified by `(AccountID, ProjectID)` pair, where `AccountID` and `ProjectID` are arbitrary 32-bit unsigned integers.
The `AccountID` and `ProjectID` fields can be set during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/VictoriaLogs/querying/) via `AccountID` and `ProjectID` request headers.
The `AccountID` and `ProjectID` fields can be set during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/victorialogs/querying/) via `AccountID` and `ProjectID` request headers.
If `AccountID` and/or `ProjectID` request headers aren't set, then the default `0` value is used.
@ -129,7 +129,7 @@ VictoriaLogs doesn't perform per-tenant authorization. Use [vmauth](https://docs
Here is a [benchmark suite](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/logs-benchmark) for comparing data ingestion performance
and resource usage between VictoriaLogs and Elasticsearch or Loki.
It is recommended [setting up VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/QuickStart.html) in production alongside the existing
It is recommended [setting up VictoriaLogs](https://docs.victoriametrics.com/victorialogs/quickstart/) in production alongside the existing
log management systems and comparing resource usage + query performance between VictoriaLogs and your system such as Elasticsearch or Grafana Loki.
Please share benchmark results and ideas on how to improve benchmarks / VictoriaLogs
@ -154,7 +154,7 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
-fs.disableMmap
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
-futureRetention value
Log entries with timestamps bigger than now+futureRetention are rejected during data ingestion; see https://docs.victoriametrics.com/VictoriaLogs/#retention
Log entries with timestamps bigger than now+futureRetention are rejected during data ingestion; see https://docs.victoriametrics.com/victorialogs/#retention
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 2d)
-gogc int
GOGC to use. See https://tip.golang.org/doc/gc-guide (default 100)
@ -201,9 +201,9 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
-internStringMaxLen int
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
-logIngestedRows
Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; see https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/ ; see also -logNewStreams
Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; see https://docs.victoriametrics.com/victorialogs/data-ingestion/ ; see also -logNewStreams
-logNewStreams
Whether to log creation of new streams; this can be useful for debugging of high cardinality issues with log streams; see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields ; see also -logIngestedRows
Whether to log creation of new streams; this can be useful for debugging of high cardinality issues with log streams; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields ; see also -logIngestedRows
-loggerDisableTimestamps
Whether to disable writing timestamps in logs
-loggerErrorsPerSecondLimit int
@ -244,7 +244,7 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
Supports an array of values separated by comma or specified via multiple flags.
-retentionPeriod value
Log entries with timestamps older than now-retentionPeriod are automatically deleted; log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); see https://docs.victoriametrics.com/VictoriaLogs/#retention
Log entries with timestamps older than now-retentionPeriod are automatically deleted; log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); see https://docs.victoriametrics.com/victorialogs/#retention
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 7d)
-search.maxConcurrentRequests int
The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. See also -search.maxQueueDuration (default 6)
@ -253,7 +253,7 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
-search.maxQueueDuration duration
The maximum time the search request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
-storageDataPath string
Path to directory with the VictoriaLogs data; see https://docs.victoriametrics.com/VictoriaLogs/#storage (default "victoria-logs-data")
Path to directory with the VictoriaLogs data; see https://docs.victoriametrics.com/victorialogs/#storage (default "victoria-logs-data")
-storage.minFreeDiskSpaceBytes size
The minimum free disk space at -storageDataPath after which the storage stops accepting new data
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10000000)

View file

@ -14,32 +14,32 @@ aliases:
# VictoriaLogs roadmap
The [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/) Preview is ready for evaluation in production.
The [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) Preview is ready for evaluation in production.
It is recommended running it alongside the existing solutions such as Elasticsearch and Grafana Loki
and comparing their resource usage and usability.
It isn't recommended migrating from existing solutions to VictoriaLogs Preview yet.
The following functionality is available in VictoriaLogs Preview:
- [Data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
- [Querying](https://docs.victoriametrics.com/VictoriaLogs/querying/).
- [Querying via command-line](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
- [Data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
- [Querying](https://docs.victoriametrics.com/victorialogs/querying/).
- [Querying via command-line](https://docs.victoriametrics.com/victorialogs/querying/#command-line).
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/) for details.
See [these docs](https://docs.victoriametrics.com/victorialogs/) for details.
The following functionality is planned in the future versions of VictoriaLogs:
- Support for [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/) from popular log collectors and formats:
- Support for [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) from popular log collectors and formats:
- OpenTelemetry for logs
- Fluentd
- Syslog
- Journald (systemd)
- Add missing functionality to [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html):
- [Transformation functions](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#transformations).
- [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context).
- Live tailing for [LogsQL filters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#filters) aka `tail -f`.
- Add missing functionality to [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/):
- [Transformation functions](https://docs.victoriametrics.com/victorialogs/logsql/#transformations).
- [Stream context](https://docs.victoriametrics.com/victorialogs/logsql/#stream-context).
- Live tailing for [LogsQL filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) aka `tail -f`.
- Web UI with the following abilities:
- Explore the ingested logs ([partially done](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui)).
- Explore the ingested logs ([partially done](https://docs.victoriametrics.com/victorialogs/querying/#web-ui)).
- Build graphs over time for the ingested logs via [hits HTTP API](https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats).
- Integration with Grafana ([partially done](https://github.com/VictoriaMetrics/victorialogs-datasource)).
- Ability to make instant snapshots and backups in the way [similar to VictoriaMetrics](https://docs.victoriametrics.com/#how-to-work-with-snapshots).

View file

@ -13,7 +13,7 @@ aliases:
# Filebeat setup
Specify [`output.elasicsearch`](https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html) section in the `filebeat.yml`
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/):
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/):
```yaml
output.elasticsearch:
@ -26,11 +26,11 @@ output.elasticsearch:
Substitute the `localhost:9428` address inside `hosts` section with the real TCP address of VictoriaLogs.
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters) for details on the `parameters` section.
See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details on the `parameters` section.
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters)
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters)
and inspecting VictoriaLogs logs then:
```yaml
@ -43,8 +43,8 @@ output.elasticsearch:
debug: "1"
```
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters).
If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters).
For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
```yaml
@ -84,7 +84,7 @@ output.elasticsearch:
compression_level: 1
```
By default, the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/#multitenancy).
By default, the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy).
If you need storing logs in other tenant, then specify the needed tenant via `headers` at `output.elasticsearch` section.
For example, the following `filebeat.yml` config instructs Filebeat to store the data to `(AccountID=12, ProjectID=34)` tenant:
@ -118,7 +118,7 @@ command-line flag.
See also:
- [Data ingestion troubleshooting](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#troubleshooting).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
- [Data ingestion troubleshooting](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
- [Filebeat `output.elasticsearch` docs](https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html).
- [Docker-compose demo for Filebeat integration with VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/victorialogs/filebeat-docker).

View file

@ -13,7 +13,7 @@ aliases:
# Fluentbit setup
Specify [http output](https://docs.fluentbit.io/manual/pipeline/outputs/http) section in the `fluentbit.conf`
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/):
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/):
```conf
[Output]
@ -28,11 +28,11 @@ for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.co
Substitute the host (`localhost`) and port (`9428`) with the real TCP address of VictoriaLogs.
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters) for details on the query args specified in the `uri`.
See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details on the query args specified in the `uri`.
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters) in the `uri`
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) in the `uri`
and inspecting VictoriaLogs logs then:
```conf
@ -46,8 +46,8 @@ and inspecting VictoriaLogs logs then:
json_date_format iso8601
```
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters).
If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters).
For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
```conf
@ -76,7 +76,7 @@ This usually allows saving network bandwidth and costs by up to 5 times:
compress gzip
```
By default, the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy).
By default, the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/keyconcepts/#multitenancy).
If you need storing logs in other tenant, then specify the needed tenant via `header` options.
For example, the following `fluentbit.conf` config instructs Fluentbit to store the data to `(AccountID=12, ProjectID=34)` tenant:
@ -95,7 +95,7 @@ For example, the following `fluentbit.conf` config instructs Fluentbit to store
See also:
- [Data ingestion troubleshooting](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#troubleshooting).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
- [Data ingestion troubleshooting](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
- [Fluentbit HTTP output config docs](https://docs.fluentbit.io/manual/pipeline/outputs/http).
- [Docker-compose demo for Fluentbit integration with VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/victorialogs/fluentbit-docker).

View file

@ -13,7 +13,7 @@ aliases:
# Logstash setup
Specify [`output.elasticsearch`](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html) section in the `logstash.conf` file
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/):
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/):
```conf
output {
@ -30,11 +30,11 @@ output {
Substitute `localhost:9428` address inside `hosts` with the real TCP address of VictoriaLogs.
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters) for details on the `parameters` section.
See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details on the `parameters` section.
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters)
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters)
and inspecting VictoriaLogs logs then:
```conf
@ -51,8 +51,8 @@ output {
}
```
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters).
If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters).
For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
```conf
@ -86,7 +86,7 @@ output {
}
```
By default, the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/#multitenancy).
By default, the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy).
If you need storing logs in other tenant, then specify the needed tenant via `custom_headers` at `output.elasticsearch` section.
For example, the following `logstash.conf` config instructs Logstash to store the data to `(AccountID=12, ProjectID=34)` tenant:
@ -109,7 +109,7 @@ output {
See also:
- [Data ingestion troubleshooting](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#troubleshooting).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
- [Data ingestion troubleshooting](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
- [Logstash `output.elasticsearch` docs](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html).
- [Docker-compose demo for Logstash integration with VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/victorialogs/logstash).

View file

@ -15,7 +15,7 @@ aliases:
Promtail can be configured to send the collected logs to VictoriaLogs according to the following docs.
Specify [`clients`](https://grafana.com/docs/loki/latest/clients/promtail/configuration/#clients) section in the configuration file
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/):
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/):
```yaml
clients:
@ -24,18 +24,18 @@ clients:
Substitute `localhost:9428` address inside `clients` with the real TCP address of VictoriaLogs.
By default VictoriaLogs stores all the ingested logs into a single [log stream](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
By default VictoriaLogs stores all the ingested logs into a single [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
Storing all the logs in a single log stream may be not so efficient, so it is recommended to specify `_stream_fields` query arg
with the list of labels, which uniquely identify log streams. There is no need in specifying all the labels Promtail generates there -
it is usually enough specifying `instance` and `job` labels. See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
it is usually enough specifying `instance` and `job` labels. See [these docs](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
for details.
See also [these docs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters) for details on other supported query args.
See also [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details on other supported query args.
There is no need in specifying `_msg_field` and `_time_field` query args, since VictoriaLogs automatically extracts log message and timestamp from the ingested Loki data.
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters)
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters)
and inspecting VictoriaLogs logs then:
```yaml
@ -43,8 +43,8 @@ clients:
- url: http://localhost:9428/insert/loki/api/v1/push?_stream_fields=instance,job,host,app&debug=1
```
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters).
If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters).
For example, the following config instructs VictoriaLogs to ignore `filename` and `stream` fields in the ingested logs:
```yaml
@ -52,11 +52,11 @@ clients:
- url: http://localhost:9428/insert/loki/api/v1/push?_stream_fields=instance,job,host,app&ignore_fields=filename,stream
```
By default the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/#multitenancy).
By default the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy).
If you need storing logs in other tenant, then specify the needed tenant via `tenant_id` field
in the [Loki client configuration](https://grafana.com/docs/loki/latest/clients/promtail/configuration/#clients)
The `tenant_id` must have `AccountID:ProjectID` format, where `AccountID` and `ProjectID` are arbitrary uint32 numbers.
For example, the following config instructs VictoriaLogs to store logs in the `(AccountID=12, ProjectID=34)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/#multitenancy):
For example, the following config instructs VictoriaLogs to store logs in the `(AccountID=12, ProjectID=34)` [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy):
```yaml
clients:
@ -64,6 +64,6 @@ clients:
tenant_id: "12:34"
```
The ingested log entries can be queried according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
The ingested log entries can be queried according to [these docs](https://docs.victoriametrics.com/victorialogs/querying/).
See also [data ingestion troubleshooting](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#troubleshooting) docs.
See also [data ingestion troubleshooting](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting) docs.

View file

@ -14,15 +14,15 @@ aliases:
# Data ingestion
[VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/) can accept logs from the following log collectors:
[VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) can accept logs from the following log collectors:
- Filebeat. See [how to setup Filebeat for sending logs to VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Filebeat.html).
- Fluentbit. See [how to setup Fluentbit for sending logs to VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Fluentbit.html).
- Logstash. See [how to setup Logstash for sending logs to VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Logstash.html).
- Vector. See [how to setup Vector for sending logs to VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Vector.html).
- Promtail (aka Grafana Loki). See [how to setup Promtail for sending logs to VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Promtail.html).
- Filebeat. See [how to setup Filebeat for sending logs to VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/Filebeat.html).
- Fluentbit. See [how to setup Fluentbit for sending logs to VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/Fluentbit.html).
- Logstash. See [how to setup Logstash for sending logs to VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/Logstash.html).
- Vector. See [how to setup Vector for sending logs to VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/Vector.html).
- Promtail (aka Grafana Loki). See [how to setup Promtail for sending logs to VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/Promtail.html).
The ingested logs can be queried according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
The ingested logs can be queried according to [these docs](https://docs.victoriametrics.com/victorialogs/querying/).
See also:
@ -56,18 +56,18 @@ echo '{"create":{}}
It is possible to push thousands of log lines in a single request to this API.
If the [timestamp field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) is set to `"0"`,
If the [timestamp field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) is set to `"0"`,
then the current timestamp at VictoriaLogs side is used per each ingested log line.
Otherwise the timestamp field must be in the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) format. For example, `2023-06-20T15:32:10Z`.
Optional fractional part of seconds can be specified after the dot - `2023-06-20T15:32:10.123Z`.
Timezone can be specified instead of `Z` suffix - `2023-06-20T15:32:10+02:00`.
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) for details on fields,
See [these docs](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for details on fields,
which must be present in the ingested log messages.
The API accepts various http parameters, which can change the data ingestion behavior - [these docs](#http-parameters) for details.
The following command verifies that the data has been successfully ingested to VictoriaLogs by [querying](https://docs.victoriametrics.com/VictoriaLogs/querying/) it:
The following command verifies that the data has been successfully ingested to VictoriaLogs by [querying](https://docs.victoriametrics.com/victorialogs/querying/) it:
```sh
curl http://localhost:9428/select/logsql/query -d 'query=host.name:host123'
@ -80,7 +80,7 @@ The command should return the following response:
```
The response by default contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See [how to query specific fields](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#querying-specific-fields).
See [how to query specific fields](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
The duration of requests to `/insert/elasticsearch/_bulk` can be monitored with `vl_http_request_duration_seconds{path="/insert/elasticsearch/_bulk"}` metric.
@ -88,7 +88,7 @@ See also:
- [How to debug data ingestion](#troubleshooting).
- [HTTP parameters, which can be passed to the API](#http-parameters).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
### JSON stream API
@ -106,18 +106,18 @@ echo '{ "log": { "level": "info", "message": "hello world" }, "date": "0", "stre
It is possible to push unlimited number of log lines in a single request to this API.
If the [timestamp field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) is set to `"0"`,
If the [timestamp field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) is set to `"0"`,
then the current timestamp at VictoriaLogs side is used per each ingested log line.
Otherwise the timestamp field must be in the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) format. For example, `2023-06-20T15:32:10Z`.
Optional fractional part of seconds can be specified after the dot - `2023-06-20T15:32:10.123Z`.
Timezone can be specified instead of `Z` suffix - `2023-06-20T15:32:10+02:00`.
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) for details on fields,
See [these docs](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for details on fields,
which must be present in the ingested log messages.
The API accepts various http parameters, which can change the data ingestion behavior - [these docs](#http-parameters) for details.
The following command verifies that the data has been successfully ingested into VictoriaLogs by [querying](https://docs.victoriametrics.com/VictoriaLogs/querying/) it:
The following command verifies that the data has been successfully ingested into VictoriaLogs by [querying](https://docs.victoriametrics.com/victorialogs/querying/) it:
```sh
curl http://localhost:9428/select/logsql/query -d 'query=log.level:*'
@ -132,7 +132,7 @@ The command should return the following response:
```
The response by default contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See [how to query specific fields](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#querying-specific-fields).
See [how to query specific fields](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
The duration of requests to `/insert/jsonline` can be monitored with `vl_http_request_duration_seconds{path="/insert/jsonline"}` metric.
@ -140,7 +140,7 @@ See also:
- [How to debug data ingestion](#troubleshooting).
- [HTTP parameters, which can be passed to the API](#http-parameters).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
### Loki JSON API
@ -158,7 +158,7 @@ It is possible to push thousands of log streams and log lines in a single reques
The API accepts various http parameters, which can change the data ingestion behavior - [these docs](#http-parameters) for details.
There is no need in specifying `_msg_field` and `_time_field` query args, since VictoriaLogs automatically extracts log message and timestamp from the ingested Loki data.
The following command verifies that the data has been successfully ingested into VictoriaLogs by [querying](https://docs.victoriametrics.com/VictoriaLogs/querying/) it:
The following command verifies that the data has been successfully ingested into VictoriaLogs by [querying](https://docs.victoriametrics.com/victorialogs/querying/) it:
```sh
curl http://localhost:9428/select/logsql/query -d 'query=fizzbuzz'
@ -171,7 +171,7 @@ The command should return the following response:
```
The response by default contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See [how to query specific fields](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#querying-specific-fields).
See [how to query specific fields](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
The duration of requests to `/insert/loki/api/v1/push` can be monitored with `vl_http_request_duration_seconds{path="/insert/loki/api/v1/push"}` metric.
@ -179,28 +179,28 @@ See also:
- [How to debug data ingestion](#troubleshooting).
- [HTTP parameters, which can be passed to the API](#http-parameters).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
### HTTP parameters
VictoriaLogs accepts the following parameters at [data ingestion HTTP APIs](#http-apis):
- `_msg_field` - it must contain the name of the [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
with the [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) generated by the log shipper.
- `_msg_field` - it must contain the name of the [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
with the [log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) generated by the log shipper.
This is usually the `message` field for Filebeat and Logstash.
If the `_msg_field` parameter isn't set, then VictoriaLogs reads the log message from the `_msg` field.
- `_time_field` - it must contain the name of the [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
with the [log timestamp](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) generated by the log shipper.
- `_time_field` - it must contain the name of the [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
with the [log timestamp](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) generated by the log shipper.
This is usually the `@timestamp` field for Filebeat and Logstash.
If the `_time_field` parameter isn't set, then VictoriaLogs reads the timestamp from the `_time` field.
If this field doesn't exist, then the current timestamp is used.
- `_stream_fields` - it should contain comma-separated list of [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) names,
which uniquely identify every [log stream](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) collected the log shipper.
- `_stream_fields` - it should contain comma-separated list of [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names,
which uniquely identify every [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) collected the log shipper.
If the `_stream_fields` parameter isn't set, then all the ingested logs are written to default log stream - `{}`.
- `ignore_fields` - this parameter may contain the list of [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) names,
- `ignore_fields` - this parameter may contain the list of [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names,
which must be ignored during data ingestion.
- `debug` - if this parameter is set to `1`, then the ingested logs aren't stored in VictoriaLogs. Instead,
@ -211,7 +211,7 @@ See also [HTTP headers](#http-headers).
### HTTP headers
VictoriaLogs accepts optional `AccountID` and `ProjectID` headers at [data ingestion HTTP APIs](#http-apis).
These headers may contain the needed tenant to ingest data to. See [multitenancy docs](https://docs.victoriametrics.com/VictoriaLogs/#multitenancy) for details.
These headers may contain the needed tenant to ingest data to. See [multitenancy docs](https://docs.victoriametrics.com/victorialogs/#multitenancy) for details.
## Troubleshooting
@ -221,35 +221,35 @@ The following command can be used for verifying whether the data is successfully
curl http://localhost:9428/select/logsql/query -d 'query=*' | head
```
This command selects all the data ingested into VictoriaLogs via [HTTP query API](https://docs.victoriametrics.com/VictoriaLogs/querying/#http-api)
using [any value filter](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#any-value-filter),
while `head` cancels query execution after reading the first 10 log lines. See [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line)
This command selects all the data ingested into VictoriaLogs via [HTTP query API](https://docs.victoriametrics.com/victorialogs/querying/#http-api)
using [any value filter](https://docs.victoriametrics.com/victorialogs/logsql/#any-value-filter),
while `head` cancels query execution after reading the first 10 log lines. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line)
for more details on how `head` integrates with VictoriaLogs.
The response by default contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See [how to query specific fields](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#querying-specific-fields).
See [how to query specific fields](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
VictoriaLogs provides the following command-line flags, which can help debugging data ingestion issues:
- `-logNewStreams` - if this flag is passed to VictoriaLogs, then it logs all the newly
registered [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
This may help debugging [high cardinality issues](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality).
registered [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
This may help debugging [high cardinality issues](https://docs.victoriametrics.com/victorialogs/keyconcepts/#high-cardinality).
- `-logIngestedRows` - if this flag is passed to VictoriaLogs, then it logs all the ingested
[log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
[log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See also `debug` [parameter](#http-parameters).
VictoriaLogs exposes various [metrics](https://docs.victoriametrics.com/VictoriaLogs/#monitoring), which may help debugging data ingestion issues:
VictoriaLogs exposes various [metrics](https://docs.victoriametrics.com/victorialogs/#monitoring), which may help debugging data ingestion issues:
- `vl_rows_ingested_total` - the number of ingested [log entries](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
- `vl_rows_ingested_total` - the number of ingested [log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
since the last VictoriaLogs restart. If this number icreases over time, then logs are successfully ingested into VictoriaLogs.
The ingested logs can be inspected in the following ways:
- By passing `debug=1` parameter to every request to [data ingestion APIs](#http-apis). The ingested rows aren't stored in VictoriaLogs
in this case. Instead, they are logged, so they can be investigated later.
The `vl_rows_dropped_total` [metric](https://docs.victoriametrics.com/VictoriaLogs/#monitoring) is incremented for each logged row.
The `vl_rows_dropped_total` [metric](https://docs.victoriametrics.com/victorialogs/#monitoring) is incremented for each logged row.
- By passing `-logIngestedRows` command-line flag to VictoriaLogs. In this case it logs all the ingested data, so it can be investigated later.
- `vl_streams_created_total` - the number of created [log streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields)
- `vl_streams_created_total` - the number of created [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
since the last VictoriaLogs restart. If this metric grows rapidly during extended periods of time, then this may lead
to [high cardinality issues](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality).
to [high cardinality issues](https://docs.victoriametrics.com/victorialogs/keyconcepts/#high-cardinality).
The newly created log streams can be inspected in logs by passing `-logNewStreams` command-line flag to VictoriaLogs.
## Log collectors and data ingestion formats
@ -258,8 +258,8 @@ Here is the list of log collectors and their ingestion formats supported by Vict
| How to setup the collector | Format: Elasticsearch | Format: JSON Stream | Format: Loki |
|------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------|---------------------------------------------------------------|-------------------------------------------------------------------------------------|
| [Filebeat](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Filebeat.html) | [Yes](https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html) | No | No |
| [Fluentbit](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Fluentbit.html) | No | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/http) | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/loki) |
| [Logstash](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Logstash.html) | [Yes](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html) | No | No |
| [Vector](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Vector.html) | [Yes](https://vector.dev/docs/reference/configuration/sinks/elasticsearch/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/http/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/loki/) |
| [Promtail](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/Promtail.html) | No | No | [Yes](https://grafana.com/docs/loki/latest/clients/promtail/configuration/#clients) |
| [Filebeat](https://docs.victoriametrics.com/victorialogs/data-ingestion/Filebeat.html) | [Yes](https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html) | No | No |
| [Fluentbit](https://docs.victoriametrics.com/victorialogs/data-ingestion/Fluentbit.html) | No | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/http) | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/loki) |
| [Logstash](https://docs.victoriametrics.com/victorialogs/data-ingestion/Logstash.html) | [Yes](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html) | No | No |
| [Vector](https://docs.victoriametrics.com/victorialogs/data-ingestion/Vector.html) | [Yes](https://vector.dev/docs/reference/configuration/sinks/elasticsearch/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/http/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/loki/) |
| [Promtail](https://docs.victoriametrics.com/victorialogs/data-ingestion/Promtail.html) | No | No | [Yes](https://grafana.com/docs/loki/latest/clients/promtail/configuration/#clients) |

View file

@ -15,7 +15,7 @@ aliases:
## Elasticsearch sink
Specify [Elasticsearch sink type](https://vector.dev/docs/reference/configuration/sinks/elasticsearch/) in the `vector.toml`
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/):
for sending the collected logs to [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/):
```toml
[sinks.vlogs]
@ -36,12 +36,12 @@ Substitute the `localhost:9428` address inside `endpoints` section with the real
Replace `your_input` with the name of the `inputs` section, which collects logs. See [these docs](https://vector.dev/docs/reference/configuration/sources/) for details.
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters) for details on parameters specified
See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details on parameters specified
in the `[sinks.vlogs.query]` section.
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters)
It is recommended verifying whether the initial setup generates the needed [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
and uses the correct [stream fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
This can be done by specifying `debug` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters)
in the `[sinks.vlogs.query]` section and inspecting VictoriaLogs logs then:
```toml
@ -60,8 +60,8 @@ in the `[sinks.vlogs.query]` section and inspecting VictoriaLogs logs then:
debug = "1"
```
If some [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#http-parameters).
If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be skipped
during data ingestion, then they can be put into `ignore_fields` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters).
For example, the following config instructs VictoriaLogs to ignore `log.offset` and `event.original` fields in the ingested logs:
```toml
@ -120,7 +120,7 @@ This usually allows saving network bandwidth and costs by up to 5 times:
_stream_fields = "host,container_name"
```
By default, the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#multitenancy).
By default, the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/keyconcepts/#multitenancy).
If you need storing logs in other tenant, then specify the needed tenant via `[sinks.vlogq.request.headers]` section.
For example, the following `vector.toml` config instructs Vector to store the data to `(AccountID=12, ProjectID=34)` tenant:
@ -164,7 +164,7 @@ for sending data to [JSON stream API](https://docs.victoriametrics.com/victorial
See also:
- [Data ingestion troubleshooting](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/#troubleshooting).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/querying/).
- [Data ingestion troubleshooting](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting).
- [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
- [Elasticsearch output docs for Vector](https://vector.dev/docs/reference/configuration/sinks/elasticsearch/).
- [Docker-compose demo for Filebeat integration with VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/victorialogs/vector-docker).

View file

@ -15,7 +15,7 @@ aliases:
## Data model
[VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/) works with both structured and unstructured logs.
[VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) works with both structured and unstructured logs.
Every log entry must contain at least [log message field](#message-field) plus arbitrary number of additional `key=value` fields.
A single log entry can be expressed as a single-level [JSON](https://www.json.org/json-en.html) object with string keys and values.
For example:
@ -32,7 +32,7 @@ For example:
```
VictoriaLogs automatically transforms multi-level JSON (aka nested JSON) into single-level JSON
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/) according to the following rules:
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) according to the following rules:
- Nested dictionaries are flattened by concatenating dictionary keys with `.` char. For example, the following multi-level JSON
is transformed into the following single-level JSON:
@ -55,7 +55,7 @@ during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-inges
}
```
- Arrays, numbers and boolean values are converted into strings. This simplifies [full-text search](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) over such values.
- Arrays, numbers and boolean values are converted into strings. This simplifies [full-text search](https://docs.victoriametrics.com/victorialogs/logsql/) over such values.
For example, the following JSON with an array, a number and a boolean value is converted into the following JSON with string values:
```json
@ -75,7 +75,7 @@ during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-inges
```
Both label name and label value may contain arbitrary chars. Such chars must be encoded
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/)
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
according to [JSON string encoding](https://www.rfc-editor.org/rfc/rfc7159.html#section-7).
Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding:
@ -86,8 +86,8 @@ Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8)
}
```
VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/) logs.
This enables [full-text search](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) across all the fields.
VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs.
This enables [full-text search](https://docs.victoriametrics.com/victorialogs/logsql/) across all the fields.
VictoriaLogs supports the following field types:
@ -109,9 +109,9 @@ log entry, which can be ingested into VictoriaLogs:
```
If the actual log message has other than `_msg` field name, then it is possible to specify the real log message field
via `_msg_field` query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
via `_msg_field` query arg during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
For example, if log message is located in the `event.original` field, then specify `_msg_field=event.original` query arg
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
### Time field
@ -126,14 +126,14 @@ For example:
```
If the actual timestamp has other than `_time` field name, then it is possible to specify the real timestamp
field via `_time_field` query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
field via `_time_field` query arg during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
For example, if timestamp is located in the `event.created` field, then specify `_time_field=event.created` query arg
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
If `_time` field is missing, then the data ingestion time is used as log entry timestamp.
The log entry timestamp allows quickly narrowing down the search to a particular time range.
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) for details.
See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for details.
### Stream fields
@ -149,14 +149,14 @@ VictoriaLogs optimizes storing and querying of individual log streams. This prov
than a mixed log stream from multiple distinct applications.
- Increased query performance, since VictoriaLogs needs to scan lower amounts of data
when [searching by stream labels](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter).
when [searching by stream labels](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
VictoriaLogs cannot determine automatically, which fields uniquely identify every log stream,
so it stores all the received log entries in a single default stream - `{}`.
This may lead to not-so-optimal resource usage and query performance.
Therefore it is recommended specifying stream-level fields via `_stream_fields` query arg
during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/).
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
For example, if logs from Kubernetes containers have the following fields:
```json
@ -170,7 +170,7 @@ For example, if logs from Kubernetes containers have the following fields:
```
then sepcify `_stream_fields=kubernetes.namespace,kubernetes.node.name,kubernetes.pod.name,kubernetes.container.name`
query arg during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/) in order to properly store
query arg during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) in order to properly store
per-container logs into distinct streams.
#### How to determine which fields must be associated with log streams?
@ -186,7 +186,7 @@ In this case it is OK to associate the log stream with filepath fields such as `
Structured logs may contain big number of fields, which do not change across log entries received from a single application instance.
There is no need in associating all these fields with log stream - it is enough to associate only those fields, which uniquely identify
the application instance across all the ingested logs. Additionally, some fields such as `datacenter`, `environment`, `namespace`, `job` or `app`,
can be associated with log stream in order to optimize searching by these fields with [stream filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter).
can be associated with log stream in order to optimize searching by these fields with [stream filtering](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
Never associate log streams with fields, which may change across log entries of the same application instance. See [these docs](#high-cardinality) for details.
@ -199,14 +199,14 @@ VictoriaLogs works perfectly with such fields unless they are associated with [l
Never associate high-cardinality fields with [log streams](#stream-fields), since this may result
to the following issues:
- Performance degradation during [data ingestion](https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/VictoriaLogs/querying/)
- Performance degradation during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/victorialogs/querying/)
- Increased memory usage
- Increased CPU usage
- Increased disk space usage
- Increased disk read / write IO
VictoriaLogs exposes `vl_streams_created_total` [metric](https://docs.victoriametrics.com/VictoriaLogs/#monitoring),
VictoriaLogs exposes `vl_streams_created_total` [metric](https://docs.victoriametrics.com/victorialogs/#monitoring),
which shows the number of created streams since the last VictoriaLogs restart. If this metric grows at a rapid rate
during long period of time, then there are high chances of high cardinality issues mentioned above.
VictoriaLogs can log all the newly registered streams when `-logNewStreams` command-line flag is passed to it.
@ -218,5 +218,5 @@ The rest of [structured logging](#data-model) fields are optional. They can be u
For example, it is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long log message.
E.g. the `trace_id:XXXX-YYYY-ZZZZ` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query.
See [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) for more details.
See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) for more details.

View file

@ -14,7 +14,7 @@ aliases:
# Querying
[VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/) can be queried with [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html)
[VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) can be queried with [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/)
via the following ways:
- [Web UI](#web-ui) - a web-based UI for querying logs
@ -36,7 +36,7 @@ VictoriaLogs provides the following HTTP endpoints:
### Querying logs
Logs stored in VictoriaLogs can be queried at the `/select/logsql/query` HTTP endpoint.
The [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) query must be passed via `query` argument.
The [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) query must be passed via `query` argument.
For example, the following query returns all the log entries with the `error` word:
```sh
@ -44,13 +44,13 @@ curl http://localhost:9428/select/logsql/query -d 'query=error'
```
The response by default contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See [how to query specific fields](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#querying-specific-fields).
See [how to query specific fields](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
The `query` argument can be passed either in the request url itself (aka HTTP GET request) or via request body
with the `x-www-form-urlencoded` encoding (aka HTTP POST request). The HTTP POST is useful for sending long queries
when they do not fit the maximum url length of the used clients and proxies.
See [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) for details on what can be passed to the `query` arg.
See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) for details on what can be passed to the `query` arg.
The `query` arg must be properly encoded with [percent encoding](https://en.wikipedia.org/wiki/URL_encoding) when passing it to `curl`
or similar tools.
@ -86,10 +86,10 @@ This allows post-processing the returned lines at the client side with the usual
See [these docs](#command-line) for more details.
The returned lines aren't sorted, since sorting disables the ability to send matching log entries to response stream as soon as they are found.
Query results can be sorted either at VictoriaLogs side according [to these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#sort-pipe)
Query results can be sorted either at VictoriaLogs side according [to these docs](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe)
or at client side with the usual `sort` command according to [these docs](#command-line).
By default the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/VictoriaLogs/#multitenancy) is queried.
By default the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy) is queried.
If you need querying other tenant, then specify the needed tenant via http request headers. For example, the following query searches
for log messages at `(AccountID=12, ProjectID=34)` tenant:
@ -97,7 +97,7 @@ for log messages at `(AccountID=12, ProjectID=34)` tenant:
curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID: 34' -d 'query=error'
```
The number of requests to `/select/logsql/query` can be [monitored](https://docs.victoriametrics.com/VictoriaLogs/#monitoring)
The number of requests to `/select/logsql/query` can be [monitored](https://docs.victoriametrics.com/victorialogs/#monitoring)
with `vl_http_requests_total{path="/select/logsql/query"}` metric.
- [Querying hits stats](#querying-hits-stats)
@ -443,7 +443,7 @@ See also:
## Web UI
VictoriaLogs provides a simple Web UI for logs [querying](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) and exploration
VictoriaLogs provides a simple Web UI for logs [querying](https://docs.victoriametrics.com/victorialogs/logsql/) and exploration
at `http://localhost:9428/select/vmui`. The UI allows exploring query results:
<img src="vmui.webp" />
@ -457,9 +457,9 @@ There are three modes of displaying query results:
This is the first version that has minimal functionality. It comes with the following limitations:
- The number of query results is always limited to 1000 lines. Iteratively add
more specific [filters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#filters) to the query
more specific [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) to the query
in order to get full response with less than 1000 lines.
- Queries are always executed against [tenant](https://docs.victoriametrics.com/VictoriaLogs/#multitenancy) `0`.
- Queries are always executed against [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy) `0`.
These limitations will be removed in future versions.
@ -482,7 +482,7 @@ These features allow executing queries at command-line interface, which potentia
without the risk of high resource usage (CPU, RAM, disk IO) at VictoriaLogs server.
For example, the following query can return very big number of matching log entries (e.g. billions) if VictoriaLogs contains
many log messages with the `error` [word](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#word):
many log messages with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=error'
@ -512,9 +512,9 @@ It doesn't consume CPU and disk IO resources during this time. It resumes query
when the `less` continues reading the response stream.
Suppose that the initial investigation of the returned query results helped determining that the needed log messages contain
`cannot open file` [phrase](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#phrase-filter).
`cannot open file` [phrase](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter).
Then the query can be narrowed down to `error AND "cannot open file"`
(see [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#logical-filter) about `AND` operator).
(see [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter) about `AND` operator).
Then run the updated command in order to continue the investigation:
```sh
@ -531,45 +531,45 @@ The returned VictoriaLogs query response can be post-processed with any combinat
which are usually used for log analysis - `grep`, `jq`, `awk`, `sort`, `uniq`, `wc`, etc.
For example, the following command uses `wc -l` Unix command for counting the number of log messages
with the `error` [word](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#word)
received from [streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields) with `app="nginx"` field
with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
received from [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) with `app="nginx"` field
during the last 5 minutes:
```sh
curl http://localhost:9428/select/logsql/query -d 'query=_stream:{app="nginx"} AND _time:5m AND error' | wc -l
```
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter) about `_stream` filter,
[these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#time-filter) about `_time` filter
and [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#logical-filter) about `AND` operator.
See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter) about `_stream` filter,
[these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) about `_time` filter
and [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter) about `AND` operator.
The following example shows how to sort query results by the [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field):
The following example shows how to sort query results by the [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=error' | jq -r '._time + " " + ._msg' | sort | less
```
This command uses `jq` for extracting [`_time`](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field)
and [`_msg`](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) fields from the returned results,
This command uses `jq` for extracting [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field)
and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields from the returned results,
and piping them to `sort` command.
Note that the `sort` command needs to read all the response stream before returning the sorted results. So the command above
can take non-trivial amounts of time if the `query` returns too many results. The solution is to narrow down the `query`
before sorting the results. See [these tips](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#performance-tips)
before sorting the results. See [these tips](https://docs.victoriametrics.com/victorialogs/logsql/#performance-tips)
on how to narrow down query results.
The following example calculates stats on the number of log messages received during the last 5 minutes
grouped by `log.level` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model):
grouped by `log.level` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=_time:5m log.level:*' | jq -r '."log.level"' | sort | uniq -c
```
The query selects all the log messages with non-empty `log.level` field via ["any value" filter](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#any-value-filter),
The query selects all the log messages with non-empty `log.level` field via ["any value" filter](https://docs.victoriametrics.com/victorialogs/logsql/#any-value-filter),
then pipes them to `jq` command, which extracts the `log.level` field value from the returned JSON stream, then the extracted `log.level` values
are sorted with `sort` command and, finally, they are passed to `uniq -c` command for calculating the needed stats.
See also:
- [Key concepts](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html).
- [LogsQL docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html).
- [Key concepts](https://docs.victoriametrics.com/victorialogs/keyconcepts/).
- [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/).

View file

@ -1700,7 +1700,7 @@ func (c *blockResultColumn) sumValues(br *blockResult) (float64, int) {
values := c.getValuesEncoded(br)
for i := range values {
if i == 0 || values[i-1] != values[i] {
f, ok = tryParseFloat64(values[i])
f, ok = tryParseNumber(values[i])
}
if ok {
sum += f
@ -1713,7 +1713,7 @@ func (c *blockResultColumn) sumValues(br *blockResult) (float64, int) {
a := encoding.GetFloat64s(len(dictValues))
dictValuesFloat := a.A
for i, v := range dictValues {
f, ok := tryParseFloat64(v)
f, ok := tryParseNumber(v)
if !ok {
f = nan
}

View file

@ -289,13 +289,32 @@ func matchUint64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
}
func matchRange(s string, minValue, maxValue float64) bool {
f, ok := tryParseFloat64(s)
f, ok := tryParseNumber(s)
if !ok {
return false
}
return f >= minValue && f <= maxValue
}
func tryParseNumber(s string) (float64, bool) {
if len(s) == 0 {
return 0, false
}
f, ok := tryParseFloat64(s)
if ok {
return f, true
}
nsecs, ok := tryParseDuration(s)
if ok {
return float64(nsecs), true
}
bytes, ok := tryParseBytes(s)
if ok {
return float64(bytes), true
}
return 0, false
}
func toUint64Range(minValue, maxValue float64) (uint64, uint64) {
minValue = math.Ceil(minValue)
maxValue = math.Floor(maxValue)

View file

@ -11,7 +11,7 @@ import (
// JSONParser parses a single JSON log message into Fields.
//
// See https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model
// See https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model
//
// Use GetParser() for obtaining the parser.
type JSONParser struct {

View file

@ -327,6 +327,8 @@ func (q *Query) Optimize() {
for _, f := range t.funcs {
f.iff.optimizeFilterIn()
}
case *pipeReplace:
t.iff.optimizeFilterIn()
case *pipeFormat:
t.iff.optimizeFilterIn()
case *pipeExtract:

View file

@ -141,6 +141,12 @@ func parsePipe(lex *lexer) (pipe, error) {
return nil, fmt.Errorf("cannot parse 'rename' pipe: %w", err)
}
return pr, nil
case lex.isKeyword("replace"):
pr, err := parsePipeReplace(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'replace' pipe: %w", err)
}
return pr, nil
case lex.isKeyword("sort"):
ps, err := parsePipeSort(lex)
if err != nil {

View file

@ -9,10 +9,13 @@ import (
// See https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe
type pipeExtract struct {
fromField string
ptn *pattern
ptn *pattern
patternStr string
keepOriginalFields bool
skipEmptyResults bool
// iff is an optional filter for skipping the extract func
iff *ifFilter
}
@ -26,6 +29,12 @@ func (pe *pipeExtract) String() string {
if !isMsgFieldName(pe.fromField) {
s += " from " + quoteTokenIfNeeded(pe.fromField)
}
if pe.keepOriginalFields {
s += " keep_original_fields"
}
if pe.skipEmptyResults {
s += " skip_empty_results"
}
return s
}
@ -38,7 +47,9 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
if !unneededFieldsOrig.contains(step.field) {
needFromField = true
}
unneededFields.add(step.field)
if !pe.keepOriginalFields && !pe.skipEmptyResults {
unneededFields.add(step.field)
}
}
}
if needFromField {
@ -55,7 +66,9 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
for _, step := range pe.ptn.steps {
if step.field != "" && neededFieldsOrig.contains(step.field) {
needFromField = true
neededFields.remove(step.field)
if !pe.keepOriginalFields && !pe.skipEmptyResults {
neededFields.remove(step.field)
}
}
}
if needFromField {
@ -81,7 +94,7 @@ func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ f
}
}
return newPipeUnpackProcessor(workersCount, unpackFunc, ppBase, pe.fromField, "", pe.iff)
return newPipeUnpackProcessor(workersCount, unpackFunc, ppBase, pe.fromField, "", pe.keepOriginalFields, pe.skipEmptyResults, pe.iff)
}
func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
@ -121,11 +134,24 @@ func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
fromField = f
}
keepOriginalFields := false
skipEmptyResults := false
switch {
case lex.isKeyword("keep_original_fields"):
lex.nextToken()
keepOriginalFields = true
case lex.isKeyword("skip_empty_results"):
lex.nextToken()
skipEmptyResults = true
}
pe := &pipeExtract{
fromField: fromField,
ptn: ptn,
patternStr: patternStr,
iff: iff,
fromField: fromField,
ptn: ptn,
patternStr: patternStr,
keepOriginalFields: keepOriginalFields,
skipEmptyResults: skipEmptyResults,
iff: iff,
}
return pe, nil

View file

@ -11,8 +11,14 @@ func TestParsePipeExtractSuccess(t *testing.T) {
}
f(`extract "foo<bar>"`)
f(`extract "foo<bar>" skip_empty_results`)
f(`extract "foo<bar>" keep_original_fields`)
f(`extract "foo<bar>" from x`)
f(`extract "foo<bar>" from x skip_empty_results`)
f(`extract "foo<bar>" from x keep_original_fields`)
f(`extract if (x:y) "foo<bar>" from baz`)
f(`extract if (x:y) "foo<bar>" from baz skip_empty_results`)
f(`extract if (x:y) "foo<bar>" from baz keep_original_fields`)
}
func TestParsePipeExtractFailure(t *testing.T) {
@ -22,6 +28,8 @@ func TestParsePipeExtractFailure(t *testing.T) {
}
f(`extract`)
f(`extract keep_original_fields`)
f(`extract skip_empty_results`)
f(`extract from`)
f(`extract from x`)
f(`extract from x "y<foo>"`)
@ -38,6 +46,66 @@ func TestPipeExtract(t *testing.T) {
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// skip empty results
f(`extract "baz=<abc> a=<aa>" skip_empty_results`, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" `},
{"aa", "foobar"},
{"abc", "ippl"},
},
}, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" `},
{"aa", "foobar"},
{"abc", "x y=z"},
},
})
// no skip empty results
f(`extract "baz=<abc> a=<aa>"`, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" `},
{"aa", "foobar"},
{"abc", "ippl"},
},
}, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" `},
{"aa", ""},
{"abc", "x y=z"},
},
})
// keep original fields
f(`extract "baz=<abc> a=<aa>" keep_original_fields`, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"aa", "foobar"},
{"abc", ""},
},
}, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"abc", "x y=z"},
{"aa", "foobar"},
},
})
// no keep original fields
f(`extract "baz=<abc> a=<aa>"`, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"aa", "foobar"},
{"abc", ""},
},
}, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"abc", "x y=z"},
{"aa", "b"},
},
})
// single row, extract from _msg
f(`extract "baz=<abc> a=<aa>"`, [][]Field{
{
@ -226,41 +294,63 @@ func TestPipeExtractUpdateNeededFields(t *testing.T) {
// all the needed fields
f("extract '<foo>' from x", "*", "", "*", "foo")
f("extract if (foo:bar) '<foo>' from x", "*", "", "*", "")
f("extract if (foo:bar) '<foo>' from x keep_original_fields", "*", "", "*", "")
f("extract if (foo:bar) '<foo>' from x skip_empty_results", "*", "", "*", "")
// unneeded fields do not intersect with pattern and output fields
f("extract '<foo>' from x", "*", "f1,f2", "*", "f1,f2,foo")
f("extract '<foo>' from x keep_original_fields", "*", "f1,f2", "*", "f1,f2")
f("extract '<foo>' from x skip_empty_results", "*", "f1,f2", "*", "f1,f2")
f("extract if (f1:x) '<foo>' from x", "*", "f1,f2", "*", "f2,foo")
f("extract if (f1:x) '<foo>' from x keep_original_fields", "*", "f1,f2", "*", "f2")
f("extract if (f1:x) '<foo>' from x skip_empty_results", "*", "f1,f2", "*", "f2")
f("extract if (foo:bar f1:x) '<foo>' from x", "*", "f1,f2", "*", "f2")
// unneeded fields intersect with pattern
f("extract '<foo>' from x", "*", "f2,x", "*", "f2,foo")
f("extract '<foo>' from x keep_original_fields", "*", "f2,x", "*", "f2")
f("extract '<foo>' from x skip_empty_results", "*", "f2,x", "*", "f2")
f("extract if (f1:abc) '<foo>' from x", "*", "f2,x", "*", "f2,foo")
f("extract if (f2:abc) '<foo>' from x", "*", "f2,x", "*", "foo")
// unneeded fields intersect with output fields
f("extract '<foo>x<bar>' from x", "*", "f2,foo", "*", "bar,f2,foo")
f("extract '<foo>x<bar>' from x keep_original_fields", "*", "f2,foo", "*", "f2,foo")
f("extract '<foo>x<bar>' from x skip_empty_results", "*", "f2,foo", "*", "f2,foo")
f("extract if (f1:abc) '<foo>x<bar>' from x", "*", "f2,foo", "*", "bar,f2,foo")
f("extract if (f2:abc foo:w) '<foo>x<bar>' from x", "*", "f2,foo", "*", "bar")
f("extract if (f2:abc foo:w) '<foo>x<bar>' from x keep_original_fields", "*", "f2,foo", "*", "")
f("extract if (f2:abc foo:w) '<foo>x<bar>' from x skip_empty_results", "*", "f2,foo", "*", "")
// unneeded fields intersect with all the output fields
f("extract '<foo>x<bar>' from x", "*", "f2,foo,bar", "*", "bar,f2,foo,x")
f("extract if (a:b f2:q x:y foo:w) '<foo>x<bar>' from x", "*", "f2,foo,bar", "*", "bar,f2,foo,x")
f("extract if (a:b f2:q x:y foo:w) '<foo>x<bar>' from x keep_original_fields", "*", "f2,foo,bar", "*", "bar,f2,foo,x")
f("extract if (a:b f2:q x:y foo:w) '<foo>x<bar>' from x skip_empty_results", "*", "f2,foo,bar", "*", "bar,f2,foo,x")
// needed fields do not intersect with pattern and output fields
f("extract '<foo>x<bar>' from x", "f1,f2", "", "f1,f2", "")
f("extract '<foo>x<bar>' from x keep_original_fields", "f1,f2", "", "f1,f2", "")
f("extract '<foo>x<bar>' from x skip_empty_results", "f1,f2", "", "f1,f2", "")
f("extract if (a:b) '<foo>x<bar>' from x", "f1,f2", "", "f1,f2", "")
f("extract if (f1:b) '<foo>x<bar>' from x", "f1,f2", "", "f1,f2", "")
// needed fields intersect with pattern field
f("extract '<foo>x<bar>' from x", "f2,x", "", "f2,x", "")
f("extract '<foo>x<bar>' from x keep_original_fields", "f2,x", "", "f2,x", "")
f("extract '<foo>x<bar>' from x skip_empty_results", "f2,x", "", "f2,x", "")
f("extract if (a:b) '<foo>x<bar>' from x", "f2,x", "", "f2,x", "")
// needed fields intersect with output fields
f("extract '<foo>x<bar>' from x", "f2,foo", "", "f2,x", "")
f("extract '<foo>x<bar>' from x keep_original_fields", "f2,foo", "", "foo,f2,x", "")
f("extract '<foo>x<bar>' from x skip_empty_results", "f2,foo", "", "foo,f2,x", "")
f("extract if (a:b) '<foo>x<bar>' from x", "f2,foo", "", "a,f2,x", "")
// needed fields intersect with pattern and output fields
f("extract '<foo>x<bar>' from x", "f2,foo,x,y", "", "f2,x,y", "")
f("extract '<foo>x<bar>' from x keep_original_fields", "f2,foo,x,y", "", "foo,f2,x,y", "")
f("extract '<foo>x<bar>' from x skip_empty_results", "f2,foo,x,y", "", "foo,f2,x,y", "")
f("extract if (a:b foo:q) '<foo>x<bar>' from x", "f2,foo,x,y", "", "a,f2,foo,x,y", "")
}

View file

@ -17,6 +17,9 @@ type pipeFormat struct {
resultField string
keepOriginalFields bool
skipEmptyResults bool
// iff is an optional filter for skipping the format func
iff *ifFilter
}
@ -30,13 +33,21 @@ func (pf *pipeFormat) String() string {
if !isMsgFieldName(pf.resultField) {
s += " as " + quoteTokenIfNeeded(pf.resultField)
}
if pf.keepOriginalFields {
s += " keep_original_fields"
}
if pf.skipEmptyResults {
s += " skip_empty_results"
}
return s
}
func (pf *pipeFormat) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
if !unneededFields.contains(pf.resultField) {
unneededFields.add(pf.resultField)
if !pf.keepOriginalFields && !pf.skipEmptyResults {
unneededFields.add(pf.resultField)
}
if pf.iff != nil {
unneededFields.removeFields(pf.iff.neededFields)
}
@ -48,7 +59,9 @@ func (pf *pipeFormat) updateNeededFields(neededFields, unneededFields fieldsSet)
}
} else {
if neededFields.contains(pf.resultField) {
neededFields.remove(pf.resultField)
if !pf.keepOriginalFields && !pf.skipEmptyResults {
neededFields.remove(pf.resultField)
}
if pf.iff != nil {
neededFields.addFields(pf.iff.neededFields)
}
@ -97,7 +110,7 @@ func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) {
}
shard := &pfp.shards[workerID]
shard.wctx.init(workerID, pfp.ppBase, br)
shard.wctx.init(workerID, pfp.ppBase, pfp.pf.keepOriginalFields, pfp.pf.skipEmptyResults, br)
shard.uctx.init(workerID, "")
bm := &shard.bm
@ -189,11 +202,24 @@ func parsePipeFormat(lex *lexer) (*pipeFormat, error) {
resultField = field
}
keepOriginalFields := false
skipEmptyResults := false
switch {
case lex.isKeyword("keep_original_fields"):
lex.nextToken()
keepOriginalFields = true
case lex.isKeyword("skip_empty_results"):
lex.nextToken()
skipEmptyResults = true
}
pf := &pipeFormat{
formatStr: formatStr,
steps: steps,
resultField: resultField,
iff: iff,
formatStr: formatStr,
steps: steps,
resultField: resultField,
keepOriginalFields: keepOriginalFields,
skipEmptyResults: skipEmptyResults,
iff: iff,
}
return pf, nil

View file

@ -11,13 +11,21 @@ func TestParsePipeFormatSuccess(t *testing.T) {
}
f(`format "foo<bar>"`)
f(`format "foo<bar>" skip_empty_results`)
f(`format "foo<bar>" keep_original_fields`)
f(`format "" as x`)
f(`format "<>" as x`)
f(`format foo as x`)
f(`format foo as x skip_empty_results`)
f(`format foo as x keep_original_fields`)
f(`format "<foo>"`)
f(`format "<foo>bar<baz>"`)
f(`format "bar<baz><xyz>bac"`)
f(`format "bar<baz><xyz>bac" skip_empty_results`)
f(`format "bar<baz><xyz>bac" keep_original_fields`)
f(`format if (x:y) "bar<baz><xyz>bac"`)
f(`format if (x:y) "bar<baz><xyz>bac" skip_empty_results`)
f(`format if (x:y) "bar<baz><xyz>bac" keep_original_fields`)
}
func TestParsePipeFormatFailure(t *testing.T) {
@ -39,6 +47,104 @@ func TestPipeFormat(t *testing.T) {
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// skip_empty_results
f(`format '<foo><bar>' as x skip_empty_results`, [][]Field{
{
{"foo", `abc`},
{"bar", `cde`},
{"x", "111"},
},
{
{"xfoo", `ppp`},
{"xbar", `123`},
{"x", "222"},
},
}, [][]Field{
{
{"foo", `abc`},
{"bar", `cde`},
{"x", `abccde`},
},
{
{"xfoo", `ppp`},
{"xbar", `123`},
{"x", `222`},
},
})
// no skip_empty_results
f(`format '<foo><bar>' as x`, [][]Field{
{
{"foo", `abc`},
{"bar", `cde`},
{"x", "111"},
},
{
{"xfoo", `ppp`},
{"xbar", `123`},
{"x", "222"},
},
}, [][]Field{
{
{"foo", `abc`},
{"bar", `cde`},
{"x", `abccde`},
},
{
{"xfoo", `ppp`},
{"xbar", `123`},
{"x", ``},
},
})
// no keep_original_fields
f(`format '{"foo":<q:foo>,"bar":"<bar>"}' as x`, [][]Field{
{
{"foo", `abc`},
{"bar", `cde`},
{"x", "qwe"},
},
{
{"foo", `ppp`},
{"bar", `123`},
},
}, [][]Field{
{
{"foo", `abc`},
{"bar", `cde`},
{"x", `{"foo":"abc","bar":"cde"}`},
},
{
{"foo", `ppp`},
{"bar", `123`},
{"x", `{"foo":"ppp","bar":"123"}`},
},
})
// keep_original_fields
f(`format '{"foo":<q:foo>,"bar":"<bar>"}' as x keep_original_fields`, [][]Field{
{
{"foo", `abc`},
{"bar", `cde`},
{"x", "qwe"},
},
{
{"foo", `ppp`},
{"bar", `123`},
},
}, [][]Field{
{
{"foo", `abc`},
{"bar", `cde`},
{"x", `qwe`},
},
{
{"foo", `ppp`},
{"bar", `123`},
{"x", `{"foo":"ppp","bar":"123"}`},
},
})
// plain string into a single field
f(`format '{"foo":<q:foo>,"bar":"<bar>"}' as x`, [][]Field{
{
@ -160,42 +266,80 @@ func TestPipeFormatUpdateNeededFields(t *testing.T) {
// all the needed fields
f(`format "foo" as x`, "*", "", "*", "x")
f(`format "foo" as x skip_empty_results`, "*", "", "*", "")
f(`format "foo" as x keep_original_fields`, "*", "", "*", "")
f(`format "<f1>foo" as x`, "*", "", "*", "x")
f(`format if (f2:z) "<f1>foo" as x`, "*", "", "*", "x")
f(`format if (f2:z) "<f1>foo" as x skip_empty_results`, "*", "", "*", "")
f(`format if (f2:z) "<f1>foo" as x keep_original_fields`, "*", "", "*", "")
// unneeded fields do not intersect with pattern and output field
f(`format "foo" as x`, "*", "f1,f2", "*", "f1,f2,x")
f(`format "<f3>foo" as x`, "*", "f1,f2", "*", "f1,f2,x")
f(`format if (f4:z) "<f3>foo" as x`, "*", "f1,f2", "*", "f1,f2,x")
f(`format if (f1:z) "<f3>foo" as x`, "*", "f1,f2", "*", "f2,x")
f(`format if (f1:z) "<f3>foo" as x skip_empty_results`, "*", "f1,f2", "*", "f2")
f(`format if (f1:z) "<f3>foo" as x keep_original_fields`, "*", "f1,f2", "*", "f2")
// unneeded fields intersect with pattern
f(`format "<f1>foo" as x`, "*", "f1,f2", "*", "f2,x")
f(`format "<f1>foo" as x skip_empty_results`, "*", "f1,f2", "*", "f2")
f(`format "<f1>foo" as x keep_original_fields`, "*", "f1,f2", "*", "f2")
f(`format if (f4:z) "<f1>foo" as x`, "*", "f1,f2", "*", "f2,x")
f(`format if (f4:z) "<f1>foo" as x skip_empty_results`, "*", "f1,f2", "*", "f2")
f(`format if (f4:z) "<f1>foo" as x keep_original_fields`, "*", "f1,f2", "*", "f2")
f(`format if (f2:z) "<f1>foo" as x`, "*", "f1,f2", "*", "x")
f(`format if (f2:z) "<f1>foo" as x skip_empty_results`, "*", "f1,f2", "*", "")
f(`format if (f2:z) "<f1>foo" as x keep_original_fields`, "*", "f1,f2", "*", "")
// unneeded fields intersect with output field
f(`format "<f1>foo" as x`, "*", "x,y", "*", "x,y")
f(`format "<f1>foo" as x skip_empty_results`, "*", "x,y", "*", "x,y")
f(`format "<f1>foo" as x keep_original_fields`, "*", "x,y", "*", "x,y")
f(`format if (f2:z) "<f1>foo" as x`, "*", "x,y", "*", "x,y")
f(`format if (f2:z) "<f1>foo" as x skip_empty_results`, "*", "x,y", "*", "x,y")
f(`format if (f2:z) "<f1>foo" as x keep_original_fields`, "*", "x,y", "*", "x,y")
f(`format if (y:z) "<f1>foo" as x`, "*", "x,y", "*", "x,y")
f(`format if (y:z) "<f1>foo" as x skip_empty_results`, "*", "x,y", "*", "x,y")
f(`format if (y:z) "<f1>foo" as x keep_original_fields`, "*", "x,y", "*", "x,y")
// needed fields do not intersect with pattern and output field
f(`format "<f1>foo" as f2`, "x,y", "", "x,y", "")
f(`format "<f1>foo" as f2 keep_original_fields`, "x,y", "", "x,y", "")
f(`format "<f1>foo" as f2 skip_empty_results`, "x,y", "", "x,y", "")
f(`format if (f3:z) "<f1>foo" as f2`, "x,y", "", "x,y", "")
f(`format if (f3:z) "<f1>foo" as f2 skip_empty_results`, "x,y", "", "x,y", "")
f(`format if (f3:z) "<f1>foo" as f2 keep_original_fields`, "x,y", "", "x,y", "")
f(`format if (x:z) "<f1>foo" as f2`, "x,y", "", "x,y", "")
f(`format if (x:z) "<f1>foo" as f2 skip_empty_results`, "x,y", "", "x,y", "")
f(`format if (x:z) "<f1>foo" as f2 keep_original_fields`, "x,y", "", "x,y", "")
// needed fields intersect with pattern field
f(`format "<f1>foo" as f2`, "f1,y", "", "f1,y", "")
f(`format "<f1>foo" as f2 skip_empty_results`, "f1,y", "", "f1,y", "")
f(`format "<f1>foo" as f2 keep_original_fields`, "f1,y", "", "f1,y", "")
f(`format if (f3:z) "<f1>foo" as f2`, "f1,y", "", "f1,y", "")
f(`format if (x:z) "<f1>foo" as f2`, "f1,y", "", "f1,y", "")
f(`format if (x:z) "<f1>foo" as f2 skip_empty_results`, "f1,y", "", "f1,y", "")
f(`format if (x:z) "<f1>foo" as f2 keep_original_fields`, "f1,y", "", "f1,y", "")
// needed fields intersect with output field
f(`format "<f1>foo" as f2`, "f2,y", "", "f1,y", "")
f(`format "<f1>foo" as f2 skip_empty_results`, "f2,y", "", "f1,f2,y", "")
f(`format "<f1>foo" as f2 keep_original_fields`, "f2,y", "", "f1,f2,y", "")
f(`format if (f3:z) "<f1>foo" as f2`, "f2,y", "", "f1,f3,y", "")
f(`format if (x:z or y:w) "<f1>foo" as f2`, "f2,y", "", "f1,x,y", "")
f(`format if (x:z or y:w) "<f1>foo" as f2 skip_empty_results`, "f2,y", "", "f1,f2,x,y", "")
f(`format if (x:z or y:w) "<f1>foo" as f2 keep_original_fields`, "f2,y", "", "f1,f2,x,y", "")
// needed fields intersect with pattern and output fields
f(`format "<f1>foo" as f2`, "f1,f2,y", "", "f1,y", "")
f(`format "<f1>foo" as f2 skip_empty_results`, "f1,f2,y", "", "f1,f2,y", "")
f(`format "<f1>foo" as f2 keep_original_fields`, "f1,f2,y", "", "f1,f2,y", "")
f(`format if (f3:z) "<f1>foo" as f2`, "f1,f2,y", "", "f1,f3,y", "")
f(`format if (f3:z) "<f1>foo" as f2 skip_empty_results`, "f1,f2,y", "", "f1,f2,f3,y", "")
f(`format if (f3:z) "<f1>foo" as f2 keep_original_fields`, "f1,f2,y", "", "f1,f2,f3,y", "")
f(`format if (x:z or y:w) "<f1>foo" as f2`, "f1,f2,y", "", "f1,x,y", "")
f(`format if (x:z or y:w) "<f1>foo" as f2 skip_empty_results`, "f1,f2,y", "", "f1,f2,x,y", "")
f(`format if (x:z or y:w) "<f1>foo" as f2 keep_original_fields`, "f1,f2,y", "", "f1,f2,x,y", "")
}

View file

@ -0,0 +1,229 @@
package logstorage
import (
"fmt"
"strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// pipeReplace processes '| replace ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#replace-pipe
type pipeReplace struct {
srcField string
oldSubstr string
newSubstr string
// limit limits the number of replacements, which can be performed
limit uint64
// iff is an optional filter for skipping the replace operation
iff *ifFilter
}
func (pr *pipeReplace) String() string {
s := "replace"
if pr.iff != nil {
s += " " + pr.iff.String()
}
s += fmt.Sprintf(" (%s, %s)", quoteTokenIfNeeded(pr.oldSubstr), quoteTokenIfNeeded(pr.newSubstr))
if pr.srcField != "_msg" {
s += " at " + quoteTokenIfNeeded(pr.srcField)
}
if pr.limit > 0 {
s += fmt.Sprintf(" limit %d", pr.limit)
}
return s
}
func (pr *pipeReplace) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
if !unneededFields.contains(pr.srcField) && pr.iff != nil {
unneededFields.removeFields(pr.iff.neededFields)
}
} else {
if neededFields.contains(pr.srcField) && pr.iff != nil {
neededFields.addFields(pr.iff.neededFields)
}
}
}
func (pr *pipeReplace) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
return &pipeReplaceProcessor{
pr: pr,
ppBase: ppBase,
shards: make([]pipeReplaceProcessorShard, workersCount),
}
}
type pipeReplaceProcessor struct {
pr *pipeReplace
ppBase pipeProcessor
shards []pipeReplaceProcessorShard
}
type pipeReplaceProcessorShard struct {
pipeReplaceProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeReplaceProcessorShardNopad{})%128]byte
}
type pipeReplaceProcessorShardNopad struct {
bm bitmap
uctx fieldsUnpackerContext
wctx pipeUnpackWriteContext
}
func (prp *pipeReplaceProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &prp.shards[workerID]
shard.wctx.init(workerID, prp.ppBase, false, false, br)
shard.uctx.init(workerID, "")
pr := prp.pr
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pr.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
prp.ppBase.writeBlock(workerID, br)
return
}
}
c := br.getColumnByName(pr.srcField)
values := c.getValues(br)
bb := bbPool.Get()
vPrev := ""
shard.uctx.addField(pr.srcField, "")
for rowIdx, v := range values {
if bm.isSetBit(rowIdx) {
if vPrev != v {
bb.B = appendReplace(bb.B[:0], v, pr.oldSubstr, pr.newSubstr, pr.limit)
s := bytesutil.ToUnsafeString(bb.B)
shard.uctx.resetFields()
shard.uctx.addField(pr.srcField, s)
vPrev = v
}
shard.wctx.writeRow(rowIdx, shard.uctx.fields)
} else {
shard.wctx.writeRow(rowIdx, nil)
}
}
bbPool.Put(bb)
shard.wctx.flush()
shard.wctx.reset()
shard.uctx.reset()
}
func (prp *pipeReplaceProcessor) flush() error {
return nil
}
func parsePipeReplace(lex *lexer) (*pipeReplace, error) {
if !lex.isKeyword("replace") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "replace")
}
lex.nextToken()
// parse optional if (...)
var iff *ifFilter
if lex.isKeyword("if") {
f, err := parseIfFilter(lex)
if err != nil {
return nil, err
}
iff = f
}
if !lex.isKeyword("(") {
return nil, fmt.Errorf("missing '(' after 'replace'")
}
lex.nextToken()
oldSubstr, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse oldSubstr in 'replace': %w", err)
}
if !lex.isKeyword(",") {
return nil, fmt.Errorf("missing ',' after 'replace(%q'", oldSubstr)
}
lex.nextToken()
newSubstr, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse newSubstr in 'replace': %w", err)
}
if !lex.isKeyword(")") {
return nil, fmt.Errorf("missing ')' after 'replace(%q, %q'", oldSubstr, newSubstr)
}
lex.nextToken()
srcField := "_msg"
if lex.isKeyword("at") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'at' field after 'replace(%q, %q)': %w", oldSubstr, newSubstr, err)
}
srcField = f
}
limit := uint64(0)
if lex.isKeyword("limit") {
lex.nextToken()
n, ok := tryParseUint64(lex.token)
if !ok {
return nil, fmt.Errorf("cannot parse 'limit %s' in 'replace'", lex.token)
}
lex.nextToken()
limit = n
}
pr := &pipeReplace{
srcField: srcField,
oldSubstr: oldSubstr,
newSubstr: newSubstr,
limit: limit,
iff: iff,
}
return pr, nil
}
func appendReplace(dst []byte, s, oldSubstr, newSubstr string, limit uint64) []byte {
if len(s) == 0 {
return dst
}
if len(oldSubstr) == 0 {
return append(dst, s...)
}
replacements := uint64(0)
for {
n := strings.Index(s, oldSubstr)
if n < 0 {
return append(dst, s...)
}
dst = append(dst, s[:n]...)
dst = append(dst, newSubstr...)
s = s[n+len(oldSubstr):]
replacements++
if limit > 0 && replacements >= limit {
return append(dst, s...)
}
}
}

View file

@ -0,0 +1,172 @@
package logstorage
import (
"testing"
)
func TestParsePipeReplaceSuccess(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeSuccess(t, pipeStr)
}
f(`replace (foo, bar)`)
f(`replace (" ", "") at x`)
f(`replace if (x:y) ("-", ":") at a`)
f(`replace (" ", "") at x limit 10`)
f(`replace if (x:y) (" ", "") at foo limit 10`)
}
func TestParsePipeReplaceFailure(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeFailure(t, pipeStr)
}
f(`replace`)
f(`replace if`)
f(`replace foo`)
f(`replace (`)
f(`replace (foo`)
f(`replace (foo,`)
f(`replace(foo,bar`)
f(`replace(foo,bar,baz)`)
f(`replace(foo,bar) abc`)
f(`replace(bar,baz) limit`)
f(`replace(bar,baz) limit N`)
}
func TestPipeReplace(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// replace without limits at _msg
f(`replace ("_", "-")`, [][]Field{
{
{"_msg", `a_bc_def`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
}, [][]Field{
{
{"_msg", `a-bc-def`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
})
// replace with limit 1 at foo
f(`replace ("_", "-") at foo limit 1`, [][]Field{
{
{"foo", `a_bc_def`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
}, [][]Field{
{
{"foo", `a-bc_def`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
})
// replace with limit 100 at foo
f(`replace ("_", "-") at foo limit 100`, [][]Field{
{
{"foo", `a_bc_def`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
}, [][]Field{
{
{"foo", `a-bc-def`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
})
// conditional replace at foo
f(`replace if (bar:abc) ("_", "") at foo`, [][]Field{
{
{"foo", `a_bc_def`},
{"bar", `cde`},
},
{
{"foo", `123_456`},
{"bar", "abc"},
},
}, [][]Field{
{
{"foo", `a_bc_def`},
{"bar", `cde`},
},
{
{"foo", `123456`},
{"bar", "abc"},
},
})
}
func TestPipeReplaceUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f(`replace ("a", "b") at x`, "*", "", "*", "")
f(`replace if (f1:q) ("a", "b") at x`, "*", "", "*", "")
// unneeded fields do not intersect with at field
f(`replace ("a", "b") at x`, "*", "f1,f2", "*", "f1,f2")
f(`replace if (f3:q) ("a", "b") at x`, "*", "f1,f2", "*", "f1,f2")
f(`replace if (f2:q) ("a", "b") at x`, "*", "f1,f2", "*", "f1")
// unneeded fields intersect with at field
f(`replace ("a", "b") at x`, "*", "x,y", "*", "x,y")
f(`replace if (f1:q) ("a", "b") at x`, "*", "x,y", "*", "x,y")
f(`replace if (x:q) ("a", "b") at x`, "*", "x,y", "*", "x,y")
f(`replace if (y:q) ("a", "b") at x`, "*", "x,y", "*", "x,y")
// needed fields do not intersect with at field
f(`replace ("a", "b") at x`, "f2,y", "", "f2,y", "")
f(`replace if (f1:q) ("a", "b") at x`, "f2,y", "", "f2,y", "")
// needed fields intersect with at field
f(`replace ("a", "b") at y`, "f2,y", "", "f2,y", "")
f(`replace if (f1:q) ("a", "b") at y`, "f2,y", "", "f1,f2,y", "")
}
func TestAppendReplace(t *testing.T) {
f := func(s, oldSubstr, newSubstr string, limit int, resultExpected string) {
t.Helper()
result := appendReplace(nil, s, oldSubstr, newSubstr, uint64(limit))
if string(result) != resultExpected {
t.Fatalf("unexpected result for appendReplace(%q, %q, %q, %d)\ngot\n%s\nwant\n%s", s, oldSubstr, newSubstr, limit, result, resultExpected)
}
}
f("", "", "", 0, "")
f("", "foo", "bar", 0, "")
f("foo", "foo", "bar", 0, "bar")
f("foox", "foo", "bar", 0, "barx")
f("afoo", "foo", "bar", 0, "abar")
f("afoox", "foo", "bar", 0, "abarx")
f("foo-bar-baz", "-", "_", 0, "foo_bar_baz")
f("foo bar baz ", " ", "", 0, "foobarbaz")
}

View file

@ -621,8 +621,8 @@ func lessString(a, b string) bool {
return nA < nB
}
fA, okA := tryParseFloat64(a)
fB, okB := tryParseFloat64(b)
fA, okA := tryParseNumber(a)
fB, okB := tryParseNumber(b)
if okA && okB {
return fA < fB
}

View file

@ -54,7 +54,7 @@ func (uctx *fieldsUnpackerContext) addField(name, value string) {
}
func newPipeUnpackProcessor(workersCount int, unpackFunc func(uctx *fieldsUnpackerContext, s string), ppBase pipeProcessor,
fromField, fieldPrefix string, iff *ifFilter) *pipeUnpackProcessor {
fromField string, fieldPrefix string, keepOriginalFields, skipEmptyResults bool, iff *ifFilter) *pipeUnpackProcessor {
return &pipeUnpackProcessor{
unpackFunc: unpackFunc,
@ -62,9 +62,11 @@ func newPipeUnpackProcessor(workersCount int, unpackFunc func(uctx *fieldsUnpack
shards: make([]pipeUnpackProcessorShard, workersCount),
fromField: fromField,
fieldPrefix: fieldPrefix,
iff: iff,
fromField: fromField,
fieldPrefix: fieldPrefix,
keepOriginalFields: keepOriginalFields,
skipEmptyResults: skipEmptyResults,
iff: iff,
}
}
@ -74,8 +76,10 @@ type pipeUnpackProcessor struct {
shards []pipeUnpackProcessorShard
fromField string
fieldPrefix string
fromField string
fieldPrefix string
keepOriginalFields bool
skipEmptyResults bool
iff *ifFilter
}
@ -100,7 +104,7 @@ func (pup *pipeUnpackProcessor) writeBlock(workerID uint, br *blockResult) {
}
shard := &pup.shards[workerID]
shard.wctx.init(workerID, pup.ppBase, br)
shard.wctx.init(workerID, pup.ppBase, pup.keepOriginalFields, pup.skipEmptyResults, br)
shard.uctx.init(workerID, pup.fieldPrefix)
bm := &shard.bm
@ -153,8 +157,10 @@ func (pup *pipeUnpackProcessor) flush() error {
}
type pipeUnpackWriteContext struct {
workerID uint
ppBase pipeProcessor
workerID uint
ppBase pipeProcessor
keepOriginalFields bool
skipEmptyResults bool
brSrc *blockResult
csSrc []*blockResultColumn
@ -172,6 +178,7 @@ type pipeUnpackWriteContext struct {
func (wctx *pipeUnpackWriteContext) reset() {
wctx.workerID = 0
wctx.ppBase = nil
wctx.keepOriginalFields = false
wctx.brSrc = nil
wctx.csSrc = nil
@ -186,11 +193,13 @@ func (wctx *pipeUnpackWriteContext) reset() {
wctx.valuesLen = 0
}
func (wctx *pipeUnpackWriteContext) init(workerID uint, ppBase pipeProcessor, brSrc *blockResult) {
func (wctx *pipeUnpackWriteContext) init(workerID uint, ppBase pipeProcessor, keepOriginalFields, skipEmptyResults bool, brSrc *blockResult) {
wctx.reset()
wctx.workerID = workerID
wctx.ppBase = ppBase
wctx.keepOriginalFields = keepOriginalFields
wctx.skipEmptyResults = skipEmptyResults
wctx.brSrc = brSrc
wctx.csSrc = brSrc.getColumns()
@ -231,6 +240,15 @@ func (wctx *pipeUnpackWriteContext) writeRow(rowIdx int, extraFields []Field) {
}
for i, f := range extraFields {
v := f.Value
if v == "" && wctx.skipEmptyResults || wctx.keepOriginalFields {
idx := getBlockResultColumnIdxByName(csSrc, f.Name)
if idx >= 0 {
vOrig := csSrc[idx].getValueAtRow(brSrc, rowIdx)
if vOrig != "" {
v = vOrig
}
}
}
rcs[len(csSrc)+i].addValue(v)
wctx.valuesLen += len(v)
}

View file

@ -22,6 +22,9 @@ type pipeUnpackJSON struct {
// resultPrefix is prefix to add to unpacked field names
resultPrefix string
keepOriginalFields bool
skipEmptyResults bool
// iff is an optional filter for skipping unpacking json
iff *ifFilter
}
@ -40,14 +43,20 @@ func (pu *pipeUnpackJSON) String() string {
if pu.resultPrefix != "" {
s += " result_prefix " + quoteTokenIfNeeded(pu.resultPrefix)
}
if pu.keepOriginalFields {
s += " keep_original_fields"
}
if pu.skipEmptyResults {
s += " skip_empty_results"
}
return s
}
func (pu *pipeUnpackJSON) updateNeededFields(neededFields, unneededFields fieldsSet) {
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.iff, neededFields, unneededFields)
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields)
}
func updateNeededFieldsForUnpackPipe(fromField string, outFields []string, iff *ifFilter, neededFields, unneededFields fieldsSet) {
func updateNeededFieldsForUnpackPipe(fromField string, outFields []string, keepOriginalFields, skipEmptyResults bool, iff *ifFilter, neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFieldsOrig := unneededFields.clone()
unneededFieldsCount := 0
@ -56,7 +65,9 @@ func updateNeededFieldsForUnpackPipe(fromField string, outFields []string, iff *
if unneededFieldsOrig.contains(f) {
unneededFieldsCount++
}
unneededFields.add(f)
if !keepOriginalFields && !skipEmptyResults {
unneededFields.add(f)
}
}
}
if len(outFields) == 0 || unneededFieldsCount < len(outFields) {
@ -74,7 +85,9 @@ func updateNeededFieldsForUnpackPipe(fromField string, outFields []string, iff *
if neededFieldsOrig.contains(f) {
needFromField = true
}
neededFields.remove(f)
if !keepOriginalFields && !skipEmptyResults {
neededFields.remove(f)
}
}
}
if needFromField {
@ -121,7 +134,7 @@ func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{},
}
PutJSONParser(p)
}
return newPipeUnpackProcessor(workersCount, unpackJSON, ppBase, pu.fromField, pu.resultPrefix, pu.iff)
return newPipeUnpackProcessor(workersCount, unpackJSON, ppBase, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff)
}
func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) {
@ -172,11 +185,24 @@ func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) {
resultPrefix = p
}
keepOriginalFields := false
skipEmptyResults := false
switch {
case lex.isKeyword("keep_original_fields"):
lex.nextToken()
keepOriginalFields = true
case lex.isKeyword("skip_empty_results"):
lex.nextToken()
skipEmptyResults = true
}
pu := &pipeUnpackJSON{
fromField: fromField,
fields: fields,
resultPrefix: resultPrefix,
iff: iff,
fromField: fromField,
fields: fields,
resultPrefix: resultPrefix,
keepOriginalFields: keepOriginalFields,
skipEmptyResults: skipEmptyResults,
iff: iff,
}
return pu, nil

View file

@ -15,16 +15,30 @@ func TestParsePipeUnpackJSONSuccess(t *testing.T) {
}
f(`unpack_json`)
f(`unpack_json skip_empty_results`)
f(`unpack_json keep_original_fields`)
f(`unpack_json fields (a)`)
f(`unpack_json fields (a, b, c)`)
f(`unpack_json fields (a, b, c) skip_empty_results`)
f(`unpack_json fields (a, b, c) keep_original_fields`)
f(`unpack_json if (a:x)`)
f(`unpack_json if (a:x) skip_empty_results`)
f(`unpack_json if (a:x) keep_original_fields`)
f(`unpack_json from x`)
f(`unpack_json from x skip_empty_results`)
f(`unpack_json from x keep_original_fields`)
f(`unpack_json from x fields (a, b)`)
f(`unpack_json if (a:x) from x fields (a, b)`)
f(`unpack_json if (a:x) from x fields (a, b) skip_empty_results`)
f(`unpack_json if (a:x) from x fields (a, b) keep_original_fields`)
f(`unpack_json from x result_prefix abc`)
f(`unpack_json if (a:x) from x fields (a, b) result_prefix abc`)
f(`unpack_json if (a:x) from x fields (a, b) result_prefix abc skip_empty_results`)
f(`unpack_json if (a:x) from x fields (a, b) result_prefix abc keep_original_fields`)
f(`unpack_json result_prefix abc`)
f(`unpack_json if (a:x) fields (a, b) result_prefix abc`)
f(`unpack_json if (a:x) fields (a, b) result_prefix abc skip_empty_results`)
f(`unpack_json if (a:x) fields (a, b) result_prefix abc keep_original_fields`)
}
func TestParsePipeUnpackJSONFailure(t *testing.T) {
@ -55,6 +69,70 @@ func TestPipeUnpackJSON(t *testing.T) {
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// skip empty results
f("unpack_json skip_empty_results", [][]Field{
{
{"_msg", `{"foo":"bar","z":"q","a":""}`},
{"foo", "x"},
{"a", "foobar"},
},
}, [][]Field{
{
{"_msg", `{"foo":"bar","z":"q","a":""}`},
{"foo", "bar"},
{"z", "q"},
{"a", "foobar"},
},
})
// no skip empty results
f("unpack_json", [][]Field{
{
{"_msg", `{"foo":"bar","z":"q","a":""}`},
{"foo", "x"},
{"a", "foobar"},
},
}, [][]Field{
{
{"_msg", `{"foo":"bar","z":"q","a":""}`},
{"foo", "bar"},
{"z", "q"},
{"a", ""},
},
})
// no keep original fields
f("unpack_json", [][]Field{
{
{"_msg", `{"foo":"bar","z":"q","a":"b"}`},
{"foo", "x"},
{"a", ""},
},
}, [][]Field{
{
{"_msg", `{"foo":"bar","z":"q","a":"b"}`},
{"foo", "bar"},
{"z", "q"},
{"a", "b"},
},
})
// keep original fields
f("unpack_json keep_original_fields", [][]Field{
{
{"_msg", `{"foo":"bar","z":"q","a":"b"}`},
{"foo", "x"},
{"a", ""},
},
}, [][]Field{
{
{"_msg", `{"foo":"bar","z":"q","a":"b"}`},
{"foo", "x"},
{"z", "q"},
{"a", "b"},
},
})
// unpack only the requested fields
f("unpack_json fields (foo, b)", [][]Field{
{
@ -465,35 +543,61 @@ func TestPipeUnpackJSONUpdateNeededFields(t *testing.T) {
// all the needed fields
f("unpack_json from x", "*", "", "*", "")
f("unpack_json from x skip_empty_results", "*", "", "*", "")
f("unpack_json from x keep_original_fields", "*", "", "*", "")
f("unpack_json if (y:z) from x", "*", "", "*", "")
f("unpack_json if (y:z) from x fields (a, b)", "*", "", "*", "a,b")
f("unpack_json if (y:z) from x fields (a, b) skip_empty_results", "*", "", "*", "")
f("unpack_json if (y:z) from x fields (a, b) keep_original_fields", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("unpack_json from x", "*", "f1,f2", "*", "f1,f2")
f("unpack_json from x skip_empty_results", "*", "f1,f2", "*", "f1,f2")
f("unpack_json from x keep_original_fields", "*", "f1,f2", "*", "f1,f2")
f("unpack_json if (y:z) from x", "*", "f1,f2", "*", "f1,f2")
f("unpack_json if (f1:z) from x", "*", "f1,f2", "*", "f2")
f("unpack_json if (y:z) from x fields (f3)", "*", "f1,f2", "*", "f1,f2,f3")
f("unpack_json if (y:z) from x fields (f1)", "*", "f1,f2", "*", "f1,f2")
f("unpack_json if (y:z) from x fields (f1) skip_empty_results", "*", "f1,f2", "*", "f1,f2")
f("unpack_json if (y:z) from x fields (f1) keep_original_fields", "*", "f1,f2", "*", "f1,f2")
// all the needed fields, unneeded fields intersect with src
f("unpack_json from x", "*", "f2,x", "*", "f2")
f("unpack_json from x skip_empty_results", "*", "f2,x", "*", "f2")
f("unpack_json from x keep_original_fields", "*", "f2,x", "*", "f2")
f("unpack_json if (y:z) from x", "*", "f2,x", "*", "f2")
f("unpack_json if (f2:z) from x", "*", "f1,f2,x", "*", "f1")
f("unpack_json if (f2:z) from x fields (f3)", "*", "f1,f2,x", "*", "f1,f3")
f("unpack_json if (f2:z) from x fields (f3) skip_empty_results", "*", "f1,f2,x", "*", "f1")
f("unpack_json if (f2:z) from x fields (f3) keep_original_fields", "*", "f1,f2,x", "*", "f1")
// needed fields do not intersect with src
f("unpack_json from x", "f1,f2", "", "f1,f2,x", "")
f("unpack_json from x skip_empty_results", "f1,f2", "", "f1,f2,x", "")
f("unpack_json from x keep_original_fields", "f1,f2", "", "f1,f2,x", "")
f("unpack_json if (y:z) from x", "f1,f2", "", "f1,f2,x,y", "")
f("unpack_json if (f1:z) from x", "f1,f2", "", "f1,f2,x", "")
f("unpack_json if (y:z) from x fields (f3)", "f1,f2", "", "f1,f2", "")
f("unpack_json if (y:z) from x fields (f3) skip_empty_results", "f1,f2", "", "f1,f2", "")
f("unpack_json if (y:z) from x fields (f3) keep_original_fields", "f1,f2", "", "f1,f2", "")
f("unpack_json if (y:z) from x fields (f2)", "f1,f2", "", "f1,x,y", "")
f("unpack_json if (f2:z) from x fields (f2)", "f1,f2", "", "f1,f2,x", "")
f("unpack_json if (f2:z) from x fields (f2) skip_empty_results", "f1,f2", "", "f1,f2,x", "")
f("unpack_json if (f2:z) from x fields (f2) keep_original_fields", "f1,f2", "", "f1,f2,x", "")
// needed fields intersect with src
f("unpack_json from x", "f2,x", "", "f2,x", "")
f("unpack_json from x skip_empty_results", "f2,x", "", "f2,x", "")
f("unpack_json from x keep_original_fields", "f2,x", "", "f2,x", "")
f("unpack_json if (y:z) from x", "f2,x", "", "f2,x,y", "")
f("unpack_json if (f2:z y:qwe) from x", "f2,x", "", "f2,x,y", "")
f("unpack_json if (y:z) from x fields (f1)", "f2,x", "", "f2,x", "")
f("unpack_json if (y:z) from x fields (f1) skip_empty_results", "f2,x", "", "f2,x", "")
f("unpack_json if (y:z) from x fields (f1) keep_original_fields", "f2,x", "", "f2,x", "")
f("unpack_json if (y:z) from x fields (f2)", "f2,x", "", "x,y", "")
f("unpack_json if (y:z) from x fields (f2) skip_empty_results", "f2,x", "", "f2,x,y", "")
f("unpack_json if (y:z) from x fields (f2) keep_original_fields", "f2,x", "", "f2,x,y", "")
f("unpack_json if (y:z) from x fields (x)", "f2,x", "", "f2,x,y", "")
f("unpack_json if (y:z) from x fields (x) skip_empty_results", "f2,x", "", "f2,x,y", "")
f("unpack_json if (y:z) from x fields (x) keep_original_fields", "f2,x", "", "f2,x,y", "")
}

View file

@ -20,6 +20,9 @@ type pipeUnpackLogfmt struct {
// resultPrefix is prefix to add to unpacked field names
resultPrefix string
keepOriginalFields bool
skipEmptyResults bool
// iff is an optional filter for skipping unpacking logfmt
iff *ifFilter
}
@ -38,11 +41,17 @@ func (pu *pipeUnpackLogfmt) String() string {
if pu.resultPrefix != "" {
s += " result_prefix " + quoteTokenIfNeeded(pu.resultPrefix)
}
if pu.keepOriginalFields {
s += " keep_original_fields"
}
if pu.skipEmptyResults {
s += " skip_empty_results"
}
return s
}
func (pu *pipeUnpackLogfmt) updateNeededFields(neededFields, unneededFields fieldsSet) {
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.iff, neededFields, unneededFields)
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields)
}
func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
@ -73,7 +82,7 @@ func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}
putLogfmtParser(p)
}
return newPipeUnpackProcessor(workersCount, unpackLogfmt, ppBase, pu.fromField, pu.resultPrefix, pu.iff)
return newPipeUnpackProcessor(workersCount, unpackLogfmt, ppBase, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff)
}
@ -125,11 +134,24 @@ func parsePipeUnpackLogfmt(lex *lexer) (*pipeUnpackLogfmt, error) {
resultPrefix = p
}
keepOriginalFields := false
skipEmptyResults := false
switch {
case lex.isKeyword("keep_original_fields"):
lex.nextToken()
keepOriginalFields = true
case lex.isKeyword("skip_empty_results"):
lex.nextToken()
skipEmptyResults = true
}
pu := &pipeUnpackLogfmt{
fromField: fromField,
fields: fields,
resultPrefix: resultPrefix,
iff: iff,
fromField: fromField,
fields: fields,
resultPrefix: resultPrefix,
keepOriginalFields: keepOriginalFields,
skipEmptyResults: skipEmptyResults,
iff: iff,
}
return pu, nil

View file

@ -11,19 +11,33 @@ func TestParsePipeUnpackLogfmtSuccess(t *testing.T) {
}
f(`unpack_logfmt`)
f(`unpack_logfmt skip_empty_results`)
f(`unpack_logfmt keep_original_fields`)
f(`unpack_logfmt fields (a, b)`)
f(`unpack_logfmt fields (a, b) skip_empty_results`)
f(`unpack_logfmt fields (a, b) keep_original_fields`)
f(`unpack_logfmt if (a:x)`)
f(`unpack_logfmt if (a:x) skip_empty_results`)
f(`unpack_logfmt if (a:x) keep_original_fields`)
f(`unpack_logfmt if (a:x) fields (a, b)`)
f(`unpack_logfmt from x`)
f(`unpack_logfmt from x skip_empty_results`)
f(`unpack_logfmt from x keep_original_fields`)
f(`unpack_logfmt from x fields (a, b)`)
f(`unpack_logfmt from x fields (a, b) skip_empty_results`)
f(`unpack_logfmt from x fields (a, b) keep_original_fields`)
f(`unpack_logfmt if (a:x) from x`)
f(`unpack_logfmt if (a:x) from x fields (a, b)`)
f(`unpack_logfmt from x result_prefix abc`)
f(`unpack_logfmt if (a:x) from x result_prefix abc`)
f(`unpack_logfmt if (a:x) from x fields (a, b) result_prefix abc`)
f(`unpack_logfmt if (a:x) from x fields (a, b) result_prefix abc skip_empty_results`)
f(`unpack_logfmt if (a:x) from x fields (a, b) result_prefix abc keep_original_fields`)
f(`unpack_logfmt result_prefix abc`)
f(`unpack_logfmt if (a:x) result_prefix abc`)
f(`unpack_logfmt if (a:x) fields (a, b) result_prefix abc`)
f(`unpack_logfmt if (a:x) fields (a, b) result_prefix abc skip_empty_results`)
f(`unpack_logfmt if (a:x) fields (a, b) result_prefix abc keep_original_fields`)
}
func TestParsePipeUnpackLogfmtFailure(t *testing.T) {
@ -57,6 +71,7 @@ func TestPipeUnpackLogfmt(t *testing.T) {
f("unpack_logfmt fields (foo, a, b)", [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"a", "xxx"},
},
}, [][]Field{
{
@ -67,10 +82,58 @@ func TestPipeUnpackLogfmt(t *testing.T) {
},
})
// no skip empty results
f("unpack_logfmt", [][]Field{
{
{"_msg", `foo= baz="x y=z" a=b`},
{"foo", "321"},
{"baz", "abcdef"},
},
}, [][]Field{
{
{"_msg", `foo= baz="x y=z" a=b`},
{"foo", ""},
{"baz", "x y=z"},
{"a", "b"},
},
})
// skip empty results
f("unpack_logfmt skip_empty_results", [][]Field{
{
{"_msg", `foo= baz="x y=z" a=b`},
{"foo", "321"},
{"baz", "abcdef"},
},
}, [][]Field{
{
{"_msg", `foo= baz="x y=z" a=b`},
{"foo", "321"},
{"baz", "x y=z"},
{"a", "b"},
},
})
// keep original fields
f("unpack_logfmt keep_original_fields", [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"baz", "abcdef"},
},
}, [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"foo", "bar"},
{"baz", "abcdef"},
{"a", "b"},
},
})
// single row, unpack from _msg
f("unpack_logfmt", [][]Field{
{
{"_msg", `foo=bar baz="x y=z" a=b`},
{"baz", "abcdef"},
},
}, [][]Field{
{
@ -242,7 +305,11 @@ func TestPipeUnpackLogfmtUpdateNeededFields(t *testing.T) {
}
// all the needed fields
f("unpack_logfmt from x", "*", "", "*", "")
f("unpack_logfmt", "*", "", "*", "")
f("unpack_logfmt fields (f1, f2)", "*", "", "*", "f1,f2")
f("unpack_logfmt fields (f1, f2) skip_empty_results", "*", "", "*", "")
f("unpack_logfmt fields (f1, f2) keep_original_fields", "*", "", "*", "")
f("unpack_logfmt keep_original_fields", "*", "", "*", "")
f("unpack_logfmt if (y:z) from x", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src

View file

@ -1,6 +1,7 @@
package logstorage
import (
"strings"
"testing"
)
@ -415,3 +416,24 @@ func TestStatsUniqValues(t *testing.T) {
},
})
}
func TestSortStrings(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
a := strings.Split(s, ",")
sortStrings(a)
result := strings.Join(a, ",")
if result != resultExpected {
t.Fatalf("unexpected sort result\ngot\n%q\nwant\n%q", a, resultExpected)
}
}
f("", "")
f("1", "1")
f("foo,bar,baz", "bar,baz,foo")
f("100ms,1.5s,1.23s", "100ms,1.23s,1.5s")
f("10KiB,10KB,5.34K", "5.34K,10KB,10KiB")
f("v1.10.9,v1.10.10,v1.9.0", "v1.9.0,v1.10.9,v1.10.10")
f("10s,123,100M", "123,100M,10s")
}

View file

@ -59,7 +59,7 @@ type StorageConfig struct {
// LogNewStreams indicates whether to log newly created log streams.
//
// This can be useful for debugging of high cardinality issues.
// https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#high-cardinality
// https://docs.victoriametrics.com/victorialogs/keyconcepts/#high-cardinality
LogNewStreams bool
// LogIngestedRows indicates whether to log the ingested log entries.
@ -440,7 +440,7 @@ func (s *Storage) MustAddRows(lr *LogRows) {
tsf := TimeFormatter(ts)
minAllowedTsf := TimeFormatter(minAllowedDay * nsecPerDay)
tooSmallTimestampLogger.Warnf("skipping log entry with too small timestamp=%s; it must be bigger than %s according "+
"to the configured -retentionPeriod=%dd. See https://docs.victoriametrics.com/VictoriaLogs/#retention ; "+
"to the configured -retentionPeriod=%dd. See https://docs.victoriametrics.com/victorialogs/#retention ; "+
"log entry: %s", &tsf, &minAllowedTsf, durationToDays(s.retention), &rf)
s.rowsDroppedTooSmallTimestamp.Add(1)
continue
@ -450,7 +450,7 @@ func (s *Storage) MustAddRows(lr *LogRows) {
tsf := TimeFormatter(ts)
maxAllowedTsf := TimeFormatter(maxAllowedDay * nsecPerDay)
tooBigTimestampLogger.Warnf("skipping log entry with too big timestamp=%s; it must be smaller than %s according "+
"to the configured -futureRetention=%dd; see https://docs.victoriametrics.com/VictoriaLogs/#retention ; "+
"to the configured -futureRetention=%dd; see https://docs.victoriametrics.com/victorialogs/#retention ; "+
"log entry: %s", &tsf, &maxAllowedTsf, durationToDays(s.futureRetention), &rf)
s.rowsDroppedTooBigTimestamp.Add(1)
continue

View file

@ -436,6 +436,10 @@ func hasFilterInWithQueryForPipes(pipes []pipe) bool {
return true
}
}
case *pipeReplace:
if t.iff.hasFilterInWithQuery() {
return true
}
case *pipeFormat:
if t.iff.hasFilterInWithQuery() {
return true
@ -525,6 +529,14 @@ func initFilterInValuesForPipes(cache map[string][]string, pipes []pipe, getFiel
byFields: t.byFields,
funcs: funcsNew,
}
case *pipeReplace:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pr := *t
pr.iff = iffNew
pipesNew[i] = &pr
case *pipeFormat:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {