diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f7ede2820..183ec9311 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -12,6 +12,7 @@ sort: 15 * Multi-level binary operations. For example, `foo{a="b"} + bar{x="y"} + baz{z="q"}` is now optimized to `foo{a="b",x="y",z="q"} + bar{a="b",x="y",z="q"} + baz{a="b",x="y",z="q"}` * Aggregate functions. For example, `sum(foo{a="b"}) by (c) + bar{c="d"}` is now optimized to `sum(foo{a="b",c="d"}) by (c) + bar{c="d"}` * FEATURE [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): optimize joining with `*_info` labels. For example: `kube_pod_created{namespace="prod"} * on (uid) group_left(node) kube_pod_info` now automatically adds the needed filters on `uid` label to `kube_pod_info` before selecting series for the right side of `*` operation. This may save CPU, RAM and disk IO resources. See [this article](https://www.robustperception.io/exposing-the-software-version-to-prometheus) for details on `*_info` labels. +* FEATURE: all: expose `process_cpu_cores_available` metric, which shows the number of CPU cores available to the app. The number can be fractional if the corresponding cgroup limit is set to a fractional value. This metric is useful for alerting on CPU saturation. For example, the following query alerts when the app uses more than 90% of CPU during the last 5 minutes: `rate(process_cpu_seconds_total[5m]) / process_cpu_cores_available > 0.9`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2107). * BUGFIX: return proper results from `highestMax()` function at [Graphite render API](https://docs.victoriametrics.com/#graphite-render-api-usage). Previously it was incorrectly returning timeseries with min peaks instead of max peaks. * BUGFIX: properly limit indexdb cache sizes. Previously they could exceed values set via `-memory.allowedPercent` and/or `-memory.allowedBytes` when `indexdb` contained many data parts. 
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2007). diff --git a/lib/cgroup/cpu.go b/lib/cgroup/cpu.go index 46c706099..14f16e6c9 100644 --- a/lib/cgroup/cpu.go +++ b/lib/cgroup/cpu.go @@ -7,33 +7,40 @@ import ( "runtime" "strconv" "strings" + + "github.com/VictoriaMetrics/metrics" ) // AvailableCPUs returns the number of available CPU cores for the app. +// +// The number is rounded to the nearest integer value if a fractional number of CPU cores is available. func AvailableCPUs() int { return runtime.GOMAXPROCS(-1) } func init() { - updateGOMAXPROCSToCPUQuota() + cpuCoresAvailable := getCPUQuota() + updateGOMAXPROCSToCPUQuota(cpuCoresAvailable) + metrics.NewGauge(`process_cpu_cores_available`, func() float64 { + return cpuCoresAvailable + }) } -// updateGOMAXPROCSToCPUQuota updates GOMAXPROCS to cgroup CPU quota if GOMAXPROCS isn't set in environment var. -func updateGOMAXPROCSToCPUQuota() { +// updateGOMAXPROCSToCPUQuota updates GOMAXPROCS to cpuCoresAvailable if GOMAXPROCS isn't set in environment var. +func updateGOMAXPROCSToCPUQuota(cpuCoresAvailable float64) { if v := os.Getenv("GOMAXPROCS"); v != "" { // Do not override explicitly set GOMAXPROCS. return } - q := getCPUQuota() - if q <= 0 { - // Do not change GOMAXPROCS + if cpuCoresAvailable <= 0 { + // Do not change GOMAXPROCS if cpuCoresAvailable is incorrectly set. return } - gomaxprocs := int(q + 0.5) + gomaxprocs := int(cpuCoresAvailable + 0.5) numCPU := runtime.NumCPU() if gomaxprocs > numCPU { // There is no sense in setting more GOMAXPROCS than the number of available CPU cores. - return + gomaxprocs = numCPU } if gomaxprocs <= 0 { gomaxprocs = 1