From ef7e2af8f519e16a60bed85bbfb16ee5a09d64dd Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 11 Aug 2020 22:54:13 +0300 Subject: [PATCH] app: respect CPU limits set via cgroups Update GOMAXPROCS to limits set via cgroups. This should reduce CPU thrashing and reduce memory usage for cases when VictoriaMetrics components run in containers with CPU limits. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/685 --- app/victoria-metrics/main.go | 2 ++ app/vmagent/main.go | 2 ++ app/vmalert/main.go | 2 ++ app/vmauth/main.go | 2 ++ app/vmbackup/main.go | 3 ++ app/vmrestore/main.go | 3 ++ lib/cgroup/cpu.go | 40 +++++++++++++++++++++++ lib/cgroup/mem.go | 16 +++++++++ lib/cgroup/util.go | 24 ++++++++++++++ lib/memory/memory_linux.go | 63 +++--------------------------------- 10 files changed, 98 insertions(+), 59 deletions(-) create mode 100644 lib/cgroup/cpu.go create mode 100644 lib/cgroup/mem.go create mode 100644 lib/cgroup/util.go diff --git a/app/victoria-metrics/main.go b/app/victoria-metrics/main.go index f15b5a49a2..30d1f637ef 100644 --- a/app/victoria-metrics/main.go +++ b/app/victoria-metrics/main.go @@ -10,6 +10,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" @@ -31,6 +32,7 @@ func main() { envflag.Parse() buildinfo.Init() logger.Init() + cgroup.UpdateGOMAXPROCSToCPUQuota() logger.Infof("starting VictoriaMetrics at %q...", *httpListenAddr) startTime := time.Now() storage.SetMinScrapeIntervalForDeduplication(*minScrapeInterval) diff --git a/app/vmagent/main.go b/app/vmagent/main.go index 0e0a1e47f0..57d4f673c1 100644 --- a/app/vmagent/main.go +++ b/app/vmagent/main.go @@ -18,6 
+18,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport" "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" graphiteserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/graphite" @@ -59,6 +60,7 @@ func main() { envflag.Parse() buildinfo.Init() logger.Init() + cgroup.UpdateGOMAXPROCSToCPUQuota() if *dryRun { if err := flag.Set("promscrape.config.strictParse", "true"); err != nil { diff --git a/app/vmalert/main.go b/app/vmalert/main.go index 754a35dbe2..a57361136b 100644 --- a/app/vmalert/main.go +++ b/app/vmalert/main.go @@ -15,6 +15,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remoteread" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" @@ -54,6 +55,7 @@ func main() { envflag.Parse() buildinfo.Init() logger.Init() + cgroup.UpdateGOMAXPROCSToCPUQuota() ctx, cancel := context.WithCancel(context.Background()) manager, err := newManager(ctx) diff --git a/app/vmauth/main.go b/app/vmauth/main.go index d1645a5667..3d6d85b093 100644 --- a/app/vmauth/main.go +++ b/app/vmauth/main.go @@ -10,6 +10,7 @@ import ( "time" "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" @@ -27,6 +28,7 @@ func main() { envflag.Parse() 
buildinfo.Init() logger.Init() + cgroup.UpdateGOMAXPROCSToCPUQuota() logger.Infof("starting vmauth at %q...", *httpListenAddr) startTime := time.Now() initAuthConfig() diff --git a/app/vmbackup/main.go b/app/vmbackup/main.go index 0742b4fa54..7a46ef9c93 100644 --- a/app/vmbackup/main.go +++ b/app/vmbackup/main.go @@ -11,6 +11,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common" "github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) @@ -36,6 +37,8 @@ func main() { flag.Usage = usage envflag.Parse() buildinfo.Init() + logger.Init() + cgroup.UpdateGOMAXPROCSToCPUQuota() if len(*snapshotCreateURL) > 0 { logger.Infof("%s", "Snapshots enabled") diff --git a/app/vmrestore/main.go b/app/vmrestore/main.go index b2f4924753..0f75d00da2 100644 --- a/app/vmrestore/main.go +++ b/app/vmrestore/main.go @@ -9,6 +9,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common" "github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) @@ -30,6 +31,8 @@ func main() { flag.Usage = usage envflag.Parse() buildinfo.Init() + logger.Init() + cgroup.UpdateGOMAXPROCSToCPUQuota() srcFS, err := newSrcFS() if err != nil { diff --git a/lib/cgroup/cpu.go b/lib/cgroup/cpu.go new file mode 100644 index 0000000000..9b1a0ceac8 --- /dev/null +++ b/lib/cgroup/cpu.go @@ -0,0 +1,40 @@ +package cgroup + +import ( + "os" + "runtime" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// UpdateGOMAXPROCSToCPUQuota updates GOMAXPROCS to cgroup CPU quota if GOMAXPROCS isn't set in 
environment var. +// +// This function must be called after logger.Init(). +func UpdateGOMAXPROCSToCPUQuota() { + if v := os.Getenv("GOMAXPROCS"); v != "" { + return + } + q := getCPUQuota() + if q <= 0 { + // Do not change GOMAXPROCS + return + } + gomaxprocs := int(q + 0.5) + if gomaxprocs <= 0 { + gomaxprocs = 1 + } + logger.Infof("updating GOMAXPROCS to %d according to cgroup CPU quota", gomaxprocs) + runtime.GOMAXPROCS(gomaxprocs) +} + +func getCPUQuota() float64 { + quotaUS, err := readInt64("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", "cat /sys/fs/cgroup/cpu$(cat /proc/self/cgroup | grep cpu, | cut -d: -f3)/cpu.cfs_quota_us") + if err != nil { + return 0 + } + periodUS, err := readInt64("/sys/fs/cgroup/cpu/cpu.cfs_period_us", "cat /sys/fs/cgroup/cpu$(cat /proc/self/cgroup | grep cpu, | cut -d: -f3)/cpu.cfs_period_us") + if err != nil { + return 0 + } + return float64(quotaUS) / float64(periodUS) +} diff --git a/lib/cgroup/mem.go b/lib/cgroup/mem.go new file mode 100644 index 0000000000..716c21b0d5 --- /dev/null +++ b/lib/cgroup/mem.go @@ -0,0 +1,16 @@ +package cgroup + +// GetMemoryLimit returns cgroup memory limit +func GetMemoryLimit() int64 { + // Try determining the amount of memory inside docker container. + // See https://stackoverflow.com/questions/42187085/check-mem-limit-within-a-docker-container + // + // Read memory limit according to https://unix.stackexchange.com/questions/242718/how-to-find-out-how-much-memory-lxc-container-is-allowed-to-consume + // This should properly determine the limit inside lxc container. 
+ // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/84 + n, err := readInt64("/sys/fs/cgroup/memory/memory.limit_in_bytes", "cat /sys/fs/cgroup/memory$(cat /proc/self/cgroup | grep memory | cut -d: -f3)/memory.limit_in_bytes") + if err != nil { + return 0 + } + return n +} diff --git a/lib/cgroup/util.go b/lib/cgroup/util.go new file mode 100644 index 0000000000..337b8fa3bb --- /dev/null +++ b/lib/cgroup/util.go @@ -0,0 +1,24 @@ +package cgroup + +import ( + "bytes" + "io/ioutil" + "os/exec" + "strconv" +) + +func readInt64(path, altCommand string) (int64, error) { + data, err := ioutil.ReadFile(path) + if err != nil { + // Read data according to https://unix.stackexchange.com/questions/242718/how-to-find-out-how-much-memory-lxc-container-is-allowed-to-consume + // This should properly determine the data location inside lxc container. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/84 + cmd := exec.Command("/bin/sh", "-c", altCommand) + data, err = cmd.Output() + if err != nil { + return 0, err + } + } + data = bytes.TrimSpace(data) + return strconv.ParseInt(string(data), 10, 64) +} diff --git a/lib/memory/memory_linux.go b/lib/memory/memory_linux.go index 6529712f8e..5ad333748b 100644 --- a/lib/memory/memory_linux.go +++ b/lib/memory/memory_linux.go @@ -1,11 +1,9 @@ package memory import ( - "io/ioutil" - "os/exec" - "strconv" "syscall" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) @@ -20,62 +18,9 @@ func sysTotalMemory() int { if uint64(maxInt)/uint64(si.Totalram) > uint64(si.Unit) { totalMem = int(uint64(si.Totalram) * uint64(si.Unit)) } - - // Try determining the amount of memory inside docker container. - // See https://stackoverflow.com/questions/42187085/check-mem-limit-within-a-docker-container . - data, err := ioutil.ReadFile("/sys/fs/cgroup/memory/memory.limit_in_bytes") - if err != nil { - // Try determining the amount of memory inside lxc container. 
- mem, err := readLXCMemoryLimit(totalMem) - if err != nil { - return totalMem - } - return mem - } - mem, err := readPositiveInt(data, totalMem) - if err != nil { + mem := cgroup.GetMemoryLimit() + if mem <= 0 || int64(int(mem)) != mem || int(mem) > totalMem { return totalMem } - if mem != totalMem { - return mem - } - - // Try reading LXC memory limit, since it looks like the cgroup limit doesn't work - mem, err = readLXCMemoryLimit(totalMem) - if err != nil { - return totalMem - } - return mem -} - -func readLXCMemoryLimit(totalMem int) (int, error) { - // Read memory limit according to https://unix.stackexchange.com/questions/242718/how-to-find-out-how-much-memory-lxc-container-is-allowed-to-consume - // This should properly determine the limit inside lxc container. - // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/84 - cmd := exec.Command("/bin/sh", "-c", - `cat /sys/fs/cgroup/memory$(cat /proc/self/cgroup | grep memory | cut -d: -f3)/memory.limit_in_bytes`) - data, err := cmd.Output() - if err != nil { - return 0, err - } - return readPositiveInt(data, totalMem) -} - -func readPositiveInt(data []byte, maxN int) (int, error) { - for len(data) > 0 && data[len(data)-1] == '\n' { - data = data[:len(data)-1] - } - n, err := strconv.ParseUint(string(data), 10, 64) - if err != nil { - return 0, err - } - if int64(n) < 0 || int64(int(n)) != int64(n) { - // Int overflow. - return maxN, nil - } - ni := int(n) - if ni > maxN { - return maxN, nil - } - return ni, nil + return int(mem) }