lib/cgroup: document the ability to detect cgroup v2 memory and cpu limits. This is a follow-up for b50024812e

This commit is contained in:
Aliaksandr Valialkin 2021-05-13 09:26:20 +03:00
parent d7be2753c0
commit c0ec541559
5 changed files with 25 additions and 22 deletions

View file

@ -10,6 +10,7 @@ sort: 15
* FEATURE: return `X-Server-Hostname` header in http responses of all the VictoriaMetrics components. This should simplify tracing the origin server behind a load balancer or behind an auth proxy during troubleshooting.
* FEATURE: vmselect: allow using 2x more memory for query processing at `vmselect` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html). This should allow processing heavy queries without the need to increase RAM size at `vmselect` nodes.
* FEATURE: add ability to filter `/api/v1/status/tsdb` output with arbitrary [time series selectors](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) passed via `match[]` query args. See [these docs](https://docs.victoriametrics.com/#tsdb-stats) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1168) for details.
* FEATURE: automatically detect memory and cpu limits for VictoriaMetrics components running under [cgroup v2](https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html) environments such as [HashiCorp Nomad](https://www.nomadproject.io/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1269).
* BUGFIX: vmagent: fix possible race when refreshing `role: endpoints` and `role: endpointslices` scrape targets in `kubernetes_sd_config`. Previously `pod` objects could be updated after the related `endpoints` object update. This could lead to missing scrape targets. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240).
* BUGFIX: properly remove stale parts outside the configured retention if `-retentionPeriod` is smaller than one month. Previously stale parts could remain active for up to a month after they go outside the retention.

View file

@ -42,11 +42,10 @@ func updateGOMAXPROCSToCPUQuota() {
}
func getCPUQuota() float64 {
cpuQuota, err := getCPUStatGeneric()
cpuQuota, err := getCPUQuotaGeneric()
if err != nil {
return 0
}
if cpuQuota <= 0 {
// The quota isn't set. This may be the case in multilevel containers.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/685#issuecomment-674423728
@ -55,7 +54,7 @@ func getCPUQuota() float64 {
return cpuQuota
}
func getCPUStatGeneric() (float64, error) {
func getCPUQuotaGeneric() (float64, error) {
quotaUS, err := getCPUStat("cpu.cfs_quota_us")
if err == nil {
periodUS, err := getCPUStat("cpu.cfs_period_us")
@ -63,7 +62,7 @@ func getCPUStatGeneric() (float64, error) {
return float64(quotaUS) / float64(periodUS), nil
}
}
return getCPUStatV2("/sys/fs/cgroup", "/proc/self/cgroup")
return getCPUQuotaV2("/sys/fs/cgroup", "/proc/self/cgroup")
}
func getCPUStat(statName string) (int64, error) {
@ -83,31 +82,35 @@ func getOnlineCPUCount() float64 {
return n
}
func getCPUStatV2(sysPrefix, cgroupPath string) (float64, error) {
func getCPUQuotaV2(sysPrefix, cgroupPath string) (float64, error) {
data, err := getFileContents("cpu.max", sysPrefix, cgroupPath, "")
if err != nil {
return 0, err
}
return parseCPUMax(data)
data = strings.TrimSpace(data)
n, err := parseCPUMax(data)
if err != nil {
return 0, fmt.Errorf("cannot parse cpu.max file contents: %w", err)
}
return n, nil
}
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#cpu
// See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#cpu
func parseCPUMax(data string) (float64, error) {
data = strings.TrimRight(data, "\r\n")
bounds := strings.Split(data, " ")
if len(bounds) != 2 {
return 0, fmt.Errorf("unexpected count: %d, want quota and period, got: %s", len(bounds), data)
return 0, fmt.Errorf("unexpected line format: want 'quota period'; got: %s", data)
}
if bounds[0] == "max" {
return -1, nil
}
quota, err := strconv.ParseUint(bounds[0], 10, 64)
if err != nil {
return 0, err
return 0, fmt.Errorf("cannot parse quota: %w", err)
}
period, err := strconv.ParseUint(bounds[1], 10, 64)
if err != nil {
return 0, err
return 0, fmt.Errorf("cannot parse period: %w", err)
}
return float64(quota) / float64(period), nil
}

View file

@ -23,15 +23,15 @@ func TestCountCPUs(t *testing.T) {
f("0-6", 7)
}
func TestGetCPUStatV2(t *testing.T) {
func TestGetCPUQuotaV2(t *testing.T) {
f := func(sysPrefix, cgroupPath string, expectedCPU float64) {
t.Helper()
got, err := getCPUStatV2(sysPrefix, cgroupPath)
got, err := getCPUQuotaV2(sysPrefix, cgroupPath)
if err != nil {
t.Fatalf("unexpected error: %s, sysPrefix: %s, cgroupPath: %s", err, sysPrefix, cgroupPath)
}
if got != expectedCPU {
t.Fatalf("unexpected result from getCPUStatV2(%s, %s), got %f, want %f", sysPrefix, cgroupPath, got, expectedCPU)
t.Fatalf("unexpected result from getCPUQuotaV2(%s, %s), got %f, want %f", sysPrefix, cgroupPath, got, expectedCPU)
}
}
f("testdata/cgroup", "testdata/self/cgroupv2", 2)

View file

@ -16,17 +16,16 @@ func GetMemoryLimit() int64 {
if err == nil {
return n
}
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files
n, err = getMemStatV2()
n, err = getMemStatV2("memory.max")
if err != nil {
return 0
}
return n
}
func getMemStatV2() (int64, error) {
return getStatGeneric("memory.max", "/sys/fs/cgroup", "/proc/self/cgroup", "")
func getMemStatV2(statName string) (int64, error) {
// See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files
return getStatGeneric(statName, "/sys/fs/cgroup", "/proc/self/cgroup", "")
}
func getMemStat(statName string) (int64, error) {

View file

@ -13,10 +13,10 @@ func getStatGeneric(statName, sysfsPrefix, cgroupPath, cgroupGrepLine string) (i
if err != nil {
return 0, err
}
data = strings.TrimRight(data, "\r\n")
data = strings.TrimSpace(data)
n, err := strconv.ParseInt(data, 10, 64)
if err != nil {
return 0, err
return 0, fmt.Errorf("cannot parse %q: %w", cgroupPath, err)
}
return n, nil
}
@ -33,7 +33,7 @@ func getFileContents(statName, sysfsPrefix, cgroupPath, cgroupGrepLine string) (
}
subPath, err := grepFirstMatch(string(cgroupData), cgroupGrepLine, 2, ":")
if err != nil {
return "", err
return "", fmt.Errorf("cannot find cgroup path for %q in %q: %w", cgroupGrepLine, cgroupPath, err)
}
filepath = path.Join(sysfsPrefix, subPath, statName)
data, err = ioutil.ReadFile(filepath)