lib/cgroup: document the ability to detect cgroup v2 memory and cpu limits. This is a follow-up for b50024812e

This commit is contained in:
Aliaksandr Valialkin 2021-05-13 09:26:20 +03:00
parent d7be2753c0
commit c0ec541559
5 changed files with 25 additions and 22 deletions

View file

@ -10,6 +10,7 @@ sort: 15
* FEATURE: return `X-Server-Hostname` header in http responses of all the VictoriaMetrics components. This should simplify tracing the origin server behind a load balancer or behind auth proxy during troubleshooting. * FEATURE: return `X-Server-Hostname` header in http responses of all the VictoriaMetrics components. This should simplify tracing the origin server behind a load balancer or behind auth proxy during troubleshooting.
* FEATURE: vmselect: allow to use 2x more memory for query processing at `vmselect` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html). This should allow processing heavy queries without the need to increase RAM size at `vmselect` nodes. * FEATURE: vmselect: allow to use 2x more memory for query processing at `vmselect` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html). This should allow processing heavy queries without the need to increase RAM size at `vmselect` nodes.
* FEATURE: add ability to filter `/api/v1/status/tsdb` output with arbitrary [time series selectors](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) passed via `match[]` query args. See [these docs](https://docs.victoriametrics.com/#tsdb-stats) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1168) for details. * FEATURE: add ability to filter `/api/v1/status/tsdb` output with arbitrary [time series selectors](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) passed via `match[]` query args. See [these docs](https://docs.victoriametrics.com/#tsdb-stats) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1168) for details.
* FEATURE: automatically detect memory and cpu limits for VictoriaMetrics components running under [cgroup v2](https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html) environments such as [HashiCorp Nomad](https://www.nomadproject.io/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1269).
* BUGFIX: vmagent: fix possible race when refreshing `role: endpoints` and `role: endpointslices` scrape targets in `kubernetes_sd_config`. Previously `pod` objects could be updated after the related `endpoints` object update. This could lead to missing scrape targets. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240). * BUGFIX: vmagent: fix possible race when refreshing `role: endpoints` and `role: endpointslices` scrape targets in `kubernetes_sd_config`. Previously `pod` objects could be updated after the related `endpoints` object update. This could lead to missing scrape targets. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240).
* BUGFIX: properly remove stale parts outside the configured retention if `-retentionPeriod` is smaller than one month. Previously stale parts could remain active for up to a month after they go outside the retention. * BUGFIX: properly remove stale parts outside the configured retention if `-retentionPeriod` is smaller than one month. Previously stale parts could remain active for up to a month after they go outside the retention.

View file

@ -42,11 +42,10 @@ func updateGOMAXPROCSToCPUQuota() {
} }
func getCPUQuota() float64 { func getCPUQuota() float64 {
cpuQuota, err := getCPUStatGeneric() cpuQuota, err := getCPUQuotaGeneric()
if err != nil { if err != nil {
return 0 return 0
} }
if cpuQuota <= 0 { if cpuQuota <= 0 {
// The quota isn't set. This may be the case in multilevel containers. // The quota isn't set. This may be the case in multilevel containers.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/685#issuecomment-674423728 // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/685#issuecomment-674423728
@ -55,7 +54,7 @@ func getCPUQuota() float64 {
return cpuQuota return cpuQuota
} }
func getCPUStatGeneric() (float64, error) { func getCPUQuotaGeneric() (float64, error) {
quotaUS, err := getCPUStat("cpu.cfs_quota_us") quotaUS, err := getCPUStat("cpu.cfs_quota_us")
if err == nil { if err == nil {
periodUS, err := getCPUStat("cpu.cfs_period_us") periodUS, err := getCPUStat("cpu.cfs_period_us")
@ -63,7 +62,7 @@ func getCPUStatGeneric() (float64, error) {
return float64(quotaUS) / float64(periodUS), nil return float64(quotaUS) / float64(periodUS), nil
} }
} }
return getCPUStatV2("/sys/fs/cgroup", "/proc/self/cgroup") return getCPUQuotaV2("/sys/fs/cgroup", "/proc/self/cgroup")
} }
func getCPUStat(statName string) (int64, error) { func getCPUStat(statName string) (int64, error) {
@ -83,31 +82,35 @@ func getOnlineCPUCount() float64 {
return n return n
} }
func getCPUStatV2(sysPrefix, cgroupPath string) (float64, error) { func getCPUQuotaV2(sysPrefix, cgroupPath string) (float64, error) {
data, err := getFileContents("cpu.max", sysPrefix, cgroupPath, "") data, err := getFileContents("cpu.max", sysPrefix, cgroupPath, "")
if err != nil { if err != nil {
return 0, err return 0, err
} }
return parseCPUMax(data) data = strings.TrimSpace(data)
n, err := parseCPUMax(data)
if err != nil {
return 0, fmt.Errorf("cannot parse cpu.max file contents: %w", err)
}
return n, nil
} }
// parseCPUMax parses the contents of a cgroup v2 cpu.max file, which is
// expected to hold two whitespace-separated fields: "quota period".
//
// It returns quota/period as the number of CPUs available to the cgroup.
// If the quota field is the literal "max" (no limit configured), -1 is
// returned so callers can treat the quota as unset.
//
// An error is returned when data does not consist of exactly two fields,
// when either field is not an unsigned decimal integer, or when the period
// is zero (which would otherwise yield an infinite or NaN quota).
//
// See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#cpu
func parseCPUMax(data string) (float64, error) {
	// strings.Fields tolerates trailing newlines, tabs and repeated spaces,
	// so the function works on both raw and pre-trimmed file contents.
	bounds := strings.Fields(data)
	if len(bounds) != 2 {
		return 0, fmt.Errorf("unexpected line format: want 'quota period'; got: %s", data)
	}
	if bounds[0] == "max" {
		return -1, nil
	}
	quota, err := strconv.ParseUint(bounds[0], 10, 64)
	if err != nil {
		return 0, fmt.Errorf("cannot parse quota: %w", err)
	}
	period, err := strconv.ParseUint(bounds[1], 10, 64)
	if err != nil {
		return 0, fmt.Errorf("cannot parse period: %w", err)
	}
	if period == 0 {
		// Guard against division by zero, which would produce +Inf or NaN.
		return 0, fmt.Errorf("unexpected zero period in %q", data)
	}
	return float64(quota) / float64(period), nil
}

View file

@ -23,15 +23,15 @@ func TestCountCPUs(t *testing.T) {
f("0-6", 7) f("0-6", 7)
} }
func TestGetCPUStatV2(t *testing.T) { func TestGetCPUQuotaV2(t *testing.T) {
f := func(sysPrefix, cgroupPath string, expectedCPU float64) { f := func(sysPrefix, cgroupPath string, expectedCPU float64) {
t.Helper() t.Helper()
got, err := getCPUStatV2(sysPrefix, cgroupPath) got, err := getCPUQuotaV2(sysPrefix, cgroupPath)
if err != nil { if err != nil {
t.Fatalf("unexpected error: %s, sysPrefix: %s, cgroupPath: %s", err, sysPrefix, cgroupPath) t.Fatalf("unexpected error: %s, sysPrefix: %s, cgroupPath: %s", err, sysPrefix, cgroupPath)
} }
if got != expectedCPU { if got != expectedCPU {
t.Fatalf("unexpected result from getCPUStatV2(%s, %s), got %f, want %f", sysPrefix, cgroupPath, got, expectedCPU) t.Fatalf("unexpected result from getCPUQuotaV2(%s, %s), got %f, want %f", sysPrefix, cgroupPath, got, expectedCPU)
} }
} }
f("testdata/cgroup", "testdata/self/cgroupv2", 2) f("testdata/cgroup", "testdata/self/cgroupv2", 2)

View file

@ -16,17 +16,16 @@ func GetMemoryLimit() int64 {
if err == nil { if err == nil {
return n return n
} }
// https: //www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files n, err = getMemStatV2("memory.max")
n, err = getMemStatV2()
if err != nil { if err != nil {
return 0 return 0
} }
return n return n
} }
func getMemStatV2() (int64, error) { func getMemStatV2(statName string) (int64, error) {
return getStatGeneric("memory.max", "/sys/fs/cgroup", "/proc/self/cgroup", "") // See https: //www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files
return getStatGeneric(statName, "/sys/fs/cgroup", "/proc/self/cgroup", "")
} }
func getMemStat(statName string) (int64, error) { func getMemStat(statName string) (int64, error) {

View file

@ -13,10 +13,10 @@ func getStatGeneric(statName, sysfsPrefix, cgroupPath, cgroupGrepLine string) (i
if err != nil { if err != nil {
return 0, err return 0, err
} }
data = strings.TrimRight(data, "\r\n") data = strings.TrimSpace(data)
n, err := strconv.ParseInt(data, 10, 64) n, err := strconv.ParseInt(data, 10, 64)
if err != nil { if err != nil {
return 0, err return 0, fmt.Errorf("cannot parse %q: %w", cgroupPath, err)
} }
return n, nil return n, nil
} }
@ -33,7 +33,7 @@ func getFileContents(statName, sysfsPrefix, cgroupPath, cgroupGrepLine string) (
} }
subPath, err := grepFirstMatch(string(cgroupData), cgroupGrepLine, 2, ":") subPath, err := grepFirstMatch(string(cgroupData), cgroupGrepLine, 2, ":")
if err != nil { if err != nil {
return "", err return "", fmt.Errorf("cannot find cgroup path for %q in %q: %w", cgroupGrepLine, cgroupPath, err)
} }
filepath = path.Join(sysfsPrefix, subPath, statName) filepath = path.Join(sysfsPrefix, subPath, statName)
data, err = ioutil.ReadFile(filepath) data, err = ioutil.ReadFile(filepath)