diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index a979808c2..32fab2ad4 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -2,6 +2,10 @@ # tip +* FEATURE: add the following metrics at the `/metrics` page for every VictoriaMetrics app: + * `process_resident_memory_anonymous_bytes` - RSS share for memory allocated by the process itself. This share cannot be freed by the OS, so it must be taken into account by the OOM killer. + * `process_resident_memory_pagecache_bytes` - RSS share for page cache memory (aka memory-mapped files). This share can be freed by the OS at any time, so it must be ignored by the OOM killer. + # [v1.56.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.56.0) diff --git a/go.mod b/go.mod index bbed0ba7d..aac209bce 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( // Do not use the original github.com/valyala/fasthttp because of issues // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.0.14 - github.com/VictoriaMetrics/metrics v1.15.3 + github.com/VictoriaMetrics/metrics v1.16.0 github.com/VictoriaMetrics/metricsql v0.14.0 github.com/aws/aws-sdk-go v1.37.31 github.com/cespare/xxhash/v2 v2.1.1 diff --git a/go.sum b/go.sum index dfa15ee4d..1eb5f6db6 100644 --- a/go.sum +++ b/go.sum @@ -86,8 +86,8 @@ github.com/VictoriaMetrics/fastcache v1.5.8/go.mod h1:SiMZNgwEPJ9qWLshu9tyuE6bKc github.com/VictoriaMetrics/fasthttp v1.0.14 h1:iWCdHg7JQ1SO0xvPAgw3QFpFT3he+Ugdshg+1clN6CQ= github.com/VictoriaMetrics/fasthttp v1.0.14/go.mod h1:eDVgYyGts3xXpYpVGDxQ3ZlQKW5TSvOqfc9FryjH1JA= github.com/VictoriaMetrics/metrics v1.12.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE= -github.com/VictoriaMetrics/metrics v1.15.3 h1:Ry74VPJiLWOOvpnwnjr4LL7JIMy2wb01ssvfUiSBq34= -github.com/VictoriaMetrics/metrics v1.15.3/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE= +github.com/VictoriaMetrics/metrics v1.16.0 h1:N/XjkxiSZNF22W3jq9lN0jtzqOIx9n9hvZ8vnu3pHU8= 
+github.com/VictoriaMetrics/metrics v1.16.0/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE= github.com/VictoriaMetrics/metricsql v0.14.0 h1:XGbpZJVskUPJFo2C7vG6ATxXBwkBFPe7EWZXB2HZt2U= github.com/VictoriaMetrics/metricsql v0.14.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8= github.com/VividCortex/ewma v1.1.1 h1:MnEK4VOv6n0RSY4vtRe3h11qjxL3+t0B8yOL8iMXdcM= diff --git a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go index 895ca72dd..eae9b2483 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go +++ b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go @@ -1,6 +1,7 @@ package metrics import ( + "bufio" "bytes" "fmt" "io" @@ -10,6 +11,7 @@ import ( "strconv" "strings" "time" + "unsafe" ) // See https://github.com/prometheus/procfs/blob/a4ac0826abceb44c40fc71daed2b301db498b93e/proc_stat.go#L40 . @@ -65,6 +67,11 @@ func writeProcessMetrics(w io.Writer) { log.Printf("ERROR: cannot parse %q read from %s: %s", data, statFilepath, err) return } + rssPageCache, rssAnonymous, err := getRSSStats() + if err != nil { + log.Printf("ERROR: cannot obtain RSS page cache bytes: %s", err) + return + } // It is expensive obtaining `process_open_fds` when big number of file descriptors is opened, // so don't do it here. 
@@ -79,6 +86,8 @@ func writeProcessMetrics(w io.Writer) { fmt.Fprintf(w, "process_minor_pagefaults_total %d\n", p.Minflt) fmt.Fprintf(w, "process_num_threads %d\n", p.NumThreads) fmt.Fprintf(w, "process_resident_memory_bytes %d\n", p.Rss*4096) + fmt.Fprintf(w, "process_resident_memory_anonymous_bytes %d\n", rssAnonymous) + fmt.Fprintf(w, "process_resident_memory_pagecache_bytes %d\n", rssPageCache) fmt.Fprintf(w, "process_start_time_seconds %d\n", startTimeSeconds) fmt.Fprintf(w, "process_virtual_memory_bytes %d\n", p.Vsize) @@ -133,7 +142,7 @@ func writeIOMetrics(w io.Writer) { var startTimeSeconds = time.Now().Unix() -// WriteFDMetrics writes process_max_fds and process_open_fds metrics to w. +// writeFDMetrics writes process_max_fds and process_open_fds metrics to w. func writeFDMetrics(w io.Writer) { totalOpenFDs, err := getOpenFDsCount("/proc/self/fd") if err != nil { @@ -198,3 +207,120 @@ func getMaxFilesLimit(path string) (uint64, error) { } return 0, fmt.Errorf("cannot find max open files limit") } + +// getRSSStats returns RSS bytes for page cache and anonymous memory. 
+func getRSSStats() (uint64, uint64, error) { + filepath := "/proc/self/smaps" + f, err := os.Open(filepath) + if err != nil { + return 0, 0, fmt.Errorf("cannot open %q: %w", filepath, err) + } + defer func() { + _ = f.Close() + }() + rssPageCache, rssAnonymous, err := getRSSStatsFromSmaps(f) + if err != nil { + return 0, 0, fmt.Errorf("cannot read %q: %w", filepath, err) + } + return rssPageCache, rssAnonymous, nil +} + +func getRSSStatsFromSmaps(r io.Reader) (uint64, uint64, error) { + var pageCacheBytes, anonymousBytes uint64 + var se smapsEntry + ses := newSmapsEntryScanner(r) + for ses.Next(&se) { + if se.anonymousBytes == 0 { + pageCacheBytes += se.rssBytes + } else { + anonymousBytes += se.rssBytes + } + } + if err := ses.Err(); err != nil { + return 0, 0, err + } + return pageCacheBytes, anonymousBytes, nil +} + +type smapsEntry struct { + rssBytes uint64 + anonymousBytes uint64 +} + +func (se *smapsEntry) reset() { + se.rssBytes = 0 + se.anonymousBytes = 0 +} + +type smapsEntryScanner struct { + bs *bufio.Scanner + err error +} + +func newSmapsEntryScanner(r io.Reader) *smapsEntryScanner { + return &smapsEntryScanner{ + bs: bufio.NewScanner(r), + } +} + +func (ses *smapsEntryScanner) Err() error { + return ses.err +} + +// Next reads the next se from ses. +// +// It returns true after successful read and false on error or on the end of stream. +// ses.Err() method must be called for determining the error. 
+func (ses *smapsEntryScanner) Next(se *smapsEntry) bool { + se.reset() + if !ses.bs.Scan() { + ses.err = ses.bs.Err() + return false + } + for ses.bs.Scan() { + line := unsafeBytesToString(ses.bs.Bytes()) + switch { + case strings.HasPrefix(line, "VmFlags:"): + return true + case strings.HasPrefix(line, "Rss:"): + n, err := getSmapsSize(line[len("Rss:"):]) + if err != nil { + ses.err = fmt.Errorf("cannot read Rss size: %w", err) + return false + } + se.rssBytes = n + case strings.HasPrefix(line, "Anonymous:"): + n, err := getSmapsSize(line[len("Anonymous:"):]) + if err != nil { + ses.err = fmt.Errorf("cannot read Anonymous size: %w", err) + return false + } + se.anonymousBytes = n + } + } + ses.err = ses.bs.Err() + if ses.err == nil { + ses.err = fmt.Errorf("unexpected end of stream") + } + return false +} + +func getSmapsSize(line string) (uint64, error) { + line = strings.TrimSpace(line) + if !strings.HasSuffix(line, " kB") { + return 0, fmt.Errorf("cannot find %q suffix in %q", " kB", line) + } + line = line[:len(line)-len(" kB")] + n, err := strconv.ParseUint(line, 10, 64) + if err != nil { + return 0, fmt.Errorf("cannot parse %q: %w", line, err) + } + if n > ((1<<64)-1)/1024 { + return 0, fmt.Errorf("too big size in %q: %d kB", line, n) + } + return n * 1024, nil +} + +func unsafeBytesToString(b []byte) string { + return *(*string)(unsafe.Pointer(&b)) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index af55acf31..491597497 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -17,7 +17,7 @@ github.com/VictoriaMetrics/fastcache github.com/VictoriaMetrics/fasthttp github.com/VictoriaMetrics/fasthttp/fasthttputil github.com/VictoriaMetrics/fasthttp/stackless -# github.com/VictoriaMetrics/metrics v1.15.3 +# github.com/VictoriaMetrics/metrics v1.16.0 ## explicit github.com/VictoriaMetrics/metrics # github.com/VictoriaMetrics/metricsql v0.14.0