lib/fs: remove the code for tracking whether the given memory region is in page cache

This code didn't give performance gains under production workload, so let's remove it in order to simplify the code.
This commit is contained in:
Aliaksandr Valialkin 2021-02-09 16:49:01 +02:00
parent afa9cf9c57
commit 082eabf51e
3 changed files with 5 additions and 153 deletions

View file

@ -1,34 +0,0 @@
// +build cgo
package fs
// #cgo CFLAGS: -O3
//
// #include <stdint.h> // for uintptr_t
// #include <string.h> // for memcpy
//
// // The memcpy_wrapper allows avoiding memory allocations during calls from Go.
// // See https://github.com/golang/go/issues/24450 .
// static void memcpy_wrapper(uintptr_t dst, uintptr_t src, size_t n) {
// memcpy((void*)dst, (void*)src, n);
// }
import "C"
import (
"runtime"
"unsafe"
)
// copyMmap copies len(dst) bytes from src to dst.
func copyMmap(dst, src []byte) {
// Copy data from mmap'ed src via cgo call in order to protect from goroutine stalls
// when the copied data isn't available in RAM, so the OS triggers reading the data from file.
// See https://medium.com/@valyala/mmap-in-go-considered-harmful-d92a25cb161d for details.
dstPtr := C.uintptr_t(uintptr(unsafe.Pointer(&dst[0])))
srcPtr := C.uintptr_t(uintptr(unsafe.Pointer(&src[0])))
C.memcpy_wrapper(dstPtr, srcPtr, C.size_t(len(dst)))
// Prevent from GC'ing src or dst during C.memcpy_wrapper call.
runtime.KeepAlive(src)
runtime.KeepAlive(dst)
}

View file

@ -1,12 +0,0 @@
// +build !cgo
package fs
// copyMmap copies len(dst) bytes from src to dst.
func copyMmap(dst, src []byte) {
// This may lead to goroutines stalls when the copied data isn't available in RAM.
// In this case the OS triggers reading the data from file.
// See https://medium.com/@valyala/mmap-in-go-considered-harmful-d92a25cb161d for details.
// TODO: fix this
copy(dst, src)
}

View file

@ -4,9 +4,6 @@ import (
"flag"
"fmt"
"os"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
@ -17,6 +14,7 @@ var disableMmap = flag.Bool("fs.disableMmap", is32BitPtr, "Whether to use pread(
"By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. "+
"mmap() is usually faster for reading small data chunks than pread()")
// Disable mmap for architectures with 32-bit pointers in order to be able to work with files exceeding 2^32 bytes.
const is32BitPtr = (^uintptr(0) >> 32) == 0
// MustReadAtCloser is rand-access read interface.
@ -32,18 +30,6 @@ type MustReadAtCloser interface {
type ReaderAt struct {
f *os.File
mmapData []byte
// pageCacheBitmap holds a bitmap for recently touched pages in mmapData.
// This bitmap allows using simple copy() instead of copyMmap() for reading recently touched pages,
// which is up to 4x faster when reading small chunks of data via MustReadAt.
pageCacheBitmap atomic.Value
pageCacheBitmapWG sync.WaitGroup
stopCh chan struct{}
}
type pageCacheBitmap struct {
m []uint64
}
// MustReadAt reads len(p) bytes at off from r.
@ -54,11 +40,7 @@ func (r *ReaderAt) MustReadAt(p []byte, off int64) {
if off < 0 {
logger.Panicf("off=%d cannot be negative", off)
}
end := off + int64(len(p))
if len(r.mmapData) == 0 || (len(p) > 8*1024 && !r.isInPageCache(off, end)) {
// Read big blocks directly from file.
// This could be faster than reading these blocks from mmap,
// since it triggers less page faults.
if len(r.mmapData) == 0 {
n, err := r.f.ReadAt(p, off)
if err != nil {
logger.Panicf("FATAL: cannot read %d bytes at offset %d of file %q: %s", len(p), off, r.f.Name(), err)
@ -66,77 +48,21 @@ func (r *ReaderAt) MustReadAt(p []byte, off int64) {
if n != len(p) {
logger.Panicf("FATAL: unexpected number of bytes read; got %d; want %d", n, len(p))
}
if len(r.mmapData) > 0 {
r.markInPageCache(off, end)
}
} else {
if off > int64(len(r.mmapData)-len(p)) {
logger.Panicf("off=%d is out of allowed range [0...%d] for len(p)=%d", off, len(r.mmapData)-len(p), len(p))
}
src := r.mmapData[off:]
if r.isInPageCache(off, end) {
// It is safe copying the data with copy(), since it is likely it is in the page cache.
// This is up to 4x faster than copyMmap() below.
copy(p, src)
} else {
// The data may be missing in the page cache, so it is better to copy it via cgo trick
// in order to avoid P stalls in Go runtime.
// See https://medium.com/@valyala/mmap-in-go-considered-harmful-d92a25cb161d for details.
copyMmap(p, src)
r.markInPageCache(off, end)
}
// The copy() below may result in thread block as described at https://valyala.medium.com/mmap-in-go-considered-harmful-d92a25cb161d .
// But production workload proved this is OK in most cases, so use it without fear :)
copy(p, src)
}
readCalls.Inc()
readBytes.Add(len(p))
}
func (r *ReaderAt) isInPageCache(start, end int64) bool {
if int64(len(r.mmapData))-end < 4096 {
// If standard copy(dst, src) from Go may read beyond len(src), then this should help
// fixing SIGBUS panic from https://github.com/VictoriaMetrics/VictoriaMetrics/issues/581
return false
}
startBit := uint64(start) / pageSize
endBit := uint64(end) / pageSize
m := r.pageCacheBitmap.Load().(*pageCacheBitmap).m
for startBit <= endBit {
idx := startBit / 64
off := startBit % 64
if idx >= uint64(len(m)) {
return true
}
n := atomic.LoadUint64(&m[idx])
if (n>>off)&1 != 1 {
return false
}
startBit++
}
return true
}
func (r *ReaderAt) markInPageCache(start, end int64) {
startBit := uint64(start) / pageSize
endBit := uint64(end) / pageSize
m := r.pageCacheBitmap.Load().(*pageCacheBitmap).m
for startBit <= endBit {
idx := startBit / 64
off := startBit % 64
n := atomic.LoadUint64(&m[idx])
n |= 1 << off
// It is OK if multiple concurrent goroutines store the same m[idx].
atomic.StoreUint64(&m[idx], n)
startBit++
}
}
// Assume page size is 4KB
const pageSize = 4 * 1024
// MustClose closes r.
func (r *ReaderAt) MustClose() {
close(r.stopCh)
r.pageCacheBitmapWG.Wait()
fname := r.f.Name()
if len(r.mmapData) > 0 {
if err := unix.Munmap(r.mmapData[:cap(r.mmapData)]); err != nil {
@ -168,7 +94,6 @@ func MustOpenReaderAt(path string) *ReaderAt {
}
var r ReaderAt
r.f = f
r.stopCh = make(chan struct{})
if !*disableMmap {
fi, err := f.Stat()
if err != nil {
@ -176,16 +101,6 @@ func MustOpenReaderAt(path string) *ReaderAt {
logger.Panicf("FATAL: error in fstat(%q): %s", path, err)
}
size := fi.Size()
bm := &pageCacheBitmap{
m: make([]uint64, 1+size/pageSize/64),
}
r.pageCacheBitmap.Store(bm)
r.pageCacheBitmapWG.Add(1)
go func() {
defer r.pageCacheBitmapWG.Done()
pageCacheBitmapCleaner(&r.pageCacheBitmap, r.stopCh)
}()
data, err := mmapFile(f, size)
if err != nil {
MustClose(f)
@ -197,23 +112,6 @@ func MustOpenReaderAt(path string) *ReaderAt {
return &r
}
func pageCacheBitmapCleaner(pcbm *atomic.Value, stopCh <-chan struct{}) {
t := time.NewTicker(time.Minute)
for {
select {
case <-stopCh:
t.Stop()
return
case <-t.C:
}
bmOld := pcbm.Load().(*pageCacheBitmap)
bm := &pageCacheBitmap{
m: make([]uint64, len(bmOld.m)),
}
pcbm.Store(bm)
}
}
var (
readCalls = metrics.NewCounter(`vm_fs_read_calls_total`)
readBytes = metrics.NewCounter(`vm_fs_read_bytes_total`)