github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/usage/memory.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package usage
    16  
    17  import (
    18  	"fmt"
    19  	"os"
    20  
    21  	"github.com/MerlinKodo/gvisor/pkg/atomicbitops"
    22  	"github.com/MerlinKodo/gvisor/pkg/bits"
    23  	"github.com/MerlinKodo/gvisor/pkg/memutil"
    24  	"golang.org/x/sys/unix"
    25  )
    26  
// MemoryKind represents a type of memory used by the application.
//
// For efficiency reasons, it is assumed that the Memory implementation is
// responsible for specific stats (documented below), and those may be reported
// in aggregate independently. See the platform.Memory interface as well as the
// control.Usage.Collect method for more information.
//
// The valid kinds are the constants declared in this file; the zero value is
// System.
type MemoryKind int
    34  
const (
	// System represents miscellaneous system memory. This may include
	// memory that is in the process of being reclaimed, system caches,
	// page tables, swap, etc.
	//
	// This memory kind is backed by platform memory.
	System MemoryKind = iota

	// Anonymous represents anonymous application memory.
	//
	// This memory kind is backed by platform memory.
	Anonymous

	// PageCache represents memory allocated to back sandbox-visible files that
	// do not have a local fd. The contents of these files are buffered in
	// memory to support application mmaps.
	//
	// This memory kind is backed by platform memory.
	PageCache

	// Tmpfs represents memory used by the sandbox-visible tmpfs.
	//
	// This memory kind is backed by platform memory.
	Tmpfs

	// Ramdiskfs represents memory used by the ramdiskfs.
	//
	// This memory kind is backed by platform memory.
	Ramdiskfs

	// Mapped represents memory related to files which have a local fd on the
	// host, and thus can be directly mapped. Typically these are files backed
	// by gofers with donated-fd support. Note that this value may not track the
	// exact amount of memory used by mappings on the host, because we don't
	// have any visibility into the host kernel memory management. In
	// particular, once we map some part of a host file, the host kernel is free
	// to arbitrarily populate/decommit the pages, which it may do for various
	// reasons (e.g. host memory reclaim, NUMA balancing).
	//
	// This memory kind is backed by the host pagecache, via host mmaps.
	Mapped
)
    77  
// memoryStats tracks application memory usage in bytes. All fields correspond
// to the memory category with the same name.
//
// The fields are atomic counters, but the methods on this type (incLocked,
// decLocked, totalLocked, copyLocked) all carry a "must be called when locked"
// precondition. A plain-value snapshot of the counters is produced by
// copyLocked as a MemoryStats, which is what MemoryLocked.Copy and
// MemoryLocked.CopyPerCg hand out to callers.
type memoryStats struct {
	System    atomicbitops.Uint64
	Anonymous atomicbitops.Uint64
	PageCache atomicbitops.Uint64
	Tmpfs     atomicbitops.Uint64
	Mapped    atomicbitops.Uint64
	Ramdiskfs atomicbitops.Uint64
}
    90  
    91  // incLocked adds a usage of 'val' bytes from memory category 'kind'.
    92  //
    93  // Precondition: must be called when locked.
    94  func (ms *memoryStats) incLocked(val uint64, kind MemoryKind) {
    95  	switch kind {
    96  	case System:
    97  		ms.System.Add(val)
    98  	case Anonymous:
    99  		ms.Anonymous.Add(val)
   100  	case PageCache:
   101  		ms.PageCache.Add(val)
   102  	case Mapped:
   103  		ms.Mapped.Add(val)
   104  	case Tmpfs:
   105  		ms.Tmpfs.Add(val)
   106  	case Ramdiskfs:
   107  		ms.Ramdiskfs.Add(val)
   108  	default:
   109  		panic(fmt.Sprintf("invalid memory kind: %v", kind))
   110  	}
   111  }
   112  
   113  // decLocked removes a usage of 'val' bytes from memory category 'kind'.
   114  //
   115  // Precondition: must be called when locked.
   116  func (ms *memoryStats) decLocked(val uint64, kind MemoryKind) {
   117  	switch kind {
   118  	case System:
   119  		ms.System.Add(^(val - 1))
   120  	case Anonymous:
   121  		ms.Anonymous.Add(^(val - 1))
   122  	case PageCache:
   123  		ms.PageCache.Add(^(val - 1))
   124  	case Mapped:
   125  		ms.Mapped.Add(^(val - 1))
   126  	case Tmpfs:
   127  		ms.Tmpfs.Add(^(val - 1))
   128  	case Ramdiskfs:
   129  		ms.Ramdiskfs.Add(^(val - 1))
   130  	default:
   131  		panic(fmt.Sprintf("invalid memory kind: %v", kind))
   132  	}
   133  }
   134  
   135  // totalLocked returns a total usage.
   136  //
   137  // Precondition: must be called when locked.
   138  func (ms *memoryStats) totalLocked() (total uint64) {
   139  	total += ms.System.RacyLoad()
   140  	total += ms.Anonymous.RacyLoad()
   141  	total += ms.PageCache.RacyLoad()
   142  	total += ms.Mapped.RacyLoad()
   143  	total += ms.Tmpfs.RacyLoad()
   144  	total += ms.Ramdiskfs.RacyLoad()
   145  	return
   146  }
   147  
   148  // copyLocked returns a copy of the structure.
   149  //
   150  // Precondition: must be called when locked.
   151  func (ms *memoryStats) copyLocked() MemoryStats {
   152  	return MemoryStats{
   153  		System:    ms.System.RacyLoad(),
   154  		Anonymous: ms.Anonymous.RacyLoad(),
   155  		PageCache: ms.PageCache.RacyLoad(),
   156  		Tmpfs:     ms.Tmpfs.RacyLoad(),
   157  		Mapped:    ms.Mapped.RacyLoad(),
   158  		Ramdiskfs: ms.Ramdiskfs.RacyLoad(),
   159  	}
   160  }
   161  
// MemoryStats tracks application memory usage in bytes. All fields correspond
// to the memory category with the same name.
//
// Unlike memoryStats, the fields are plain uint64 values: this is the snapshot
// type returned by memoryStats.copyLocked and, through it, by
// MemoryLocked.Copy and MemoryLocked.CopyPerCg.
type MemoryStats struct {
	System    uint64
	Anonymous uint64
	PageCache uint64
	Tmpfs     uint64
	Mapped    uint64
	Ramdiskfs uint64
}
   172  
// RTMemoryStats contains the memory usage values that need to be directly
// exposed through a shared memory file for real-time access. These are
// categories not backed by platform memory. For details about how this works,
// see the memory accounting docs.
//
// N.B. Please keep the struct in sync with the API. Notably, changes to this
// struct require a version bump and the addition of compatibility logic in the
// control server. As a special case, adding fields without re-ordering
// existing ones does not require a version bump because the mapped page we use
// is initially zeroed. Any added field will be ignored by an older API and
// will be zero if read by a newer API.
type RTMemoryStats struct {
	// RTMapped mirrors the Mapped category; MemoryLocked.Inc and
	// MemoryLocked.Dec update it whenever they are called with kind Mapped.
	RTMapped atomicbitops.Uint64
}
   187  
// MemoryLocked holds the global memory stats, the real-time stats exposed
// through a shared page, and the per-cgroup stats, together with the mutex
// that its exported methods (Inc, Dec, Move, Total, TotalPerCg, Copy,
// CopyPerCg) acquire before touching any of them.
type MemoryLocked struct {
	// mu is taken by every exported method of MemoryLocked for the duration
	// of the call.
	mu memoryMutex
	// memoryStats records the memory stats.
	memoryStats
	// RTMemoryStats records the memory stats that need to be exposed through
	// shared page.
	*RTMemoryStats
	// File is the backing file storing the memory stats.
	File *os.File
	// MemCgIDToMemStats is the map of cgroup ids to memory stats. Entries are
	// created on demand by incLockedPerCg.
	MemCgIDToMemStats map[uint32]*memoryStats
}
   201  
   202  // Init initializes global 'MemoryAccounting'.
   203  func Init() error {
   204  	const name = "memory-usage"
   205  	fd, err := memutil.CreateMemFD(name, 0)
   206  	if err != nil {
   207  		return fmt.Errorf("error creating usage file: %v", err)
   208  	}
   209  	file := os.NewFile(uintptr(fd), name)
   210  	if err := file.Truncate(int64(RTMemoryStatsSize)); err != nil {
   211  		return fmt.Errorf("error truncating usage file: %v", err)
   212  	}
   213  	// Note: We rely on the returned page being initially zeroed. This will
   214  	// always be the case for a newly mapped page from /dev/shm. If we obtain
   215  	// the shared memory through some other means in the future, we may have to
   216  	// explicitly zero the page.
   217  	mmap, err := memutil.MapFile(0, RTMemoryStatsSize, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED, file.Fd(), 0)
   218  	if err != nil {
   219  		return fmt.Errorf("error mapping usage file: %v", err)
   220  	}
   221  
   222  	MemoryAccounting = &MemoryLocked{
   223  		File:              file,
   224  		RTMemoryStats:     RTMemoryStatsPointer(mmap),
   225  		MemCgIDToMemStats: make(map[uint32]*memoryStats),
   226  	}
   227  	return nil
   228  }
   229  
// MemoryAccounting is the global memory stats.
//
// It is declared without an initializer; within this file it is only assigned
// by Init.
//
// There is no need to save or restore the global memory accounting object,
// because individual frame kinds are saved and charged only when they become
// resident.
var MemoryAccounting *MemoryLocked
   236  
   237  func (m *MemoryLocked) incLockedPerCg(val uint64, kind MemoryKind, memCgID uint32) {
   238  	if _, ok := m.MemCgIDToMemStats[memCgID]; !ok {
   239  		m.MemCgIDToMemStats[memCgID] = &memoryStats{}
   240  	}
   241  
   242  	ms := m.MemCgIDToMemStats[memCgID]
   243  	ms.incLocked(val, kind)
   244  }
   245  
   246  // Inc adds an additional usage of 'val' bytes to memory category 'kind' for a
   247  // cgroup with id 'memCgID'. If 'memCgID' is zero, the memory is accounted only
   248  // for the total memory usage.
   249  //
   250  // This method is thread-safe.
   251  func (m *MemoryLocked) Inc(val uint64, kind MemoryKind, memCgID uint32) {
   252  	m.mu.Lock()
   253  	defer m.mu.Unlock()
   254  	m.incLocked(val, kind)
   255  	if memCgID != 0 {
   256  		m.incLockedPerCg(val, kind, memCgID)
   257  	}
   258  
   259  	// If the memory category is 'Mapped', update RTMapped.
   260  	if kind == Mapped {
   261  		m.RTMapped.Add(val)
   262  	}
   263  }
   264  
   265  func (m *MemoryLocked) decLockedPerCg(val uint64, kind MemoryKind, memCgID uint32) {
   266  	if _, ok := m.MemCgIDToMemStats[memCgID]; !ok {
   267  		panic(fmt.Sprintf("invalid memory cgroup id: %v", memCgID))
   268  	}
   269  
   270  	ms := m.MemCgIDToMemStats[memCgID]
   271  	ms.decLocked(val, kind)
   272  }
   273  
   274  // Dec removes a usage of 'val' bytes from memory category 'kind' for a cgroup
   275  // with id 'memCgID'. If 'memCgID' is zero, the memory is removed only from the
   276  // total usage.
   277  //
   278  // This method is thread-safe.
   279  func (m *MemoryLocked) Dec(val uint64, kind MemoryKind, memCgID uint32) {
   280  	m.mu.Lock()
   281  	defer m.mu.Unlock()
   282  	m.decLocked(val, kind)
   283  	if memCgID != 0 {
   284  		m.decLockedPerCg(val, kind, memCgID)
   285  	}
   286  
   287  	// If the memory category is 'Mapped', update RTMapped.
   288  	if kind == Mapped {
   289  		m.RTMapped.Add(^(val - 1))
   290  	}
   291  }
   292  
   293  // Move moves a usage of 'val' bytes from 'from' to 'to' for a cgroup with
   294  // id 'memCgID'.
   295  //
   296  // This method is thread-safe.
   297  func (m *MemoryLocked) Move(val uint64, to MemoryKind, from MemoryKind, memCgID uint32) {
   298  	m.mu.Lock()
   299  	defer m.mu.Unlock()
   300  	// Just call decLocked and incLocked directly. We held the Lock to
   301  	// protect against concurrent callers to Total().
   302  	m.decLocked(val, from)
   303  	m.incLocked(val, to)
   304  
   305  	if memCgID != 0 {
   306  		m.decLockedPerCg(val, from, memCgID)
   307  		m.incLockedPerCg(val, to, memCgID)
   308  	}
   309  }
   310  
   311  // Total returns a total memory usage.
   312  //
   313  // This method is thread-safe.
   314  func (m *MemoryLocked) Total() uint64 {
   315  	m.mu.Lock()
   316  	defer m.mu.Unlock()
   317  	return m.totalLocked()
   318  }
   319  
   320  // TotalPerCg returns a total memory usage for a cgroup.
   321  //
   322  // This method is thread-safe.
   323  func (m *MemoryLocked) TotalPerCg(memCgID uint32) uint64 {
   324  	m.mu.Lock()
   325  	defer m.mu.Unlock()
   326  
   327  	// Total memory usage including the sentry memory.
   328  	if memCgID == 0 {
   329  		return m.totalLocked()
   330  	}
   331  	// Memory usage for all cgroups except sentry memory.
   332  	ms, ok := m.MemCgIDToMemStats[memCgID]
   333  	if !ok {
   334  		return 0
   335  	}
   336  	return ms.totalLocked()
   337  }
   338  
   339  // Copy returns a copy of the structure with a total.
   340  //
   341  // This method is thread-safe.
   342  func (m *MemoryLocked) Copy() (MemoryStats, uint64) {
   343  	m.mu.Lock()
   344  	defer m.mu.Unlock()
   345  	return m.copyLocked(), m.totalLocked()
   346  }
   347  
   348  // CopyPerCg returns a copy of the structure with a total for a cgroup.
   349  //
   350  // This method is thread-safe.
   351  func (m *MemoryLocked) CopyPerCg(memCgID uint32) (MemoryStats, uint64) {
   352  	m.mu.Lock()
   353  	defer m.mu.Unlock()
   354  
   355  	// Total memory usage including the sentry memory.
   356  	if memCgID == 0 {
   357  		return m.copyLocked(), m.totalLocked()
   358  	}
   359  	// Memory usage for all cgroups except sentry memory.
   360  	ms, ok := m.MemCgIDToMemStats[memCgID]
   361  	if !ok {
   362  		return MemoryStats{}, 0
   363  	}
   364  	return ms.copyLocked(), ms.totalLocked()
   365  }
   366  
// These options control how much total memory is reported to the
// application. They may only be set before the application starts executing,
// and must not be modified afterwards.
var (
	// MinimumTotalMemoryBytes is the minimum reported total system memory.
	MinimumTotalMemoryBytes uint64 = 2 << 30 // 2 GiB

	// MaximumTotalMemoryBytes is the maximum reported total system memory.
	// The 0 value indicates no maximum.
	MaximumTotalMemoryBytes uint64
)
   378  
   379  // TotalMemory returns the "total usable memory" available.
   380  //
   381  // This number doesn't really have a true value so it's based on the following
   382  // inputs and further bounded to be above the MinumumTotalMemoryBytes and below
   383  // MaximumTotalMemoryBytes.
   384  //
   385  // memSize should be the platform.Memory size reported by platform.Memory.TotalSize()
   386  // used is the total memory reported by MemoryLocked.Total()
   387  func TotalMemory(memSize, used uint64) uint64 {
   388  	if memSize < MinimumTotalMemoryBytes {
   389  		memSize = MinimumTotalMemoryBytes
   390  	}
   391  	if memSize < used {
   392  		memSize = used
   393  		// Bump memSize to the next largest power of 2, if one exists, so
   394  		// that MemFree isn't 0.
   395  		if msb := bits.MostSignificantOne64(memSize); msb < 63 {
   396  			memSize = uint64(1) << (uint(msb) + 1)
   397  		}
   398  	}
   399  	if MaximumTotalMemoryBytes > 0 && memSize > MaximumTotalMemoryBytes {
   400  		memSize = MaximumTotalMemoryBytes
   401  	}
   402  	return memSize
   403  }