// Source: github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/usage/memory.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package usage

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
	"github.com/metacubex/gvisor/pkg/atomicbitops"
	"github.com/metacubex/gvisor/pkg/bits"
	"github.com/metacubex/gvisor/pkg/memutil"
)

// MemoryKind represents a type of memory used by the application.
//
// For efficiency reasons, it is assumed that the Memory implementation is
// responsible for specific stats (documented below), and those may be reported
// in aggregate independently. See the platform.Memory interface as well as the
// control.Usage.Collect method for more information.
type MemoryKind int

const (
	// System represents miscellaneous system memory. This may include
	// memory that is in the process of being reclaimed, system caches,
	// page tables, swap, etc.
	//
	// This memory kind is backed by platform memory.
	System MemoryKind = iota

	// Anonymous represents anonymous application memory.
	//
	// This memory kind is backed by platform memory.
	Anonymous

	// PageCache represents memory allocated to back sandbox-visible files that
	// do not have a local fd. The contents of these files are buffered in
	// memory to support application mmaps.
	//
	// This memory kind is backed by platform memory.
	PageCache

	// Tmpfs represents memory used by the sandbox-visible tmpfs.
	//
	// This memory kind is backed by platform memory.
	Tmpfs

	// Ramdiskfs represents memory used by the ramdiskfs.
	//
	// This memory kind is backed by platform memory.
	Ramdiskfs

	// Mapped represents memory related to files which have a local fd on the
	// host, and thus can be directly mapped. Typically these are files backed
	// by gofers with donated-fd support. Note that this value may not track the
	// exact amount of memory used by mapping on the host, because we don't have
	// any visibility into the host kernel memory management. In particular,
	// once we map some part of a host file, the host kernel is free to
	// arbitrarily populate/decommit the pages, which it may do for various
	// reasons (ex. host memory reclaim, NUMA balancing).
	//
	// This memory kind is backed by the host pagecache, via host mmaps.
	Mapped
)

// memoryStats tracks application memory usage in bytes. Each field holds the
// running byte count for the MemoryKind of the same name.
//
// The methods on memoryStats all carry a "must be called when locked"
// precondition; the locking itself is done by the MemoryLocked methods that
// wrap them. A plain-value snapshot of the counters is produced by
// copyLocked, which returns a MemoryStats (see MemoryLocked.Copy).
type memoryStats struct {
	System    atomicbitops.Uint64
	Anonymous atomicbitops.Uint64
	PageCache atomicbitops.Uint64
	Tmpfs     atomicbitops.Uint64
	Mapped    atomicbitops.Uint64
	Ramdiskfs atomicbitops.Uint64
}

// incLocked adds a usage of 'val' bytes to memory category 'kind'. It panics
// if 'kind' is not one of the MemoryKind constants.
//
// Precondition: must be called when locked.
94 func (ms *memoryStats) incLocked(val uint64, kind MemoryKind) { 95 switch kind { 96 case System: 97 ms.System.Add(val) 98 case Anonymous: 99 ms.Anonymous.Add(val) 100 case PageCache: 101 ms.PageCache.Add(val) 102 case Mapped: 103 ms.Mapped.Add(val) 104 case Tmpfs: 105 ms.Tmpfs.Add(val) 106 case Ramdiskfs: 107 ms.Ramdiskfs.Add(val) 108 default: 109 panic(fmt.Sprintf("invalid memory kind: %v", kind)) 110 } 111 } 112 113 // decLocked removes a usage of 'val' bytes from memory category 'kind'. 114 // 115 // Precondition: must be called when locked. 116 func (ms *memoryStats) decLocked(val uint64, kind MemoryKind) { 117 switch kind { 118 case System: 119 ms.System.Add(^(val - 1)) 120 case Anonymous: 121 ms.Anonymous.Add(^(val - 1)) 122 case PageCache: 123 ms.PageCache.Add(^(val - 1)) 124 case Mapped: 125 ms.Mapped.Add(^(val - 1)) 126 case Tmpfs: 127 ms.Tmpfs.Add(^(val - 1)) 128 case Ramdiskfs: 129 ms.Ramdiskfs.Add(^(val - 1)) 130 default: 131 panic(fmt.Sprintf("invalid memory kind: %v", kind)) 132 } 133 } 134 135 // totalLocked returns a total usage. 136 // 137 // Precondition: must be called when locked. 138 func (ms *memoryStats) totalLocked() (total uint64) { 139 total += ms.System.RacyLoad() 140 total += ms.Anonymous.RacyLoad() 141 total += ms.PageCache.RacyLoad() 142 total += ms.Mapped.RacyLoad() 143 total += ms.Tmpfs.RacyLoad() 144 total += ms.Ramdiskfs.RacyLoad() 145 return 146 } 147 148 // copyLocked returns a copy of the structure. 149 // 150 // Precondition: must be called when locked. 151 func (ms *memoryStats) copyLocked() MemoryStats { 152 return MemoryStats{ 153 System: ms.System.RacyLoad(), 154 Anonymous: ms.Anonymous.RacyLoad(), 155 PageCache: ms.PageCache.RacyLoad(), 156 Tmpfs: ms.Tmpfs.RacyLoad(), 157 Mapped: ms.Mapped.RacyLoad(), 158 Ramdiskfs: ms.Ramdiskfs.RacyLoad(), 159 } 160 } 161 162 // MemoryStats tracks application memory usage in bytes. All fields correspond 163 // to the memory category with the same name. 
164 type MemoryStats struct { 165 System uint64 166 Anonymous uint64 167 PageCache uint64 168 Tmpfs uint64 169 Mapped uint64 170 Ramdiskfs uint64 171 } 172 173 // RTMemoryStats contains the memory usage values that need to be directly 174 // exposed through a shared memory file for real-time access. These are 175 // categories not backed by platform memory. For details about how this works, 176 // see the memory accounting docs. 177 // 178 // N.B. Please keep the struct in sync with the API. Notably, changes to this 179 // struct requires a version bump and addition of compatibility logic in the 180 // control server. As a special-case, adding fields without re-ordering existing 181 // ones do not require a version bump because the mapped page we use is 182 // initially zeroed. Any added field will be ignored by an older API and will be 183 // zero if read by a newer API. 184 type RTMemoryStats struct { 185 RTMapped atomicbitops.Uint64 186 } 187 188 // MemoryLocked is Memory with access methods. 189 type MemoryLocked struct { 190 mu memoryMutex 191 // memoryStats records the memory stats. 192 memoryStats 193 // RTMemoryStats records the memory stats that need to be exposed through 194 // shared page. 195 *RTMemoryStats 196 // File is the backing file storing the memory stats. 197 File *os.File 198 // MemCgIDToMemStats is the map of cgroup ids to memory stats. 199 MemCgIDToMemStats map[uint32]*memoryStats 200 } 201 202 // Init initializes global 'MemoryAccounting'. 203 func Init() error { 204 const name = "memory-usage" 205 fd, err := memutil.CreateMemFD(name, 0) 206 if err != nil { 207 return fmt.Errorf("error creating usage file: %v", err) 208 } 209 file := os.NewFile(uintptr(fd), name) 210 if err := file.Truncate(int64(RTMemoryStatsSize)); err != nil { 211 return fmt.Errorf("error truncating usage file: %v", err) 212 } 213 // Note: We rely on the returned page being initially zeroed. This will 214 // always be the case for a newly mapped page from /dev/shm. 
If we obtain 215 // the shared memory through some other means in the future, we may have to 216 // explicitly zero the page. 217 mmap, err := memutil.MapFile(0, RTMemoryStatsSize, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED, file.Fd(), 0) 218 if err != nil { 219 return fmt.Errorf("error mapping usage file: %v", err) 220 } 221 222 MemoryAccounting = &MemoryLocked{ 223 File: file, 224 RTMemoryStats: RTMemoryStatsPointer(mmap), 225 MemCgIDToMemStats: make(map[uint32]*memoryStats), 226 } 227 return nil 228 } 229 230 // MemoryAccounting is the global memory stats. 231 // 232 // There is no need to save or restore the global memory accounting object, 233 // because individual frame kinds are saved and charged only when they become 234 // resident. 235 var MemoryAccounting *MemoryLocked 236 237 func (m *MemoryLocked) incLockedPerCg(val uint64, kind MemoryKind, memCgID uint32) { 238 if _, ok := m.MemCgIDToMemStats[memCgID]; !ok { 239 m.MemCgIDToMemStats[memCgID] = &memoryStats{} 240 } 241 242 ms := m.MemCgIDToMemStats[memCgID] 243 ms.incLocked(val, kind) 244 } 245 246 // Inc adds an additional usage of 'val' bytes to memory category 'kind' for a 247 // cgroup with id 'memCgID'. If 'memCgID' is zero, the memory is accounted only 248 // for the total memory usage. 249 // 250 // This method is thread-safe. 251 func (m *MemoryLocked) Inc(val uint64, kind MemoryKind, memCgID uint32) { 252 m.mu.Lock() 253 defer m.mu.Unlock() 254 m.incLocked(val, kind) 255 if memCgID != 0 { 256 m.incLockedPerCg(val, kind, memCgID) 257 } 258 259 // If the memory category is 'Mapped', update RTMapped. 
260 if kind == Mapped { 261 m.RTMapped.Add(val) 262 } 263 } 264 265 func (m *MemoryLocked) decLockedPerCg(val uint64, kind MemoryKind, memCgID uint32) { 266 if _, ok := m.MemCgIDToMemStats[memCgID]; !ok { 267 panic(fmt.Sprintf("invalid memory cgroup id: %v", memCgID)) 268 } 269 270 ms := m.MemCgIDToMemStats[memCgID] 271 ms.decLocked(val, kind) 272 } 273 274 // Dec removes a usage of 'val' bytes from memory category 'kind' for a cgroup 275 // with id 'memCgID'. If 'memCgID' is zero, the memory is removed only from the 276 // total usage. 277 // 278 // This method is thread-safe. 279 func (m *MemoryLocked) Dec(val uint64, kind MemoryKind, memCgID uint32) { 280 m.mu.Lock() 281 defer m.mu.Unlock() 282 m.decLocked(val, kind) 283 if memCgID != 0 { 284 m.decLockedPerCg(val, kind, memCgID) 285 } 286 287 // If the memory category is 'Mapped', update RTMapped. 288 if kind == Mapped { 289 m.RTMapped.Add(^(val - 1)) 290 } 291 } 292 293 // Move moves a usage of 'val' bytes from 'from' to 'to' for a cgroup with 294 // id 'memCgID'. 295 // 296 // This method is thread-safe. 297 func (m *MemoryLocked) Move(val uint64, to MemoryKind, from MemoryKind, memCgID uint32) { 298 m.mu.Lock() 299 defer m.mu.Unlock() 300 // Just call decLocked and incLocked directly. We held the Lock to 301 // protect against concurrent callers to Total(). 302 m.decLocked(val, from) 303 m.incLocked(val, to) 304 305 if memCgID != 0 { 306 m.decLockedPerCg(val, from, memCgID) 307 m.incLockedPerCg(val, to, memCgID) 308 } 309 } 310 311 // Total returns a total memory usage. 312 // 313 // This method is thread-safe. 314 func (m *MemoryLocked) Total() uint64 { 315 m.mu.Lock() 316 defer m.mu.Unlock() 317 return m.totalLocked() 318 } 319 320 // TotalPerCg returns a total memory usage for a cgroup. 321 // 322 // This method is thread-safe. 323 func (m *MemoryLocked) TotalPerCg(memCgID uint32) uint64 { 324 m.mu.Lock() 325 defer m.mu.Unlock() 326 327 // Total memory usage including the sentry memory. 
328 if memCgID == 0 { 329 return m.totalLocked() 330 } 331 // Memory usage for all cgroups except sentry memory. 332 ms, ok := m.MemCgIDToMemStats[memCgID] 333 if !ok { 334 return 0 335 } 336 return ms.totalLocked() 337 } 338 339 // Copy returns a copy of the structure with a total. 340 // 341 // This method is thread-safe. 342 func (m *MemoryLocked) Copy() (MemoryStats, uint64) { 343 m.mu.Lock() 344 defer m.mu.Unlock() 345 return m.copyLocked(), m.totalLocked() 346 } 347 348 // CopyPerCg returns a copy of the structure with a total for a cgroup. 349 // 350 // This method is thread-safe. 351 func (m *MemoryLocked) CopyPerCg(memCgID uint32) (MemoryStats, uint64) { 352 m.mu.Lock() 353 defer m.mu.Unlock() 354 355 // Total memory usage including the sentry memory. 356 if memCgID == 0 { 357 return m.copyLocked(), m.totalLocked() 358 } 359 // Memory usage for all cgroups except sentry memory. 360 ms, ok := m.MemCgIDToMemStats[memCgID] 361 if !ok { 362 return MemoryStats{}, 0 363 } 364 return ms.copyLocked(), ms.totalLocked() 365 } 366 367 // These options control how much total memory the is reported to the 368 // application. They may only be set before the application starts executing, 369 // and must not be modified. 370 var ( 371 // MinimumTotalMemoryBytes is the minimum reported total system memory. 372 MinimumTotalMemoryBytes uint64 = 2 << 30 // 2 GB 373 374 // MaximumTotalMemoryBytes is the maximum reported total system memory. 375 // The 0 value indicates no maximum. 376 MaximumTotalMemoryBytes uint64 377 ) 378 379 // TotalMemory returns the "total usable memory" available. 380 // 381 // This number doesn't really have a true value so it's based on the following 382 // inputs and further bounded to be above the MinumumTotalMemoryBytes and below 383 // MaximumTotalMemoryBytes. 
384 // 385 // memSize should be the platform.Memory size reported by platform.Memory.TotalSize() 386 // used is the total memory reported by MemoryLocked.Total() 387 func TotalMemory(memSize, used uint64) uint64 { 388 if memSize < MinimumTotalMemoryBytes { 389 memSize = MinimumTotalMemoryBytes 390 } 391 if memSize < used { 392 memSize = used 393 // Bump memSize to the next largest power of 2, if one exists, so 394 // that MemFree isn't 0. 395 if msb := bits.MostSignificantOne64(memSize); msb < 63 { 396 memSize = uint64(1) << (uint(msb) + 1) 397 } 398 } 399 if MaximumTotalMemoryBytes > 0 && memSize > MaximumTotalMemoryBytes { 400 memSize = MaximumTotalMemoryBytes 401 } 402 return memSize 403 }