github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/mm/syscalls.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mm 16 17 import ( 18 "fmt" 19 mrand "math/rand" 20 21 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 22 "github.com/nicocha30/gvisor-ligolo/pkg/context" 23 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 24 "github.com/nicocha30/gvisor-ligolo/pkg/hostarch" 25 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth" 26 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/futex" 27 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/limits" 28 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/memmap" 29 ) 30 31 // HandleUserFault handles an application page fault. sp is the faulting 32 // application thread's stack pointer. 33 // 34 // Preconditions: mm.as != nil. 35 func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr hostarch.Addr, at hostarch.AccessType, sp hostarch.Addr) error { 36 ar, ok := addr.RoundDown().ToRange(hostarch.PageSize) 37 if !ok { 38 return linuxerr.EFAULT 39 } 40 41 // Don't bother trying existingPMAsLocked; in most cases, if we did have 42 // existing pmas, we wouldn't have faulted. 43 44 // Ensure that we have a usable vma. Here and below, since we are only 45 // asking for a single page, there is no possibility of partial success, 46 // and any error is immediately fatal. 
47 mm.mappingMu.RLock() 48 vseg, _, err := mm.getVMAsLocked(ctx, ar, at, false) 49 if err != nil { 50 mm.mappingMu.RUnlock() 51 return err 52 } 53 54 // Ensure that we have a usable pma. 55 mm.activeMu.Lock() 56 pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, at) 57 mm.mappingMu.RUnlock() 58 if err != nil { 59 mm.activeMu.Unlock() 60 return err 61 } 62 63 // Downgrade to a read-lock on activeMu since we don't need to mutate pmas 64 // anymore. 65 mm.activeMu.DowngradeLock() 66 67 // Map the faulted page into the active AddressSpace. 68 err = mm.mapASLocked(pseg, ar, false) 69 mm.activeMu.RUnlock() 70 return err 71 } 72 73 // MMap establishes a memory mapping. 74 func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostarch.Addr, error) { 75 if opts.Length == 0 { 76 return 0, linuxerr.EINVAL 77 } 78 length, ok := hostarch.Addr(opts.Length).RoundUp() 79 if !ok { 80 return 0, linuxerr.ENOMEM 81 } 82 opts.Length = uint64(length) 83 84 if opts.Mappable != nil { 85 // Offset must be aligned. 86 if hostarch.Addr(opts.Offset).RoundDown() != hostarch.Addr(opts.Offset) { 87 return 0, linuxerr.EINVAL 88 } 89 // Offset + length must not overflow. 90 if end := opts.Offset + opts.Length; end < opts.Offset { 91 return 0, linuxerr.EOVERFLOW 92 } 93 } else { 94 opts.Offset = 0 95 } 96 97 if opts.Addr.RoundDown() != opts.Addr { 98 // MAP_FIXED requires addr to be page-aligned; non-fixed mappings 99 // don't. 100 if opts.Fixed { 101 return 0, linuxerr.EINVAL 102 } 103 opts.Addr = opts.Addr.RoundDown() 104 } 105 106 if !opts.MaxPerms.SupersetOf(opts.Perms) { 107 return 0, linuxerr.EACCES 108 } 109 if opts.Unmap && !opts.Fixed { 110 return 0, linuxerr.EINVAL 111 } 112 if opts.GrowsDown && opts.Mappable != nil { 113 return 0, linuxerr.EINVAL 114 } 115 116 // Get the new vma. 
117 var droppedIDs []memmap.MappingIdentity 118 mm.mappingMu.Lock() 119 if opts.MLockMode < mm.defMLockMode { 120 opts.MLockMode = mm.defMLockMode 121 } 122 vseg, ar, droppedIDs, err := mm.createVMALocked(ctx, opts, droppedIDs) 123 if err != nil { 124 mm.mappingMu.Unlock() 125 return 0, err 126 } 127 128 // TODO(jamieliu): In Linux, VM_LOCKONFAULT (which may be set on the new 129 // vma by mlockall(MCL_FUTURE|MCL_ONFAULT) => mm_struct::def_flags) appears 130 // to effectively disable MAP_POPULATE by unsetting FOLL_POPULATE in 131 // mm/util.c:vm_mmap_pgoff() => mm/gup.c:__mm_populate() => 132 // populate_vma_page_range(). Confirm this behavior. 133 switch { 134 case opts.Precommit || opts.MLockMode == memmap.MLockEager: 135 // Get pmas and map with precommit as requested. 136 mm.populateVMAAndUnlock(ctx, vseg, ar, true) 137 138 case opts.Mappable == nil && length <= privateAllocUnit: 139 // NOTE(b/63077076, b/63360184): Get pmas and map eagerly in the hope 140 // that doing so will save on future page faults. We only do this for 141 // anonymous mappings, since otherwise the cost of 142 // memmap.Mappable.Translate is unknown; and only for small mappings, 143 // to avoid needing to allocate large amounts of memory that we may 144 // subsequently need to checkpoint. 145 mm.populateVMAAndUnlock(ctx, vseg, ar, false) 146 147 default: 148 mm.mappingMu.Unlock() 149 } 150 151 for _, id := range droppedIDs { 152 id.DecRef(ctx) 153 } 154 155 return ar.Start, nil 156 } 157 158 // populateVMA obtains pmas for addresses in ar in the given vma, and maps them 159 // into mm.as if it is active. 160 // 161 // Preconditions: 162 // - mm.mappingMu must be locked. 163 // - vseg.Range().IsSupersetOf(ar). 164 func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) { 165 if !vseg.ValuePtr().effectivePerms.Any() { 166 // Linux doesn't populate inaccessible pages. See 167 // mm/gup.c:populate_vma_page_range. 
168 return 169 } 170 171 mm.activeMu.Lock() 172 // Can't defer mm.activeMu.Unlock(); see below. 173 174 // Even if we get new pmas, we can't actually map them if we don't have an 175 // AddressSpace. 176 if mm.as == nil { 177 mm.activeMu.Unlock() 178 return 179 } 180 181 // Ensure that we have usable pmas. 182 pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, hostarch.NoAccess) 183 if err != nil { 184 // mm/util.c:vm_mmap_pgoff() ignores the error, if any, from 185 // mm/gup.c:mm_populate(). If it matters, we'll get it again when 186 // userspace actually tries to use the failing page. 187 mm.activeMu.Unlock() 188 return 189 } 190 191 // Downgrade to a read-lock on activeMu since we don't need to mutate pmas 192 // anymore. 193 mm.activeMu.DowngradeLock() 194 195 // As above, errors are silently ignored. 196 mm.mapASLocked(pseg, ar, precommit) 197 mm.activeMu.RUnlock() 198 } 199 200 // populateVMAAndUnlock is equivalent to populateVMA, but also unconditionally 201 // unlocks mm.mappingMu. In cases where populateVMAAndUnlock is usable, it is 202 // preferable to populateVMA since it unlocks mm.mappingMu before performing 203 // expensive operations that don't require it to be locked. 204 // 205 // Preconditions: 206 // - mm.mappingMu must be locked for writing. 207 // - vseg.Range().IsSupersetOf(ar). 208 // 209 // Postconditions: mm.mappingMu will be unlocked. 210 // +checklocksrelease:mm.mappingMu 211 func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) { 212 // See populateVMA above for commentary. 213 if !vseg.ValuePtr().effectivePerms.Any() { 214 mm.mappingMu.Unlock() 215 return 216 } 217 218 mm.activeMu.Lock() 219 220 if mm.as == nil { 221 mm.activeMu.Unlock() 222 mm.mappingMu.Unlock() 223 return 224 } 225 226 // mm.mappingMu doesn't need to be write-locked for getPMAsLocked, and it 227 // isn't needed at all for mapASLocked. 
228 mm.mappingMu.DowngradeLock() 229 pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, hostarch.NoAccess) 230 mm.mappingMu.RUnlock() 231 if err != nil { 232 mm.activeMu.Unlock() 233 return 234 } 235 236 mm.activeMu.DowngradeLock() 237 mm.mapASLocked(pseg, ar, precommit) 238 mm.activeMu.RUnlock() 239 } 240 241 // MapStack allocates the initial process stack. 242 func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, error) { 243 // maxStackSize is the maximum supported process stack size in bytes. 244 // 245 // This limit exists because stack growing isn't implemented, so the entire 246 // process stack must be mapped up-front. 247 const maxStackSize = 128 << 20 248 249 stackSize := limits.FromContext(ctx).Get(limits.Stack) 250 r, ok := hostarch.Addr(stackSize.Cur).RoundUp() 251 sz := uint64(r) 252 if !ok { 253 // RLIM_INFINITY rounds up to 0. 254 sz = linux.DefaultStackSoftLimit 255 } else if sz > maxStackSize { 256 ctx.Warningf("Capping stack size from RLIMIT_STACK of %v down to %v.", sz, maxStackSize) 257 sz = maxStackSize 258 } else if sz == 0 { 259 return hostarch.AddrRange{}, linuxerr.ENOMEM 260 } 261 szaddr := hostarch.Addr(sz) 262 ctx.Debugf("Allocating stack with size of %v bytes", sz) 263 264 // Determine the stack's desired location. Unlike Linux, address 265 // randomization can't be disabled. 
266 stackEnd := mm.layout.MaxAddr - hostarch.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown() 267 if stackEnd < szaddr { 268 return hostarch.AddrRange{}, linuxerr.ENOMEM 269 } 270 stackStart := stackEnd - szaddr 271 var droppedIDs []memmap.MappingIdentity 272 var ar hostarch.AddrRange 273 var err error 274 mm.mappingMu.Lock() 275 _, ar, droppedIDs, err = mm.createVMALocked(ctx, memmap.MMapOpts{ 276 Length: sz, 277 Addr: stackStart, 278 Perms: hostarch.ReadWrite, 279 MaxPerms: hostarch.AnyAccess, 280 Private: true, 281 GrowsDown: true, 282 MLockMode: mm.defMLockMode, 283 Hint: "[stack]", 284 }, droppedIDs) 285 mm.mappingMu.Unlock() 286 for _, id := range droppedIDs { 287 id.DecRef(ctx) 288 } 289 return ar, err 290 } 291 292 // MUnmap implements the semantics of Linux's munmap(2). 293 func (mm *MemoryManager) MUnmap(ctx context.Context, addr hostarch.Addr, length uint64) error { 294 if addr != addr.RoundDown() { 295 return linuxerr.EINVAL 296 } 297 if length == 0 { 298 return linuxerr.EINVAL 299 } 300 la, ok := hostarch.Addr(length).RoundUp() 301 if !ok { 302 return linuxerr.EINVAL 303 } 304 ar, ok := addr.ToRange(uint64(la)) 305 if !ok { 306 return linuxerr.EINVAL 307 } 308 309 var droppedIDs []memmap.MappingIdentity 310 mm.mappingMu.Lock() 311 _, droppedIDs = mm.unmapLocked(ctx, ar, droppedIDs) 312 mm.mappingMu.Unlock() 313 314 for _, id := range droppedIDs { 315 id.DecRef(ctx) 316 } 317 318 return nil 319 } 320 321 // MRemapOpts specifies options to MRemap. 322 type MRemapOpts struct { 323 // Move controls whether MRemap moves the remapped mapping to a new address. 324 Move MRemapMoveMode 325 326 // NewAddr is the new address for the remapping. NewAddr is ignored unless 327 // Move is MMRemapMustMove. 328 NewAddr hostarch.Addr 329 } 330 331 // MRemapMoveMode controls MRemap's moving behavior. 332 type MRemapMoveMode int 333 334 const ( 335 // MRemapNoMove prevents MRemap from moving the remapped mapping. 
336 MRemapNoMove MRemapMoveMode = iota 337 338 // MRemapMayMove allows MRemap to move the remapped mapping. 339 MRemapMayMove 340 341 // MRemapMustMove requires MRemap to move the remapped mapping to 342 // MRemapOpts.NewAddr, replacing any existing mappings in the remapped 343 // range. 344 MRemapMustMove 345 ) 346 347 // MRemap implements the semantics of Linux's mremap(2). 348 func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldSize uint64, newSize uint64, opts MRemapOpts) (hostarch.Addr, error) { 349 // "Note that old_address has to be page aligned." - mremap(2) 350 if oldAddr.RoundDown() != oldAddr { 351 return 0, linuxerr.EINVAL 352 } 353 354 // Linux treats an old_size that rounds up to 0 as 0, which is otherwise a 355 // valid size. However, new_size can't be 0 after rounding. 356 oldSizeAddr, _ := hostarch.Addr(oldSize).RoundUp() 357 oldSize = uint64(oldSizeAddr) 358 newSizeAddr, ok := hostarch.Addr(newSize).RoundUp() 359 if !ok || newSizeAddr == 0 { 360 return 0, linuxerr.EINVAL 361 } 362 newSize = uint64(newSizeAddr) 363 364 oldEnd, ok := oldAddr.AddLength(oldSize) 365 if !ok { 366 return 0, linuxerr.EINVAL 367 } 368 369 var droppedIDs []memmap.MappingIdentity 370 // This must run after mm.mappingMu.Unlock(). 371 defer func() { 372 for _, id := range droppedIDs { 373 id.DecRef(ctx) 374 } 375 }() 376 377 mm.mappingMu.Lock() 378 defer mm.mappingMu.Unlock() 379 380 // All cases require that a vma exists at oldAddr. 
381 vseg := mm.vmas.FindSegment(oldAddr) 382 if !vseg.Ok() { 383 return 0, linuxerr.EFAULT 384 } 385 386 // Behavior matrix: 387 // 388 // Move | oldSize = 0 | oldSize < newSize | oldSize = newSize | oldSize > newSize 389 // ---------+-------------+-------------------+-------------------+------------------ 390 // NoMove | ENOMEM [1] | Grow in-place | No-op | Shrink in-place 391 // MayMove | Copy [1] | Grow in-place or | No-op | Shrink in-place 392 // | | move | | 393 // MustMove | Copy | Move and grow | Move | Shrink and move 394 // 395 // [1] In-place growth is impossible because the vma at oldAddr already 396 // occupies at least part of the destination. Thus the NoMove case always 397 // fails and the MayMove case always falls back to copying. 398 399 if vma := vseg.ValuePtr(); newSize > oldSize && vma.mlockMode != memmap.MLockNone { 400 // Check against RLIMIT_MEMLOCK. Unlike mmap, mlock, and mlockall, 401 // mremap in Linux does not check mm/mlock.c:can_do_mlock() and 402 // therefore does not return EPERM if RLIMIT_MEMLOCK is 0 and 403 // !CAP_IPC_LOCK. 404 mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur 405 if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { 406 if newLockedAS := mm.lockedAS - oldSize + newSize; newLockedAS > mlockLimit { 407 return 0, linuxerr.EAGAIN 408 } 409 } 410 } 411 412 if opts.Move != MRemapMustMove { 413 // Handle no-ops and in-place shrinking. These cases don't care if 414 // [oldAddr, oldEnd) maps to a single vma, or is even mapped at all 415 // (aside from oldAddr). 416 if newSize <= oldSize { 417 if newSize < oldSize { 418 // If oldAddr+oldSize didn't overflow, oldAddr+newSize can't 419 // either. 420 newEnd := oldAddr + hostarch.Addr(newSize) 421 _, droppedIDs = mm.unmapLocked(ctx, hostarch.AddrRange{newEnd, oldEnd}, droppedIDs) 422 } 423 return oldAddr, nil 424 } 425 426 // Handle in-place growing. 
427 428 // Check that oldEnd maps to the same vma as oldAddr. 429 if vseg.End() < oldEnd { 430 return 0, linuxerr.EFAULT 431 } 432 // "Grow" the existing vma by creating a new mergeable one. 433 vma := vseg.ValuePtr() 434 var newOffset uint64 435 if vma.mappable != nil { 436 newOffset = vseg.mappableRange().End 437 } 438 var vseg vmaIterator 439 var ar hostarch.AddrRange 440 var err error 441 vseg, ar, droppedIDs, err = mm.createVMALocked(ctx, memmap.MMapOpts{ 442 Length: newSize - oldSize, 443 MappingIdentity: vma.id, 444 Mappable: vma.mappable, 445 Offset: newOffset, 446 Addr: oldEnd, 447 Fixed: true, 448 Perms: vma.realPerms, 449 MaxPerms: vma.maxPerms, 450 Private: vma.private, 451 GrowsDown: vma.growsDown, 452 MLockMode: vma.mlockMode, 453 Hint: vma.hint, 454 }, droppedIDs) 455 if err == nil { 456 if vma.mlockMode == memmap.MLockEager { 457 mm.populateVMA(ctx, vseg, ar, true) 458 } 459 return oldAddr, nil 460 } 461 // In-place growth failed. In the MRemapMayMove case, fall through to 462 // copying/moving below. 463 if opts.Move == MRemapNoMove { 464 return 0, err 465 } 466 } 467 468 // Find a location for the new mapping. 469 var newAR hostarch.AddrRange 470 switch opts.Move { 471 case MRemapMayMove: 472 newAddr, err := mm.findAvailableLocked(newSize, findAvailableOpts{}) 473 if err != nil { 474 return 0, err 475 } 476 newAR, _ = newAddr.ToRange(newSize) 477 478 case MRemapMustMove: 479 newAddr := opts.NewAddr 480 if newAddr.RoundDown() != newAddr { 481 return 0, linuxerr.EINVAL 482 } 483 var ok bool 484 newAR, ok = newAddr.ToRange(newSize) 485 if !ok { 486 return 0, linuxerr.EINVAL 487 } 488 if (hostarch.AddrRange{oldAddr, oldEnd}).Overlaps(newAR) { 489 return 0, linuxerr.EINVAL 490 } 491 492 // Check that the new region is valid. 493 _, err := mm.findAvailableLocked(newSize, findAvailableOpts{ 494 Addr: newAddr, 495 Fixed: true, 496 Unmap: true, 497 }) 498 if err != nil { 499 return 0, err 500 } 501 502 // Unmap any mappings at the destination. 
503 _, droppedIDs = mm.unmapLocked(ctx, newAR, droppedIDs) 504 505 // If the sizes specify shrinking, unmap everything between the new and 506 // old sizes at the source. Unmapping before the following checks is 507 // correct: compare Linux's mm/mremap.c:mremap_to() => do_munmap(), 508 // vma_to_resize(). 509 if newSize < oldSize { 510 oldNewEnd := oldAddr + hostarch.Addr(newSize) 511 _, droppedIDs = mm.unmapLocked(ctx, hostarch.AddrRange{oldNewEnd, oldEnd}, droppedIDs) 512 oldEnd = oldNewEnd 513 } 514 515 // unmapLocked may have invalidated vseg; look it up again. 516 vseg = mm.vmas.FindSegment(oldAddr) 517 } 518 519 oldAR := hostarch.AddrRange{oldAddr, oldEnd} 520 521 // Check that oldEnd maps to the same vma as oldAddr. 522 if vseg.End() < oldEnd { 523 return 0, linuxerr.EFAULT 524 } 525 526 // Check against RLIMIT_AS. 527 newUsageAS := mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length()) 528 if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS { 529 return 0, linuxerr.ENOMEM 530 } 531 532 if vma := vseg.ValuePtr(); vma.mappable != nil { 533 // Check that offset+length does not overflow. 534 if vma.off+uint64(newAR.Length()) < vma.off { 535 return 0, linuxerr.EINVAL 536 } 537 // Inform the Mappable, if any, of the new mapping. 538 if err := vma.mappable.CopyMapping(ctx, mm, oldAR, newAR, vseg.mappableOffsetAt(oldAR.Start), vma.canWriteMappableLocked()); err != nil { 539 return 0, err 540 } 541 } 542 543 if oldSize == 0 { 544 // Handle copying. 545 // 546 // We can't use createVMALocked because it calls Mappable.AddMapping, 547 // whereas we've already called Mappable.CopyMapping (which is 548 // consistent with Linux). 
549 vma := vseg.ValuePtr().copy() 550 if vma.mappable != nil { 551 vma.off = vseg.mappableOffsetAt(oldAR.Start) 552 } 553 if vma.id != nil { 554 vma.id.IncRef() 555 } 556 vseg := mm.vmas.Insert(mm.vmas.FindGap(newAR.Start), newAR, vma) 557 mm.usageAS += uint64(newAR.Length()) 558 if vma.isPrivateDataLocked() { 559 mm.dataAS += uint64(newAR.Length()) 560 } 561 if vma.mlockMode != memmap.MLockNone { 562 mm.lockedAS += uint64(newAR.Length()) 563 if vma.mlockMode == memmap.MLockEager { 564 mm.populateVMA(ctx, vseg, newAR, true) 565 } 566 } 567 return newAR.Start, nil 568 } 569 570 // Handle moving. 571 // 572 // Remove the existing vma before inserting the new one to minimize 573 // iterator invalidation. We do this directly (instead of calling 574 // removeVMAsLocked) because: 575 // 576 // 1. We can't drop the reference on vma.id, which will be transferred to 577 // the new vma. 578 // 579 // 2. We can't call vma.mappable.RemoveMapping, because pmas are still at 580 // oldAR, so calling RemoveMapping could cause us to miss an invalidation 581 // overlapping oldAR. 582 vseg = mm.vmas.Isolate(vseg, oldAR) 583 vma := vseg.ValuePtr().copy() 584 mm.vmas.Remove(vseg) 585 vseg = mm.vmas.Insert(mm.vmas.FindGap(newAR.Start), newAR, vma) 586 mm.usageAS = mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length()) 587 if vma.isPrivateDataLocked() { 588 mm.dataAS = mm.dataAS - uint64(oldAR.Length()) + uint64(newAR.Length()) 589 } 590 if vma.mlockMode != memmap.MLockNone { 591 mm.lockedAS = mm.lockedAS - uint64(oldAR.Length()) + uint64(newAR.Length()) 592 } 593 594 // Move pmas. This is technically optional for non-private pmas, which 595 // could just go through memmap.Mappable.Translate again, but it's required 596 // for private pmas. 597 mm.activeMu.Lock() 598 mm.movePMAsLocked(oldAR, newAR) 599 mm.activeMu.Unlock() 600 601 // Now that pmas have been moved to newAR, we can notify vma.mappable that 602 // oldAR is no longer mapped. 
603 if vma.mappable != nil { 604 vma.mappable.RemoveMapping(ctx, mm, oldAR, vma.off, vma.canWriteMappableLocked()) 605 } 606 607 if vma.mlockMode == memmap.MLockEager { 608 mm.populateVMA(ctx, vseg, newAR, true) 609 } 610 611 return newAR.Start, nil 612 } 613 614 // MProtect implements the semantics of Linux's mprotect(2). 615 func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms hostarch.AccessType, growsDown bool) error { 616 if addr.RoundDown() != addr { 617 return linuxerr.EINVAL 618 } 619 if length == 0 { 620 return nil 621 } 622 rlength, ok := hostarch.Addr(length).RoundUp() 623 if !ok { 624 return linuxerr.ENOMEM 625 } 626 ar, ok := addr.ToRange(uint64(rlength)) 627 if !ok { 628 return linuxerr.ENOMEM 629 } 630 effectivePerms := realPerms.Effective() 631 632 mm.mappingMu.Lock() 633 defer mm.mappingMu.Unlock() 634 // Non-growsDown mprotect requires that all of ar is mapped, and stops at 635 // the first non-empty gap. growsDown mprotect requires that the first vma 636 // be growsDown, but does not require it to extend all the way to ar.Start; 637 // vmas after the first must be contiguous but need not be growsDown, like 638 // the non-growsDown case. 639 vseg := mm.vmas.LowerBoundSegment(ar.Start) 640 if !vseg.Ok() { 641 return linuxerr.ENOMEM 642 } 643 if growsDown { 644 if !vseg.ValuePtr().growsDown { 645 return linuxerr.EINVAL 646 } 647 if ar.End <= vseg.Start() { 648 return linuxerr.ENOMEM 649 } 650 ar.Start = vseg.Start() 651 } else { 652 if ar.Start < vseg.Start() { 653 return linuxerr.ENOMEM 654 } 655 } 656 657 mm.activeMu.Lock() 658 defer mm.activeMu.Unlock() 659 defer func() { 660 mm.vmas.MergeRange(ar) 661 mm.vmas.MergeAdjacent(ar) 662 mm.pmas.MergeRange(ar) 663 mm.pmas.MergeAdjacent(ar) 664 }() 665 pseg := mm.pmas.LowerBoundSegment(ar.Start) 666 var didUnmapAS bool 667 for { 668 // Check for permission validity before splitting vmas, for consistency 669 // with Linux. 
670 if !vseg.ValuePtr().maxPerms.SupersetOf(effectivePerms) { 671 return linuxerr.EACCES 672 } 673 vseg = mm.vmas.Isolate(vseg, ar) 674 675 // Update vma permissions. 676 vma := vseg.ValuePtr() 677 vmaLength := vseg.Range().Length() 678 if vma.isPrivateDataLocked() { 679 mm.dataAS -= uint64(vmaLength) 680 } 681 682 vma.realPerms = realPerms 683 vma.effectivePerms = effectivePerms 684 if vma.isPrivateDataLocked() { 685 mm.dataAS += uint64(vmaLength) 686 } 687 688 // Propagate vma permission changes to pmas. 689 for pseg.Ok() && pseg.Start() < vseg.End() { 690 if pseg.Range().Overlaps(vseg.Range()) { 691 pseg = mm.pmas.Isolate(pseg, vseg.Range()) 692 pma := pseg.ValuePtr() 693 if !effectivePerms.SupersetOf(pma.effectivePerms) && !didUnmapAS { 694 // Unmap all of ar, not just vseg.Range(), to minimize host 695 // syscalls. 696 mm.unmapASLocked(ar) 697 didUnmapAS = true 698 } 699 pma.effectivePerms = effectivePerms.Intersect(pma.translatePerms) 700 if pma.needCOW { 701 pma.effectivePerms.Write = false 702 } 703 } 704 pseg = pseg.NextSegment() 705 } 706 707 // Continue to the next vma. 708 if ar.End <= vseg.End() { 709 return nil 710 } 711 vseg, _ = vseg.NextNonEmpty() 712 if !vseg.Ok() { 713 return linuxerr.ENOMEM 714 } 715 } 716 } 717 718 // BrkSetup sets mm's brk address to addr and its brk size to 0. 719 func (mm *MemoryManager) BrkSetup(ctx context.Context, addr hostarch.Addr) { 720 var droppedIDs []memmap.MappingIdentity 721 mm.mappingMu.Lock() 722 // Unmap the existing brk. 723 if mm.brk.Length() != 0 { 724 _, droppedIDs = mm.unmapLocked(ctx, mm.brk, droppedIDs) 725 } 726 mm.brk = hostarch.AddrRange{addr, addr} 727 mm.mappingMu.Unlock() 728 for _, id := range droppedIDs { 729 id.DecRef(ctx) 730 } 731 } 732 733 // Brk implements the semantics of Linux's brk(2), except that it returns an 734 // error on failure. 
//
// addr is the requested new end of the brk range. On success the new break
// (addr) is returned. On every failure path the returned address is the
// current, unchanged break end (mm.brk.End) together with the error:
//   - EINVAL if addr is below the start of the brk range.
//   - ENOMEM if the requested heap size exceeds the RLIMIT_DATA soft limit.
//   - EFAULT if addr overflows when rounded up to a page.
//   - any error from createVMALocked when growing.
func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch.Addr, error) {
	mm.mappingMu.Lock()
	// Can't defer mm.mappingMu.Unlock(); see below.

	if addr < mm.brk.Start {
		addr = mm.brk.End
		mm.mappingMu.Unlock()
		return addr, linuxerr.EINVAL
	}

	// TODO(gvisor.dev/issue/156): This enforces RLIMIT_DATA, but is
	// slightly more permissive than the usual data limit. In particular,
	// this only limits the size of the heap; a true RLIMIT_DATA limits the
	// size of heap + data + bss. The segment sizes need to be plumbed from
	// the loader package to fully enforce RLIMIT_DATA.
	if uint64(addr-mm.brk.Start) > limits.FromContext(ctx).Get(limits.Data).Cur {
		addr = mm.brk.End
		mm.mappingMu.Unlock()
		return addr, linuxerr.ENOMEM
	}

	// Mappings are page-granular, so compare the page-rounded old and new
	// break to decide whether anything needs to be mapped or unmapped.
	oldbrkpg, _ := mm.brk.End.RoundUp()
	newbrkpg, ok := addr.RoundUp()
	if !ok {
		addr = mm.brk.End
		mm.mappingMu.Unlock()
		return addr, linuxerr.EFAULT
	}

	var vseg vmaIterator
	var ar hostarch.AddrRange
	var err error

	var droppedIDs []memmap.MappingIdentity
	// This must run after mm.mappingMu.Unlock(). Every path below unlocks
	// mm.mappingMu explicitly before returning, so this deferred function
	// always runs with the lock released.
	defer func() {
		for _, id := range droppedIDs {
			id.DecRef(ctx)
		}
	}()

	switch {
	case oldbrkpg < newbrkpg:
		// Growing: map the new pages as a private read/write "[heap]" vma.
		vseg, ar, droppedIDs, err = mm.createVMALocked(ctx, memmap.MMapOpts{
			Length: uint64(newbrkpg - oldbrkpg),
			Addr:   oldbrkpg,
			Fixed:  true,
			// Compare Linux's
			// arch/x86/include/asm/page_types.h:VM_DATA_DEFAULT_FLAGS.
			Perms:    hostarch.ReadWrite,
			MaxPerms: hostarch.AnyAccess,
			Private:  true,
			// Linux: mm/mmap.c:sys_brk() => do_brk_flags() includes
			// mm->def_flags.
			MLockMode: mm.defMLockMode,
			Hint:      "[heap]",
		}, droppedIDs)
		if err != nil {
			addr = mm.brk.End
			mm.mappingMu.Unlock()
			return addr, err
		}
		mm.brk.End = addr
		// populateVMAAndUnlock releases mm.mappingMu itself, which is why the
		// unlock cannot be deferred.
		if mm.defMLockMode == memmap.MLockEager {
			mm.populateVMAAndUnlock(ctx, vseg, ar, true)
		} else {
			mm.mappingMu.Unlock()
		}

	case newbrkpg < oldbrkpg:
		// Shrinking: unmap the pages between the new and old break, then
		// record the new break like the unchanged-page case.
		_, droppedIDs = mm.unmapLocked(ctx, hostarch.AddrRange{newbrkpg, oldbrkpg}, droppedIDs)
		fallthrough

	default:
		mm.brk.End = addr
		mm.mappingMu.Unlock()
	}

	return addr, nil
}

// MLock implements the semantics of Linux's mlock()/mlock2()/munlock(),
// depending on mode.
//
// The range is expanded to page boundaries: addr is rounded down and length
// (plus addr's page offset) is rounded up. mode is the mlock mode applied to
// every vma in the range; memmap.MLockNone unlocks.
//
// Errors:
//   - EINVAL if the expanded range is not representable.
//   - EPERM or ENOMEM from the RLIMIT_MEMLOCK checks, which apply only when
//     locking and the caller lacks CAP_IPC_LOCK.
//   - ENOMEM if part of the range is unmapped; vmas visited before the hole
//     keep the new mode.
//   - for memmap.MLockEager only: ENOMEM for a vma with no effective
//     permissions, the translated getPMAsLocked error, or the mapASLocked
//     error.
func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length uint64, mode memmap.MLockMode) error {
	// Linux allows this to overflow.
	la, _ := hostarch.Addr(length + addr.PageOffset()).RoundUp()
	ar, ok := addr.RoundDown().ToRange(uint64(la))
	if !ok {
		return linuxerr.EINVAL
	}

	mm.mappingMu.Lock()
	// Can't defer mm.mappingMu.Unlock(); see below.

	if mode != memmap.MLockNone {
		// Check against RLIMIT_MEMLOCK.
		if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
			mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
			if mlockLimit == 0 {
				mm.mappingMu.Unlock()
				return linuxerr.EPERM
			}
			// Bytes in ar that are already locked are not counted twice.
			if newLockedAS := mm.lockedAS + uint64(ar.Length()) - mm.mlockedBytesRangeLocked(ar); newLockedAS > mlockLimit {
				mm.mappingMu.Unlock()
				return linuxerr.ENOMEM
			}
		}
	}

	// Check this after RLIMIT_MEMLOCK for consistency with Linux.
	if ar.Length() == 0 {
		mm.mappingMu.Unlock()
		return nil
	}

	// Apply the new mlock mode to vmas.
	var unmapped bool
	vseg := mm.vmas.FindSegment(ar.Start)
	for {
		if !vseg.Ok() {
			unmapped = true
			break
		}
		vseg = mm.vmas.Isolate(vseg, ar)
		vma := vseg.ValuePtr()
		prevMode := vma.mlockMode
		vma.mlockMode = mode
		// mm.lockedAS only changes when a vma moves between locked and
		// unlocked, not when it moves between two locked modes.
		if mode != memmap.MLockNone && prevMode == memmap.MLockNone {
			mm.lockedAS += uint64(vseg.Range().Length())
		} else if mode == memmap.MLockNone && prevMode != memmap.MLockNone {
			mm.lockedAS -= uint64(vseg.Range().Length())
		}
		if ar.End <= vseg.End() {
			break
		}
		vseg, _ = vseg.NextNonEmpty()
	}
	mm.vmas.MergeRange(ar)
	mm.vmas.MergeAdjacent(ar)
	if unmapped {
		mm.mappingMu.Unlock()
		return linuxerr.ENOMEM
	}

	if mode == memmap.MLockEager {
		// Ensure that we have usable pmas. Since we didn't return ENOMEM
		// above, ar must be fully covered by vmas, so we can just use
		// NextSegment below.
		mm.activeMu.Lock()
		mm.mappingMu.DowngradeLock()
		for vseg := mm.vmas.FindSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
			if !vseg.ValuePtr().effectivePerms.Any() {
				// Linux: mm/gup.c:__get_user_pages() returns EFAULT in this
				// case, which is converted to ENOMEM by mlock.
				mm.activeMu.Unlock()
				mm.mappingMu.RUnlock()
				return linuxerr.ENOMEM
			}
			_, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), hostarch.NoAccess)
			if err != nil {
				mm.activeMu.Unlock()
				mm.mappingMu.RUnlock()
				// Linux: mm/mlock.c:__mlock_posix_error_return()
				if linuxerr.Equals(linuxerr.EFAULT, err) {
					return linuxerr.ENOMEM
				}
				if linuxerr.Equals(linuxerr.ENOMEM, err) {
					return linuxerr.EAGAIN
				}
				return err
			}
		}

		// Map pmas into the active AddressSpace, if we have one.
		mm.mappingMu.RUnlock()
		if mm.as != nil {
			mm.activeMu.DowngradeLock()
			err := mm.mapASLocked(mm.pmas.LowerBoundSegment(ar.Start), ar, true /* precommit */)
			mm.activeMu.RUnlock()
			if err != nil {
				return err
			}
		} else {
			mm.activeMu.Unlock()
		}
	} else {
		mm.mappingMu.Unlock()
	}

	return nil
}

// MLockAllOpts holds options to MLockAll.
type MLockAllOpts struct {
	// If Current is true, change the memory-locking behavior of all mappings
	// to Mode. If Future is true, upgrade the memory-locking behavior of all
	// future mappings to Mode. At least one of Current or Future must be true.
	Current bool
	Future  bool
	Mode    memmap.MLockMode
}

// MLockAll implements the semantics of Linux's mlockall()/munlockall(),
// depending on opts.
//
// Errors:
//   - EINVAL if neither opts.Current nor opts.Future is set.
//   - EPERM or ENOMEM from the RLIMIT_MEMLOCK checks, which apply only when
//     opts.Current is set with a locking mode and the caller lacks
//     CAP_IPC_LOCK.
//
// Population for memmap.MLockEager is best-effort; its errors are ignored.
func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error {
	if !opts.Current && !opts.Future {
		return linuxerr.EINVAL
	}

	mm.mappingMu.Lock()
	// Can't defer mm.mappingMu.Unlock(); see below.

	if opts.Current {
		if opts.Mode != memmap.MLockNone {
			// Check against RLIMIT_MEMLOCK.
			if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
				mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
				if mlockLimit == 0 {
					mm.mappingMu.Unlock()
					return linuxerr.EPERM
				}
				if uint64(mm.vmas.Span()) > mlockLimit {
					mm.mappingMu.Unlock()
					return linuxerr.ENOMEM
				}
			}
		}
		// Apply the mode to every existing vma, adjusting mm.lockedAS only
		// for vmas that move between locked and unlocked.
		for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() {
			vma := vseg.ValuePtr()
			prevMode := vma.mlockMode
			vma.mlockMode = opts.Mode
			if opts.Mode != memmap.MLockNone && prevMode == memmap.MLockNone {
				mm.lockedAS += uint64(vseg.Range().Length())
			} else if opts.Mode == memmap.MLockNone && prevMode != memmap.MLockNone {
				mm.lockedAS -= uint64(vseg.Range().Length())
			}
		}
	}

	if opts.Future {
		mm.defMLockMode = opts.Mode
	}

	if opts.Current && opts.Mode == memmap.MLockEager {
		// Linux: mm/mlock.c:sys_mlockall() => include/linux/mm.h:mm_populate()
		// ignores the return value of __mm_populate(), so all errors below are
		// ignored.
		//
		// Try to get usable pmas.
		mm.activeMu.Lock()
		mm.mappingMu.DowngradeLock()
		for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() {
			if vseg.ValuePtr().effectivePerms.Any() {
				mm.getPMAsLocked(ctx, vseg, vseg.Range(), hostarch.NoAccess)
			}
		}

		// Map all pmas into the active AddressSpace, if we have one.
		mm.mappingMu.RUnlock()
		if mm.as != nil {
			mm.activeMu.DowngradeLock()
			mm.mapASLocked(mm.pmas.FirstSegment(), mm.applicationAddrRange(), true /* precommit */)
			mm.activeMu.RUnlock()
		} else {
			mm.activeMu.Unlock()
		}
	} else {
		mm.mappingMu.Unlock()
	}
	return nil
}

// NumaPolicy implements the semantics of Linux's get_mempolicy(MPOL_F_ADDR).
1008 func (mm *MemoryManager) NumaPolicy(addr hostarch.Addr) (linux.NumaPolicy, uint64, error) { 1009 mm.mappingMu.RLock() 1010 defer mm.mappingMu.RUnlock() 1011 vseg := mm.vmas.FindSegment(addr) 1012 if !vseg.Ok() { 1013 return 0, 0, linuxerr.EFAULT 1014 } 1015 vma := vseg.ValuePtr() 1016 return vma.numaPolicy, vma.numaNodemask, nil 1017 } 1018 1019 // SetNumaPolicy implements the semantics of Linux's mbind(). 1020 func (mm *MemoryManager) SetNumaPolicy(addr hostarch.Addr, length uint64, policy linux.NumaPolicy, nodemask uint64) error { 1021 if !addr.IsPageAligned() { 1022 return linuxerr.EINVAL 1023 } 1024 // Linux allows this to overflow. 1025 la, _ := hostarch.Addr(length).RoundUp() 1026 ar, ok := addr.ToRange(uint64(la)) 1027 if !ok { 1028 return linuxerr.EINVAL 1029 } 1030 if ar.Length() == 0 { 1031 return nil 1032 } 1033 1034 mm.mappingMu.Lock() 1035 defer mm.mappingMu.Unlock() 1036 defer func() { 1037 mm.vmas.MergeRange(ar) 1038 mm.vmas.MergeAdjacent(ar) 1039 }() 1040 vseg := mm.vmas.LowerBoundSegment(ar.Start) 1041 lastEnd := ar.Start 1042 for { 1043 if !vseg.Ok() || lastEnd < vseg.Start() { 1044 // "EFAULT: ... there was an unmapped hole in the specified memory 1045 // range specified [sic] by addr and len." - mbind(2) 1046 return linuxerr.EFAULT 1047 } 1048 vseg = mm.vmas.Isolate(vseg, ar) 1049 vma := vseg.ValuePtr() 1050 vma.numaPolicy = policy 1051 vma.numaNodemask = nodemask 1052 lastEnd = vseg.End() 1053 if ar.End <= lastEnd { 1054 return nil 1055 } 1056 vseg, _ = vseg.NextNonEmpty() 1057 } 1058 } 1059 1060 // SetDontFork implements the semantics of madvise MADV_DONTFORK. 
1061 func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork bool) error { 1062 ar, ok := addr.ToRange(length) 1063 if !ok { 1064 return linuxerr.EINVAL 1065 } 1066 1067 mm.mappingMu.Lock() 1068 defer mm.mappingMu.Unlock() 1069 defer func() { 1070 mm.vmas.MergeRange(ar) 1071 mm.vmas.MergeAdjacent(ar) 1072 }() 1073 1074 for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() { 1075 vseg = mm.vmas.Isolate(vseg, ar) 1076 vma := vseg.ValuePtr() 1077 vma.dontfork = dontfork 1078 } 1079 1080 if mm.vmas.SpanRange(ar) != ar.Length() { 1081 return linuxerr.ENOMEM 1082 } 1083 return nil 1084 } 1085 1086 // Decommit implements the semantics of Linux's madvise(MADV_DONTNEED). 1087 func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error { 1088 ar, ok := addr.ToRange(length) 1089 if !ok { 1090 return linuxerr.EINVAL 1091 } 1092 1093 mm.mappingMu.RLock() 1094 defer mm.mappingMu.RUnlock() 1095 mm.activeMu.Lock() 1096 defer mm.activeMu.Unlock() 1097 1098 // This is invalidateLocked(invalidatePrivate=true, invalidateShared=true), 1099 // with the additional wrinkle that we must refuse to invalidate pmas under 1100 // mlocked vmas. 1101 var didUnmapAS bool 1102 pseg := mm.pmas.LowerBoundSegment(ar.Start) 1103 for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() { 1104 vma := vseg.ValuePtr() 1105 if vma.mlockMode != memmap.MLockNone { 1106 return linuxerr.EINVAL 1107 } 1108 vsegAR := vseg.Range().Intersect(ar) 1109 // pseg should already correspond to either this vma or a later one, 1110 // since there can't be a pma without a corresponding vma. 
1111 if checkInvariants { 1112 if pseg.Ok() && pseg.End() <= vsegAR.Start { 1113 panic(fmt.Sprintf("pma %v precedes vma %v", pseg.Range(), vsegAR)) 1114 } 1115 } 1116 for pseg.Ok() && pseg.Start() < vsegAR.End { 1117 pseg = mm.pmas.Isolate(pseg, vsegAR) 1118 pma := pseg.ValuePtr() 1119 if !didUnmapAS { 1120 // Unmap all of ar, not just pseg.Range(), to minimize host 1121 // syscalls. AddressSpace mappings must be removed before 1122 // mm.decPrivateRef(). 1123 mm.unmapASLocked(ar) 1124 didUnmapAS = true 1125 } 1126 if pma.private { 1127 mm.decPrivateRef(pseg.fileRange()) 1128 } 1129 pma.file.DecRef(pseg.fileRange()) 1130 mm.removeRSSLocked(pseg.Range()) 1131 pseg = mm.pmas.Remove(pseg).NextSegment() 1132 } 1133 } 1134 1135 // "If there are some parts of the specified address space that are not 1136 // mapped, the Linux version of madvise() ignores them and applies the call 1137 // to the rest (but returns ENOMEM from the system call, as it should)." - 1138 // madvise(2) 1139 if mm.vmas.SpanRange(ar) != ar.Length() { 1140 return linuxerr.ENOMEM 1141 } 1142 return nil 1143 } 1144 1145 // MSyncOpts holds options to MSync. 1146 type MSyncOpts struct { 1147 // Sync has the semantics of MS_SYNC. 1148 Sync bool 1149 1150 // Invalidate has the semantics of MS_INVALIDATE. 1151 Invalidate bool 1152 } 1153 1154 // MSync implements the semantics of Linux's msync(). 1155 func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length uint64, opts MSyncOpts) error { 1156 if addr != addr.RoundDown() { 1157 return linuxerr.EINVAL 1158 } 1159 if length == 0 { 1160 return nil 1161 } 1162 la, ok := hostarch.Addr(length).RoundUp() 1163 if !ok { 1164 return linuxerr.ENOMEM 1165 } 1166 ar, ok := addr.ToRange(uint64(la)) 1167 if !ok { 1168 return linuxerr.ENOMEM 1169 } 1170 1171 mm.mappingMu.RLock() 1172 // Can't defer mm.mappingMu.RUnlock(); see below. 
1173 vseg := mm.vmas.LowerBoundSegment(ar.Start) 1174 if !vseg.Ok() { 1175 mm.mappingMu.RUnlock() 1176 return linuxerr.ENOMEM 1177 } 1178 var unmapped bool 1179 lastEnd := ar.Start 1180 for { 1181 if !vseg.Ok() { 1182 mm.mappingMu.RUnlock() 1183 unmapped = true 1184 break 1185 } 1186 if lastEnd < vseg.Start() { 1187 unmapped = true 1188 } 1189 lastEnd = vseg.End() 1190 vma := vseg.ValuePtr() 1191 if opts.Invalidate && vma.mlockMode != memmap.MLockNone { 1192 mm.mappingMu.RUnlock() 1193 return linuxerr.EBUSY 1194 } 1195 // It's only possible to have dirtied the Mappable through a shared 1196 // mapping. Don't check if the mapping is writable, because mprotect 1197 // may have changed this, and also because Linux doesn't. 1198 if id := vma.id; opts.Sync && id != nil && vma.mappable != nil && !vma.private { 1199 // We can't call memmap.MappingIdentity.Msync while holding 1200 // mm.mappingMu since it may take fs locks that precede it in the 1201 // lock order. 1202 id.IncRef() 1203 mr := vseg.mappableRangeOf(vseg.Range().Intersect(ar)) 1204 mm.mappingMu.RUnlock() 1205 err := id.Msync(ctx, mr) 1206 id.DecRef(ctx) 1207 if err != nil { 1208 return err 1209 } 1210 if lastEnd >= ar.End { 1211 break 1212 } 1213 mm.mappingMu.RLock() 1214 vseg = mm.vmas.LowerBoundSegment(lastEnd) 1215 } else { 1216 if lastEnd >= ar.End { 1217 mm.mappingMu.RUnlock() 1218 break 1219 } 1220 vseg = vseg.NextSegment() 1221 } 1222 } 1223 1224 if unmapped { 1225 return linuxerr.ENOMEM 1226 } 1227 return nil 1228 } 1229 1230 // GetSharedFutexKey is used by kernel.Task.GetSharedKey. 1231 func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr hostarch.Addr) (futex.Key, error) { 1232 ar, ok := addr.ToRange(4) // sizeof(int32). 
1233 if !ok { 1234 return futex.Key{}, linuxerr.EFAULT 1235 } 1236 1237 mm.mappingMu.RLock() 1238 defer mm.mappingMu.RUnlock() 1239 vseg, _, err := mm.getVMAsLocked(ctx, ar, hostarch.Read, false) 1240 if err != nil { 1241 return futex.Key{}, err 1242 } 1243 vma := vseg.ValuePtr() 1244 1245 if vma.private { 1246 return futex.Key{ 1247 Kind: futex.KindSharedPrivate, 1248 Offset: uint64(addr), 1249 }, nil 1250 } 1251 1252 if vma.id != nil { 1253 vma.id.IncRef() 1254 } 1255 return futex.Key{ 1256 Kind: futex.KindSharedMappable, 1257 Mappable: vma.mappable, 1258 MappingIdentity: vma.id, 1259 Offset: vseg.mappableOffsetAt(addr), 1260 }, nil 1261 } 1262 1263 // VirtualMemorySize returns the combined length in bytes of all mappings in 1264 // mm. 1265 func (mm *MemoryManager) VirtualMemorySize() uint64 { 1266 mm.mappingMu.RLock() 1267 defer mm.mappingMu.RUnlock() 1268 return mm.usageAS 1269 } 1270 1271 // VirtualMemorySizeRange returns the combined length in bytes of all mappings 1272 // in ar in mm. 1273 func (mm *MemoryManager) VirtualMemorySizeRange(ar hostarch.AddrRange) uint64 { 1274 mm.mappingMu.RLock() 1275 defer mm.mappingMu.RUnlock() 1276 return uint64(mm.vmas.SpanRange(ar)) 1277 } 1278 1279 // ResidentSetSize returns the value advertised as mm's RSS in bytes. 1280 func (mm *MemoryManager) ResidentSetSize() uint64 { 1281 mm.activeMu.RLock() 1282 defer mm.activeMu.RUnlock() 1283 return mm.curRSS 1284 } 1285 1286 // MaxResidentSetSize returns the value advertised as mm's max RSS in bytes. 1287 func (mm *MemoryManager) MaxResidentSetSize() uint64 { 1288 mm.activeMu.RLock() 1289 defer mm.activeMu.RUnlock() 1290 return mm.maxRSS 1291 } 1292 1293 // VirtualDataSize returns the size of private data segments in mm. 1294 func (mm *MemoryManager) VirtualDataSize() uint64 { 1295 mm.mappingMu.RLock() 1296 defer mm.mappingMu.RUnlock() 1297 return mm.dataAS 1298 } 1299 1300 // EnableMembarrierPrivate causes future calls to IsMembarrierPrivateEnabled to 1301 // return true. 
1302 func (mm *MemoryManager) EnableMembarrierPrivate() { 1303 mm.membarrierPrivateEnabled.Store(1) 1304 } 1305 1306 // IsMembarrierPrivateEnabled returns true if mm.EnableMembarrierPrivate() has 1307 // previously been called. 1308 func (mm *MemoryManager) IsMembarrierPrivateEnabled() bool { 1309 return mm.membarrierPrivateEnabled.Load() != 0 1310 } 1311 1312 // EnableMembarrierRSeq causes future calls to IsMembarrierRSeqEnabled to 1313 // return true. 1314 func (mm *MemoryManager) EnableMembarrierRSeq() { 1315 mm.membarrierRSeqEnabled.Store(1) 1316 } 1317 1318 // IsMembarrierRSeqEnabled returns true if mm.EnableMembarrierRSeq() has 1319 // previously been called. 1320 func (mm *MemoryManager) IsMembarrierRSeqEnabled() bool { 1321 return mm.membarrierRSeqEnabled.Load() != 0 1322 } 1323 1324 // FindVMAByName finds a vma with the specified name and returns its start address and offset. 1325 func (mm *MemoryManager) FindVMAByName(ar hostarch.AddrRange, hint string) (hostarch.Addr, uint64, error) { 1326 mm.mappingMu.RLock() 1327 defer mm.mappingMu.RUnlock() 1328 1329 for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok(); vseg = vseg.NextSegment() { 1330 start := vseg.Start() 1331 if !ar.Contains(start) { 1332 break 1333 } 1334 vma := vseg.ValuePtr() 1335 1336 if vma.hint == hint { 1337 return start, vma.off, nil 1338 } 1339 } 1340 return 0, 0, fmt.Errorf("could not find \"%s\" in %s", hint, ar) 1341 }