github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/mm/pma.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mm

import (
	"fmt"

	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/safecopy"
	"github.com/SagerNet/gvisor/pkg/safemem"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sentry/usage"
	"github.com/SagerNet/gvisor/pkg/syserror"
)

// existingPMAsLocked checks that pmas exist for all addresses in ar, and
// support access of type (at, ignorePermissions). If so, it returns an
// iterator to the pma containing ar.Start. Otherwise it returns a terminal
// iterator.
//
// Preconditions:
// * mm.activeMu must be locked.
// * ar.Length() != 0.
func (mm *MemoryManager) existingPMAsLocked(ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
	}

	first := mm.pmas.FindSegment(ar.Start)
	pseg := first
	for pseg.Ok() {
		pma := pseg.ValuePtr()
		perms := pma.effectivePerms
		if ignorePermissions {
			perms = pma.maxPerms
		}
		if !perms.SupersetOf(at) {
			return pmaIterator{}
		}
		if needInternalMappings && pma.internalMappings.IsEmpty() {
			return pmaIterator{}
		}

		if ar.End <= pseg.End() {
			return first
		}
		pseg, _ = pseg.NextNonEmpty()
	}

	// Ran out of pmas before reaching ar.End.
	return pmaIterator{}
}

// existingVecPMAsLocked returns true if pmas exist for all addresses in ars,
// and support access of type (at, ignorePermissions).
//
// Preconditions: mm.activeMu must be locked.
func (mm *MemoryManager) existingVecPMAsLocked(ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) bool {
	for ; !ars.IsEmpty(); ars = ars.Tail() {
		if ar := ars.Head(); ar.Length() != 0 && !mm.existingPMAsLocked(ar, at, ignorePermissions, needInternalMappings).Ok() {
			return false
		}
	}
	return true
}

// getPMAsLocked ensures that pmas exist for all addresses in ar, and support
// access of type at. It returns:
//
// - An iterator to the pma containing ar.Start. If no pma contains ar.Start,
// the iterator is unspecified.
//
// - An iterator to the gap after the last pma containing an address in ar. If
// pmas exist for no addresses in ar, the iterator is to a gap that begins
// before ar.Start.
//
// - An error that is non-nil if pmas exist for only a subset of ar.
//
// Preconditions:
// * mm.mappingMu must be locked.
// * mm.activeMu must be locked for writing.
// * ar.Length() != 0.
// * vseg.Range().Contains(ar.Start).
// * vmas must exist for all addresses in ar, and support accesses of type at
//   (i.e. permission checks must have been performed against vmas).
func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !vseg.Ok() {
			panic("terminal vma iterator")
		}
		if !vseg.Range().Contains(ar.Start) {
			panic(fmt.Sprintf("initial vma %v does not cover start of ar %v", vseg.Range(), ar))
		}
	}

	// Page-align ar so that all AddrRanges are aligned.
	end, ok := ar.End.RoundUp()
	var alignerr error
	if !ok {
		end = ar.End.RoundDown()
		alignerr = syserror.EFAULT
	}
	ar = hostarch.AddrRange{ar.Start.RoundDown(), end}

	pstart, pend, perr := mm.getPMAsInternalLocked(ctx, vseg, ar, at)
	if pend.Start() <= ar.Start {
		return pmaIterator{}, pend, perr
	}
	// getPMAsInternalLocked may not have returned pstart due to iterator
	// invalidation.
	if !pstart.Ok() {
		pstart = mm.findOrSeekPrevUpperBoundPMA(ar.Start, pend)
	}
	if perr != nil {
		return pstart, pend, perr
	}
	return pstart, pend, alignerr
}

// getVecPMAsLocked ensures that pmas exist for all addresses in ars, and
// support access of type at. It returns the subset of ars for which pmas
// exist. If this is not equal to ars, it returns a non-nil error explaining
// why.
//
// Preconditions:
// * mm.mappingMu must be locked.
// * mm.activeMu must be locked for writing.
// * vmas must exist for all addresses in ars, and support accesses of type at
//   (i.e. permission checks must have been performed against vmas).
func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType) (hostarch.AddrRangeSeq, error) {
	for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
		ar := arsit.Head()
		if ar.Length() == 0 {
			continue
		}
		if checkInvariants {
			if !ar.WellFormed() {
				panic(fmt.Sprintf("invalid ar: %v", ar))
			}
		}

		// Page-align ar so that all AddrRanges are aligned.
		end, ok := ar.End.RoundUp()
		var alignerr error
		if !ok {
			end = ar.End.RoundDown()
			alignerr = syserror.EFAULT
		}
		ar = hostarch.AddrRange{ar.Start.RoundDown(), end}

		_, pend, perr := mm.getPMAsInternalLocked(ctx, mm.vmas.FindSegment(ar.Start), ar, at)
		if perr != nil {
			return truncatedAddrRangeSeq(ars, arsit, pend.Start()), perr
		}
		if alignerr != nil {
			return truncatedAddrRangeSeq(ars, arsit, pend.Start()), alignerr
		}
	}

	return ars, nil
}

// getPMAsInternalLocked is equivalent to getPMAsLocked, with the following
// exceptions:
//
// - getPMAsInternalLocked returns a pmaIterator on a best-effort basis (that
// is, the returned iterator may be terminal, even if a pma that contains
// ar.Start exists). Returning this iterator on a best-effort basis allows
// callers that require it to use it when it's cheaply available, while also
// avoiding the overhead of retrieving it when it's not.
//
// - getPMAsInternalLocked additionally requires that ar is page-aligned.
//
// getPMAsInternalLocked is an implementation helper for getPMAsLocked and
// getVecPMAsLocked; other clients should call one of those instead.
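//
// Preconditions:
// * mm.mappingMu must be locked.
// * mm.activeMu must be locked for writing.
// * ar.Length() != 0.
// * ar must be page-aligned.
// * vseg.Range().Contains(ar.Start).
// * vmas must exist for all addresses in ar, and support accesses of type at
//   (i.e. permission checks must have been performed against vmas).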
func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !vseg.Ok() {
			panic("terminal vma iterator")
		}
		if !vseg.Range().Contains(ar.Start) {
			panic(fmt.Sprintf("initial vma %v does not cover start of ar %v", vseg.Range(), ar))
		}
	}

	mf := mm.mfp.MemoryFile()
	// Limit the range we allocate to ar, aligned to privateAllocUnit.
	maskAR := privateAligned(ar)
	didUnmapAS := false
	// The range in which we iterate vmas and pmas is still limited to ar, to
	// ensure that we don't allocate or COW-break a pma we don't need.
	pseg, pgap := mm.pmas.Find(ar.Start)
	pstart := pseg
	for {
		// Get pmas for this vma.
		vsegAR := vseg.Range().Intersect(ar)
		vma := vseg.ValuePtr()
	pmaLoop:
		for {
			switch {
			case pgap.Ok() && pgap.Start() < vsegAR.End:
				// Need a pma here.
				optAR := vseg.Range().Intersect(pgap.Range())
				if checkInvariants {
					if optAR.Length() == 0 {
						panic(fmt.Sprintf("vseg %v and pgap %v do not overlap", vseg, pgap))
					}
				}
				if vma.mappable == nil {
					// Private anonymous mappings get pmas by allocating.
					allocAR := optAR.Intersect(maskAR)
					fr, err := mf.Allocate(uint64(allocAR.Length()), usage.Anonymous)
					if err != nil {
						return pstart, pgap, err
					}
					if checkInvariants {
						if !fr.WellFormed() || fr.Length() != uint64(allocAR.Length()) {
							panic(fmt.Sprintf("Allocate(%v) returned invalid FileRange %v", allocAR.Length(), fr))
						}
					}
					mm.addRSSLocked(allocAR)
					mm.incPrivateRef(fr)
					mf.IncRef(fr)
					pseg, pgap = mm.pmas.Insert(pgap, allocAR, pma{
						file:           mf,
						off:            fr.Start,
						translatePerms: hostarch.AnyAccess,
						effectivePerms: vma.effectivePerms,
						maxPerms:       vma.maxPerms,
						// Since we just allocated this memory and have the
						// only reference, the new pma does not need
						// copy-on-write.
						private: true,
					}).NextNonEmpty()
					pstart = pmaIterator{} // iterators invalidated
				} else {
					// Other mappings get pmas by translating.
					optMR := vseg.mappableRangeOf(optAR)
					reqAR := optAR.Intersect(ar)
					reqMR := vseg.mappableRangeOf(reqAR)
					perms := at
					if vma.private {
						// This pma will be copy-on-write; don't require write
						// permission, but do require read permission to
						// facilitate the copy.
						//
						// If at.Write is true, we will need to break
						// copy-on-write immediately, which occurs after
						// translation below.
						perms.Read = true
						perms.Write = false
					}
					ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms)
					if checkInvariants {
						if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil {
							panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err))
						}
					}
					// Install a pma for each translation.
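					// memmap.Mappable.Translate must return a non-nil error
					// if its translations do not cover reqMR, so an empty ts
					// implies err != nil.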
					if len(ts) == 0 {
						return pstart, pgap, err
					}
					pstart = pmaIterator{} // iterators invalidated
					for _, t := range ts {
						newpmaAR := vseg.addrRangeOf(t.Source)
						newpma := pma{
							file:           t.File,
							off:            t.Offset,
							translatePerms: t.Perms,
							effectivePerms: vma.effectivePerms.Intersect(t.Perms),
							maxPerms:       vma.maxPerms.Intersect(t.Perms),
						}
						if vma.private {
							newpma.effectivePerms.Write = false
							newpma.maxPerms.Write = false
							newpma.needCOW = true
						}
						mm.addRSSLocked(newpmaAR)
						t.File.IncRef(t.FileRange())
						// This is valid because memmap.Mappable.Translate is
						// required to return Translations in increasing
						// Translation.Source order.
						pseg = mm.pmas.Insert(pgap, newpmaAR, newpma)
						pgap = pseg.NextGap()
					}
					// The error returned by Translate is only significant if
					// it occurred before ar.End.
					if err != nil && vseg.addrRangeOf(ts[len(ts)-1].Source).End < ar.End {
						return pstart, pgap, err
					}
					// Rewind pseg to the first pma inserted and continue the
					// loop to check if we need to break copy-on-write.
					pseg, pgap = mm.findOrSeekPrevUpperBoundPMA(vseg.addrRangeOf(ts[0].Source).Start, pgap), pmaGapIterator{}
					continue
				}

			case pseg.Ok() && pseg.Start() < vsegAR.End:
				oldpma := pseg.ValuePtr()
				if at.Write && mm.isPMACopyOnWriteLocked(vseg, pseg) {
					// Break copy-on-write by copying.
					if checkInvariants {
						if !oldpma.maxPerms.Read {
							panic(fmt.Sprintf("pma %v needs to be copied for writing, but is not readable: %v", pseg.Range(), oldpma))
						}
					}
					// The majority of copy-on-write breaks on executable pages
					// come from:
					//
					// - The ELF loader, which must zero out bytes on the last
					// page of each segment after the end of the segment.
					//
					// - gdb's use of ptrace to insert breakpoints.
					//
					// Neither of these cases has enough spatial locality to
					// benefit from copying nearby pages, so if the vma is
					// executable, only copy the pages required.
					var copyAR hostarch.AddrRange
					if vseg.ValuePtr().effectivePerms.Execute {
						copyAR = pseg.Range().Intersect(ar)
					} else {
						copyAR = pseg.Range().Intersect(maskAR)
					}
					// Get internal mappings from the pma to copy from.
					if err := pseg.getInternalMappingsLocked(); err != nil {
						return pstart, pseg.PrevGap(), err
					}
					// Copy contents.
					fr, err := mf.AllocateAndFill(uint64(copyAR.Length()), usage.Anonymous, &safemem.BlockSeqReader{mm.internalMappingsLocked(pseg, copyAR)})
					if _, ok := err.(safecopy.BusError); ok {
						// If we got SIGBUS during the copy, deliver SIGBUS to
						// userspace (instead of SIGSEGV) if we're breaking
						// copy-on-write due to application page fault.
						err = &memmap.BusError{err}
					}
					if fr.Length() == 0 {
						return pstart, pseg.PrevGap(), err
					}
					// Unmap all of maskAR, not just copyAR, to minimize host
					// syscalls. AddressSpace mappings must be removed before
					// mm.decPrivateRef().
					if !didUnmapAS {
						mm.unmapASLocked(maskAR)
						didUnmapAS = true
					}
					// Replace the pma with a copy in the part of the address
					// range where copying was successful. This doesn't change
					// RSS.
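					// AllocateAndFill may have populated only a prefix of
					// copyAR; fr.Length() is the amount actually allocated
					// and copied.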
					copyAR.End = copyAR.Start + hostarch.Addr(fr.Length())
					if copyAR != pseg.Range() {
						pseg = mm.pmas.Isolate(pseg, copyAR)
						pstart = pmaIterator{} // iterators invalidated
					}
					oldpma = pseg.ValuePtr()
					if oldpma.private {
						mm.decPrivateRef(pseg.fileRange())
					}
					oldpma.file.DecRef(pseg.fileRange())
					mm.incPrivateRef(fr)
					mf.IncRef(fr)
					oldpma.file = mf
					oldpma.off = fr.Start
					oldpma.translatePerms = hostarch.AnyAccess
					oldpma.effectivePerms = vma.effectivePerms
					oldpma.maxPerms = vma.maxPerms
					oldpma.needCOW = false
					oldpma.private = true
					oldpma.internalMappings = safemem.BlockSeq{}
					// Try to merge the pma with its neighbors.
					if prev := pseg.PrevSegment(); prev.Ok() {
						if merged := mm.pmas.Merge(prev, pseg); merged.Ok() {
							pseg = merged
							pstart = pmaIterator{} // iterators invalidated
						}
					}
					if next := pseg.NextSegment(); next.Ok() {
						if merged := mm.pmas.Merge(pseg, next); merged.Ok() {
							pseg = merged
							pstart = pmaIterator{} // iterators invalidated
						}
					}
					// The error returned by AllocateAndFill is only
					// significant if it occurred before ar.End.
					if err != nil && pseg.End() < ar.End {
						return pstart, pseg.NextGap(), err
					}
					// Ensure pseg and pgap are correct for the next iteration
					// of the loop.
					pseg, pgap = pseg.NextNonEmpty()
				} else if !oldpma.translatePerms.SupersetOf(at) {
					// Get new pmas (with sufficient permissions) by calling
					// memmap.Mappable.Translate again.
					if checkInvariants {
						if oldpma.private {
							panic(fmt.Sprintf("private pma %v has non-maximal pma.translatePerms: %v", pseg.Range(), oldpma))
						}
					}
					// Allow the entire pma to be replaced.
					optAR := pseg.Range()
					optMR := vseg.mappableRangeOf(optAR)
					reqAR := optAR.Intersect(ar)
					reqMR := vseg.mappableRangeOf(reqAR)
					perms := oldpma.translatePerms.Union(at)
					ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms)
					if checkInvariants {
						if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil {
							panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err))
						}
					}
					// Remove the part of the existing pma covered by new
					// Translations, then insert new pmas. This doesn't change
					// RSS. Note that we don't need to call unmapASLocked: any
					// existing AddressSpace mappings are still valid (though
					// less permissive than the new pmas indicate) until
					// Invalidate is called, and will be replaced by future
					// calls to mapASLocked.
					if len(ts) == 0 {
						return pstart, pseg.PrevGap(), err
					}
					transMR := memmap.MappableRange{ts[0].Source.Start, ts[len(ts)-1].Source.End}
					transAR := vseg.addrRangeOf(transMR)
					pseg = mm.pmas.Isolate(pseg, transAR)
					pseg.ValuePtr().file.DecRef(pseg.fileRange())
					pgap = mm.pmas.Remove(pseg)
					pstart = pmaIterator{} // iterators invalidated
					for _, t := range ts {
						newpmaAR := vseg.addrRangeOf(t.Source)
						newpma := pma{
							file:           t.File,
							off:            t.Offset,
							translatePerms: t.Perms,
							effectivePerms: vma.effectivePerms.Intersect(t.Perms),
							maxPerms:       vma.maxPerms.Intersect(t.Perms),
						}
						if vma.private {
							newpma.effectivePerms.Write = false
							newpma.maxPerms.Write = false
							newpma.needCOW = true
						}
						t.File.IncRef(t.FileRange())
						pseg = mm.pmas.Insert(pgap, newpmaAR, newpma)
						pgap = pseg.NextGap()
					}
					// The error returned by Translate is only significant if
					// it occurred before ar.End.
					if err != nil && pseg.End() < ar.End {
						return pstart, pgap, err
					}
					// Ensure pseg and pgap are correct for the next iteration
					// of the loop.
					if pgap.Range().Length() == 0 {
						pseg, pgap = pgap.NextSegment(), pmaGapIterator{}
					} else {
						pseg = pmaIterator{}
					}
				} else {
					// We have a usable pma; continue.
					pseg, pgap = pseg.NextNonEmpty()
				}

			default:
				break pmaLoop
			}
		}
		// Go to the next vma.
		if ar.End <= vseg.End() {
			if pgap.Ok() {
				return pstart, pgap, nil
			}
			return pstart, pseg.PrevGap(), nil
		}
		vseg = vseg.NextSegment()
	}
}

const (
	// When memory is allocated for a private pma, align the allocated address
	// range to a privateAllocUnit boundary when possible. Larger values of
	// privateAllocUnit may reduce page faults by allowing fewer, larger pmas
	// to be mapped, but may result in larger amounts of wasted memory in the
	// presence of fragmentation. privateAllocUnit must be a power-of-2
	// multiple of hostarch.PageSize.
	privateAllocUnit = hostarch.HugePageSize

	privateAllocMask = privateAllocUnit - 1
)

func privateAligned(ar hostarch.AddrRange) hostarch.AddrRange {
	aligned := hostarch.AddrRange{ar.Start &^ privateAllocMask, ar.End}
	if end := (ar.End + privateAllocMask) &^ privateAllocMask; end >= ar.End {
		aligned.End = end
	}
	if checkInvariants {
		if !aligned.IsSupersetOf(ar) {
			panic(fmt.Sprintf("aligned AddrRange %#v is not a superset of ar %#v", aligned, ar))
		}
	}
	return aligned
}

// isPMACopyOnWriteLocked returns true if the contents of the pma represented
// by pseg must be copied to a new private pma to be written to.
//
// If the pma is a copy-on-write private pma, and holds the only reference on
// the memory it maps, isPMACopyOnWriteLocked will take ownership of the memory
// and update the pma to indicate that it does not require copy-on-write.
//
// Preconditions:
// * vseg.Range().IsSupersetOf(pseg.Range()).
// * mm.mappingMu must be locked.
// * mm.activeMu must be locked for writing.
func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterator) bool {
	pma := pseg.ValuePtr()
	if !pma.needCOW {
		return false
	}
	if !pma.private {
		return true
	}
	// If we have the only reference on private memory to be copied, just take
	// ownership of it instead of copying.
	// If we do hold the only reference, additional references can only be
	// taken by mm.Fork(), which is excluded by mm.activeMu, so this isn't
	// racy.
	mm.privateRefs.mu.Lock()
	defer mm.privateRefs.mu.Unlock()
	fr := pseg.fileRange()
	// This check relies on mm.privateRefs.refs being kept fully merged.
	rseg := mm.privateRefs.refs.FindSegment(fr.Start)
	if rseg.Ok() && rseg.Value() == 1 && fr.End <= rseg.End() {
		pma.needCOW = false
		// pma.private => pma.translatePerms == hostarch.AnyAccess
		vma := vseg.ValuePtr()
		pma.effectivePerms = vma.effectivePerms
		pma.maxPerms = vma.maxPerms
		return false
	}
	return true
}

// Invalidate implements memmap.MappingSpace.Invalidate.
func (mm *MemoryManager) Invalidate(ar hostarch.AddrRange, opts memmap.InvalidateOpts) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
	}

	mm.activeMu.Lock()
	defer mm.activeMu.Unlock()
	if mm.captureInvalidations {
		mm.capturedInvalidations = append(mm.capturedInvalidations, invalidateArgs{ar, opts})
		return
	}
	mm.invalidateLocked(ar, opts.InvalidatePrivate, true)
}

// invalidateLocked removes pmas and AddressSpace mappings of those pmas for
// addresses in ar.
//
// Preconditions:
// * mm.activeMu must be locked for writing.
// * ar.Length() != 0.
// * ar must be page-aligned.
func (mm *MemoryManager) invalidateLocked(ar hostarch.AddrRange, invalidatePrivate, invalidateShared bool) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
	}

	var didUnmapAS bool
	pseg := mm.pmas.LowerBoundSegment(ar.Start)
	for pseg.Ok() && pseg.Start() < ar.End {
		pma := pseg.ValuePtr()
		if (invalidatePrivate && pma.private) || (invalidateShared && !pma.private) {
			pseg = mm.pmas.Isolate(pseg, ar)
			pma = pseg.ValuePtr()
			if !didUnmapAS {
				// Unmap all of ar, not just pseg.Range(), to minimize host
				// syscalls. AddressSpace mappings must be removed before
				// mm.decPrivateRef().
				mm.unmapASLocked(ar)
				didUnmapAS = true
			}
			if pma.private {
				mm.decPrivateRef(pseg.fileRange())
			}
			mm.removeRSSLocked(pseg.Range())
			pma.file.DecRef(pseg.fileRange())
			pseg = mm.pmas.Remove(pseg).NextSegment()
		} else {
			pseg = pseg.NextSegment()
		}
	}
}

// Pin returns the memmap.File ranges currently mapped by addresses in ar in
// mm, acquiring a reference on the returned ranges which the caller must
// release by calling Unpin. If not all addresses are mapped, Pin returns a
// non-nil error. Note that Pin may return both a non-empty slice of
// PinnedRanges and a non-nil error.
//
// Pin does not prevent mapped ranges from changing, making it unsuitable for
// most I/O. It should only be used in contexts that would use get_user_pages()
// in the Linux kernel.
//
// Preconditions:
// * ar.Length() != 0.
// * ar must be page-aligned.
func (mm *MemoryManager) Pin(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool) ([]PinnedRange, error) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
	}

	// Ensure that we have usable vmas.
	mm.mappingMu.RLock()
	vseg, vend, verr := mm.getVMAsLocked(ctx, ar, at, ignorePermissions)
	if vendaddr := vend.Start(); vendaddr < ar.End {
		if vendaddr <= ar.Start {
			mm.mappingMu.RUnlock()
			return nil, verr
		}
		ar.End = vendaddr
	}

	// Ensure that we have usable pmas.
	mm.activeMu.Lock()
	pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at)
	mm.mappingMu.RUnlock()
	if pendaddr := pend.Start(); pendaddr < ar.End {
		if pendaddr <= ar.Start {
			mm.activeMu.Unlock()
			return nil, perr
		}
		ar.End = pendaddr
	}

	// Gather pmas.
	var prs []PinnedRange
	for pseg.Ok() && pseg.Start() < ar.End {
		psar := pseg.Range().Intersect(ar)
		f := pseg.ValuePtr().file
		fr := pseg.fileRangeOf(psar)
		f.IncRef(fr)
		prs = append(prs, PinnedRange{
			Source: psar,
			File:   f,
			Offset: fr.Start,
		})
		pseg = pseg.NextSegment()
	}
	mm.activeMu.Unlock()

	// Return the first error in order of progress through ar.
	if perr != nil {
		return prs, perr
	}
	return prs, verr
}

// PinnedRanges are returned by MemoryManager.Pin.
type PinnedRange struct {
	// Source is the corresponding range of addresses.
	Source hostarch.AddrRange

	// File is the mapped file.
	File memmap.File

	// Offset is the offset into File at which this PinnedRange begins.
	Offset uint64
}

// FileRange returns the memmap.File offsets mapped by pr.
func (pr PinnedRange) FileRange() memmap.FileRange {
	return memmap.FileRange{pr.Offset, pr.Offset + uint64(pr.Source.Length())}
}

// Unpin releases the reference held by prs.
func Unpin(prs []PinnedRange) {
	for i := range prs {
		prs[i].File.DecRef(prs[i].FileRange())
	}
}

// movePMAsLocked moves all pmas in oldAR to newAR.
//
// Preconditions:
// * mm.activeMu must be locked for writing.
// * oldAR.Length() != 0.
// * oldAR.Length() <= newAR.Length().
// * !oldAR.Overlaps(newAR).
// * mm.pmas.IsEmptyRange(newAR).
// * oldAR and newAR must be page-aligned.
func (mm *MemoryManager) movePMAsLocked(oldAR, newAR hostarch.AddrRange) {
	if checkInvariants {
		if !oldAR.WellFormed() || oldAR.Length() == 0 || !oldAR.IsPageAligned() {
			panic(fmt.Sprintf("invalid oldAR: %v", oldAR))
		}
		if !newAR.WellFormed() || newAR.Length() == 0 || !newAR.IsPageAligned() {
			panic(fmt.Sprintf("invalid newAR: %v", newAR))
		}
		if oldAR.Length() > newAR.Length() {
			panic(fmt.Sprintf("old address range %v may contain pmas that will not fit in new address range %v", oldAR, newAR))
		}
		if oldAR.Overlaps(newAR) {
			panic(fmt.Sprintf("old and new address ranges overlap: %v, %v", oldAR, newAR))
		}
		// mm.pmas.IsEmptyRange is checked by mm.pmas.Insert.
	}

	type movedPMA struct {
		oldAR hostarch.AddrRange
		pma   pma
	}
	var movedPMAs []movedPMA
	pseg := mm.pmas.LowerBoundSegment(oldAR.Start)
	for pseg.Ok() && pseg.Start() < oldAR.End {
		pseg = mm.pmas.Isolate(pseg, oldAR)
		movedPMAs = append(movedPMAs, movedPMA{
			oldAR: pseg.Range(),
			pma:   pseg.Value(),
		})
		pseg = mm.pmas.Remove(pseg).NextSegment()
		// No RSS change is needed since we're re-inserting the same pmas
		// below.
	}

	off := newAR.Start - oldAR.Start
	pgap := mm.pmas.FindGap(newAR.Start)
	for i := range movedPMAs {
		mpma := &movedPMAs[i]
		pmaNewAR := hostarch.AddrRange{mpma.oldAR.Start + off, mpma.oldAR.End + off}
		pgap = mm.pmas.Insert(pgap, pmaNewAR, mpma.pma).NextGap()
	}

	mm.unmapASLocked(oldAR)
}

// getPMAInternalMappingsLocked ensures that pmas for all addresses in ar have
// cached internal mappings. It returns:
//
// - An iterator to the gap after the last pma with internal mappings
// containing an address in ar. If internal mappings exist for no addresses in
// ar, the iterator is to a gap that begins before ar.Start.
//
// - An error that is non-nil if internal mappings exist for only a subset of
// ar.
//
// Preconditions:
// * mm.activeMu must be locked for writing.
// * pseg.Range().Contains(ar.Start).
// * pmas must exist for all addresses in ar.
// * ar.Length() != 0.
//
// Postconditions: getPMAInternalMappingsLocked does not invalidate iterators
// into mm.pmas.
func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) (pmaGapIterator, error) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !pseg.Range().Contains(ar.Start) {
			panic(fmt.Sprintf("initial pma %v does not cover start of ar %v", pseg.Range(), ar))
		}
	}

	for {
		if err := pseg.getInternalMappingsLocked(); err != nil {
			return pseg.PrevGap(), err
		}
		if ar.End <= pseg.End() {
			return pseg.NextGap(), nil
		}
		pseg, _ = pseg.NextNonEmpty()
	}
}

// getVecPMAInternalMappingsLocked ensures that pmas for all addresses in ars
// have cached internal mappings. It returns the subset of ars for which
// internal mappings exist. If this is not equal to ars, it returns a non-nil
// error explaining why.
//
// Preconditions:
// * mm.activeMu must be locked for writing.
// * pmas must exist for all addresses in ars.
//
// Postconditions: getVecPMAInternalMappingsLocked does not invalidate iterators
// into mm.pmas.
func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars hostarch.AddrRangeSeq) (hostarch.AddrRangeSeq, error) {
	for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
		ar := arsit.Head()
		if ar.Length() == 0 {
			continue
		}
		if pend, err := mm.getPMAInternalMappingsLocked(mm.pmas.FindSegment(ar.Start), ar); err != nil {
			return truncatedAddrRangeSeq(ars, arsit, pend.Start()), err
		}
	}
	return ars, nil
}

// internalMappingsLocked returns internal mappings for addresses in ar.
//
// Preconditions:
// * mm.activeMu must be locked.
// * Internal mappings must have been previously established for all addresses
//   in ar.
// * ar.Length() != 0.
// * pseg.Range().Contains(ar.Start).
func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) safemem.BlockSeq {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !pseg.Range().Contains(ar.Start) {
			panic(fmt.Sprintf("initial pma %v does not cover start of ar %v", pseg.Range(), ar))
		}
	}

	if ar.End <= pseg.End() {
		// Since only one pma is involved, we can use pma.internalMappings
		// directly, avoiding a slice allocation.
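		// getInternalMappingsLocked maps the pma's entire file range, so
		// pma.internalMappings covers all of pseg.Range(); dropping
		// (ar.Start - pseg.Start()) bytes selects the blocks backing ar.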
		offset := uint64(ar.Start - pseg.Start())
		return pseg.ValuePtr().internalMappings.DropFirst64(offset).TakeFirst64(uint64(ar.Length()))
	}

	var ims []safemem.Block
	for {
		pr := pseg.Range().Intersect(ar)
		for pims := pseg.ValuePtr().internalMappings.DropFirst64(uint64(pr.Start - pseg.Start())).TakeFirst64(uint64(pr.Length())); !pims.IsEmpty(); pims = pims.Tail() {
			ims = append(ims, pims.Head())
		}
		if ar.End <= pseg.End() {
			break
		}
		pseg = pseg.NextSegment()
	}
	return safemem.BlockSeqFromSlice(ims)
}

// vecInternalMappingsLocked returns internal mappings for addresses in ars.
//
// Preconditions:
// * mm.activeMu must be locked.
// * Internal mappings must have been previously established for all addresses
//   in ars.
func (mm *MemoryManager) vecInternalMappingsLocked(ars hostarch.AddrRangeSeq) safemem.BlockSeq {
	var ims []safemem.Block
	for ; !ars.IsEmpty(); ars = ars.Tail() {
		ar := ars.Head()
		if ar.Length() == 0 {
			continue
		}
		for pims := mm.internalMappingsLocked(mm.pmas.FindSegment(ar.Start), ar); !pims.IsEmpty(); pims = pims.Tail() {
			ims = append(ims, pims.Head())
		}
	}
	return safemem.BlockSeqFromSlice(ims)
}

// incPrivateRef acquires a reference on private pages in fr.
func (mm *MemoryManager) incPrivateRef(fr memmap.FileRange) {
	mm.privateRefs.mu.Lock()
	defer mm.privateRefs.mu.Unlock()
	refSet := &mm.privateRefs.refs
	seg, gap := refSet.Find(fr.Start)
	for {
		switch {
		case seg.Ok() && seg.Start() < fr.End:
			seg = refSet.Isolate(seg, fr)
			seg.SetValue(seg.Value() + 1)
			seg, gap = seg.NextNonEmpty()
		case gap.Ok() && gap.Start() < fr.End:
			seg, gap = refSet.InsertWithoutMerging(gap, gap.Range().Intersect(fr), 1).NextNonEmpty()
		default:
			refSet.MergeAdjacent(fr)
			return
		}
	}
}

// decPrivateRef releases a reference on private pages in fr.
func (mm *MemoryManager) decPrivateRef(fr memmap.FileRange) {
	var freed []memmap.FileRange

	mm.privateRefs.mu.Lock()
	refSet := &mm.privateRefs.refs
	seg := refSet.LowerBoundSegment(fr.Start)
	for seg.Ok() && seg.Start() < fr.End {
		seg = refSet.Isolate(seg, fr)
		if old := seg.Value(); old == 1 {
			freed = append(freed, seg.Range())
			seg = refSet.Remove(seg).NextSegment()
		} else {
			seg.SetValue(old - 1)
			seg = seg.NextSegment()
		}
	}
	refSet.MergeAdjacent(fr)
	mm.privateRefs.mu.Unlock()

	mf := mm.mfp.MemoryFile()
	for _, fr := range freed {
		mf.DecRef(fr)
	}
}

// addRSSLocked updates the current and maximum resident set size of a
// MemoryManager to reflect the insertion of a pma at ar.
//
// Preconditions: mm.activeMu must be locked for writing.
func (mm *MemoryManager) addRSSLocked(ar hostarch.AddrRange) {
	mm.curRSS += uint64(ar.Length())
	if mm.curRSS > mm.maxRSS {
		mm.maxRSS = mm.curRSS
	}
}

// removeRSSLocked updates the current resident set size of a MemoryManager to
// reflect the removal of a pma at ar.
//
// Preconditions: mm.activeMu must be locked for writing.
func (mm *MemoryManager) removeRSSLocked(ar hostarch.AddrRange) {
	mm.curRSS -= uint64(ar.Length())
}

// pmaSetFunctions implements segment.Functions for pmaSet.
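// Merge combines pmas only if they map adjacent offsets of the same file
// with identical permissions and copy-on-write state; Split divides a pma's
// file offsets and any cached internal mappings at the split address.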
type pmaSetFunctions struct{}

func (pmaSetFunctions) MinKey() hostarch.Addr {
	return 0
}

func (pmaSetFunctions) MaxKey() hostarch.Addr {
	return ^hostarch.Addr(0)
}

func (pmaSetFunctions) ClearValue(pma *pma) {
	pma.file = nil
	pma.internalMappings = safemem.BlockSeq{}
}

func (pmaSetFunctions) Merge(ar1 hostarch.AddrRange, pma1 pma, ar2 hostarch.AddrRange, pma2 pma) (pma, bool) {
	if pma1.file != pma2.file ||
		pma1.off+uint64(ar1.Length()) != pma2.off ||
		pma1.translatePerms != pma2.translatePerms ||
		pma1.effectivePerms != pma2.effectivePerms ||
		pma1.maxPerms != pma2.maxPerms ||
		pma1.needCOW != pma2.needCOW ||
		pma1.private != pma2.private {
		return pma{}, false
	}

	// Discard internal mappings instead of trying to merge them, since merging
	// them requires an allocation and getting them again from the
	// memmap.File might not.
	pma1.internalMappings = safemem.BlockSeq{}
	return pma1, true
}

func (pmaSetFunctions) Split(ar hostarch.AddrRange, p pma, split hostarch.Addr) (pma, pma) {
	newlen1 := uint64(split - ar.Start)
	p2 := p
	p2.off += newlen1
	if !p.internalMappings.IsEmpty() {
		p.internalMappings = p.internalMappings.TakeFirst64(newlen1)
		p2.internalMappings = p2.internalMappings.DropFirst64(newlen1)
	}
	return p, p2
}

// findOrSeekPrevUpperBoundPMA returns mm.pmas.UpperBoundSegment(addr), but may
// do so by scanning linearly backward from pgap.
//
// Preconditions:
// * mm.activeMu must be locked.
// * addr <= pgap.Start().
func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr hostarch.Addr, pgap pmaGapIterator) pmaIterator {
	if checkInvariants {
		if !pgap.Ok() {
			panic("terminal pma iterator")
		}
		if addr > pgap.Start() {
			panic(fmt.Sprintf("can't seek backward to %#x from %#x", addr, pgap.Start()))
		}
	}
	// Optimistically check if pgap.PrevSegment() is the PMA we're looking for,
	// which is the case if findOrSeekPrevUpperBoundPMA is called to find the
	// start of a range containing only a single PMA.
	if pseg := pgap.PrevSegment(); pseg.Start() <= addr {
		return pseg
	}
	return mm.pmas.UpperBoundSegment(addr)
}

// getInternalMappingsLocked ensures that pseg.ValuePtr().internalMappings is
// non-empty.
//
// Preconditions: mm.activeMu must be locked for writing.
func (pseg pmaIterator) getInternalMappingsLocked() error {
	pma := pseg.ValuePtr()
	if pma.internalMappings.IsEmpty() {
		// This must use maxPerms (instead of perms) because some permission
		// constraints are only visible to vmas; for example, mappings of
		// read-only files have vma.maxPerms.Write unset, but this may not be
		// visible to the memmap.Mappable.
		perms := pma.maxPerms
		// We will never execute application code through an internal mapping.
		perms.Execute = false
		ims, err := pma.file.MapInternal(pseg.fileRange(), perms)
		if err != nil {
			return err
		}
		pma.internalMappings = ims
	}
	return nil
}

func (pseg pmaIterator) fileRange() memmap.FileRange {
	return pseg.fileRangeOf(pseg.Range())
}

// Preconditions:
// * pseg.Range().IsSupersetOf(ar).
// * ar.Length() != 0.
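// * pseg must not be a terminal iterator.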
func (pseg pmaIterator) fileRangeOf(ar hostarch.AddrRange) memmap.FileRange {
	if checkInvariants {
		if !pseg.Ok() {
			panic("terminal pma iterator")
		}
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !pseg.Range().IsSupersetOf(ar) {
			panic(fmt.Sprintf("ar %v out of bounds %v", ar, pseg.Range()))
		}
	}

	pma := pseg.ValuePtr()
	pstart := pseg.Start()
	return memmap.FileRange{pma.off + uint64(ar.Start-pstart), pma.off + uint64(ar.End-pstart)}
}