github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/sentry/mm/pma.go

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mm
    16  
    17  import (
    18  	"fmt"
    19  	"sync/atomic"
    20  
    21  	"github.com/ttpreport/gvisor-ligolo/pkg/context"
    22  	"github.com/ttpreport/gvisor-ligolo/pkg/errors/linuxerr"
    23  	"github.com/ttpreport/gvisor-ligolo/pkg/hostarch"
    24  	"github.com/ttpreport/gvisor-ligolo/pkg/safecopy"
    25  	"github.com/ttpreport/gvisor-ligolo/pkg/safemem"
    26  	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/memmap"
    27  	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/pgalloc"
    28  	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/usage"
    29  )
    30  
    31  // existingPMAsLocked checks that pmas exist for all addresses in ar, and
    32  // support access of type (at, ignorePermissions). If so, it returns an
    33  // iterator to the pma containing ar.Start. Otherwise it returns a terminal
    34  // iterator.
    35  //
    36  // Preconditions:
    37  //   - mm.activeMu must be locked.
    38  //   - ar.Length() != 0.
    39  func (mm *MemoryManager) existingPMAsLocked(ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator {
    40  	if checkInvariants {
    41  		if !ar.WellFormed() || ar.Length() == 0 {
    42  			panic(fmt.Sprintf("invalid ar: %v", ar))
    43  		}
    44  	}
    45  
    46  	first := mm.pmas.FindSegment(ar.Start)
    47  	pseg := first
    48  	for pseg.Ok() {
    49  		pma := pseg.ValuePtr()
    50  		perms := pma.effectivePerms
    51  		if ignorePermissions {
    52  			perms = pma.maxPerms
    53  		}
    54  		if !perms.SupersetOf(at) {
    55  			return pmaIterator{}
    56  		}
    57  		if needInternalMappings && pma.internalMappings.IsEmpty() {
    58  			return pmaIterator{}
    59  		}
    60  
    61  		if ar.End <= pseg.End() {
    62  			return first
    63  		}
    64  		pseg, _ = pseg.NextNonEmpty()
    65  	}
    66  
    67  	// Ran out of pmas before reaching ar.End.
    68  	return pmaIterator{}
    69  }
    70  
    71  // existingVecPMAsLocked returns true if pmas exist for all addresses in ars,
    72  // and support access of type (at, ignorePermissions).
    73  //
    74  // Preconditions: mm.activeMu must be locked.
    75  func (mm *MemoryManager) existingVecPMAsLocked(ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) bool {
    76  	for ; !ars.IsEmpty(); ars = ars.Tail() {
    77  		if ar := ars.Head(); ar.Length() != 0 && !mm.existingPMAsLocked(ar, at, ignorePermissions, needInternalMappings).Ok() {
    78  			return false
    79  		}
    80  	}
    81  	return true
    82  }
    83  
    84  // getPMAsLocked ensures that pmas exist for all addresses in ar, and support
    85  // access of type at. It returns:
    86  //
    87  //   - An iterator to the pma containing ar.Start. If no pma contains ar.Start,
    88  //     the iterator is unspecified.
    89  //
    90  //   - An iterator to the gap after the last pma containing an address in ar. If
    91  //     pmas exist for no addresses in ar, the iterator is to a gap that begins
    92  //     before ar.Start.
    93  //
    94  //   - An error that is non-nil if pmas exist for only a subset of ar.
    95  //
    96  // Preconditions:
    97  //   - mm.mappingMu must be locked.
    98  //   - mm.activeMu must be locked for writing.
    99  //   - ar.Length() != 0.
   100  //   - vseg.Range().Contains(ar.Start).
   101  //   - vmas must exist for all addresses in ar, and support accesses of type at
   102  //     (i.e. permission checks must have been performed against vmas).
   103  func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) {
   104  	if checkInvariants {
   105  		if !ar.WellFormed() || ar.Length() == 0 {
   106  			panic(fmt.Sprintf("invalid ar: %v", ar))
   107  		}
   108  		if !vseg.Ok() {
   109  			panic("terminal vma iterator")
   110  		}
   111  		if !vseg.Range().Contains(ar.Start) {
   112  			panic(fmt.Sprintf("initial vma %v does not cover start of ar %v", vseg.Range(), ar))
   113  		}
   114  	}
   115  
   116  	// Page-align ar so that all AddrRanges are aligned.
   117  	end, ok := ar.End.RoundUp()
   118  	var alignerr error
   119  	if !ok {
   120  		end = ar.End.RoundDown()
   121  		alignerr = linuxerr.EFAULT
   122  	}
   123  	ar = hostarch.AddrRange{ar.Start.RoundDown(), end}
   124  
   125  	pstart, pend, perr := mm.getPMAsInternalLocked(ctx, vseg, ar, at)
   126  	if pend.Start() <= ar.Start {
   127  		return pmaIterator{}, pend, perr
   128  	}
   129  	// getPMAsInternalLocked may not have returned pstart due to iterator
   130  	// invalidation.
   131  	if !pstart.Ok() {
   132  		pstart = mm.findOrSeekPrevUpperBoundPMA(ar.Start, pend)
   133  	}
   134  	if perr != nil {
   135  		return pstart, pend, perr
   136  	}
   137  	return pstart, pend, alignerr
   138  }
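
// For illustration only, a minimal sketch of how a caller typically consumes
// getPMAsLocked's results (Pin below follows this pattern): truncate the range
// to the extent for which pmas now exist, and fail outright only if no
// progress was made. Assumes mm.mappingMu and mm.activeMu (for writing) are
// held, ar is non-empty, and vseg covers ar.Start.
//
//	pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at)
//	if pendaddr := pend.Start(); pendaddr < ar.End {
//		if pendaddr <= ar.Start {
//			return perr // no pmas could be created at all
//		}
//		ar.End = pendaddr // partial success; report perr after using the pmas
//	}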
   139  
   140  // getVecPMAsLocked ensures that pmas exist for all addresses in ars, and
   141  // support access of type at. It returns the subset of ars for which pmas
   142  // exist. If this is not equal to ars, it returns a non-nil error explaining
   143  // why.
   144  //
   145  // Preconditions:
   146  //   - mm.mappingMu must be locked.
   147  //   - mm.activeMu must be locked for writing.
   148  //   - vmas must exist for all addresses in ars, and support accesses of type at
   149  //     (i.e. permission checks must have been performed against vmas).
   150  func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType) (hostarch.AddrRangeSeq, error) {
   151  	for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
   152  		ar := arsit.Head()
   153  		if ar.Length() == 0 {
   154  			continue
   155  		}
   156  		if checkInvariants {
   157  			if !ar.WellFormed() {
   158  				panic(fmt.Sprintf("invalid ar: %v", ar))
   159  			}
   160  		}
   161  
   162  		// Page-align ar so that all AddrRanges are aligned.
   163  		end, ok := ar.End.RoundUp()
   164  		var alignerr error
   165  		if !ok {
   166  			end = ar.End.RoundDown()
   167  			alignerr = linuxerr.EFAULT
   168  		}
   169  		ar = hostarch.AddrRange{ar.Start.RoundDown(), end}
   170  
   171  		_, pend, perr := mm.getPMAsInternalLocked(ctx, mm.vmas.FindSegment(ar.Start), ar, at)
   172  		if perr != nil {
   173  			return truncatedAddrRangeSeq(ars, arsit, pend.Start()), perr
   174  		}
   175  		if alignerr != nil {
   176  			return truncatedAddrRangeSeq(ars, arsit, pend.Start()), alignerr
   177  		}
   178  	}
   179  
   180  	return ars, nil
   181  }
   182  
   183  // getPMAsInternalLocked is equivalent to getPMAsLocked, with the following
   184  // exceptions:
   185  //
   186  //   - getPMAsInternalLocked returns a pmaIterator on a best-effort basis (that
   187  //     is, the returned iterator may be terminal, even if a pma that contains
   188  //     ar.Start exists). Returning this iterator on a best-effort basis allows
   189  //     callers that require it to use it when it's cheaply available, while also
   190  //     avoiding the overhead of retrieving it when it's not.
   191  //
   192  //   - getPMAsInternalLocked additionally requires that ar is page-aligned.
   193  //     getPMAsInternalLocked is an implementation helper for getPMAsLocked and
   194  //     getVecPMAsLocked; other clients should call one of those instead.
   195  func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) {
   196  	if checkInvariants {
   197  		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
   198  			panic(fmt.Sprintf("invalid ar: %v", ar))
   199  		}
   200  		if !vseg.Ok() {
   201  			panic("terminal vma iterator")
   202  		}
   203  		if !vseg.Range().Contains(ar.Start) {
   204  			panic(fmt.Sprintf("initial vma %v does not cover start of ar %v", vseg.Range(), ar))
   205  		}
   206  	}
   207  
   208  	memCgID := pgalloc.MemoryCgroupIDFromContext(ctx)
   209  	opts := pgalloc.AllocOpts{Kind: usage.Anonymous, Dir: pgalloc.BottomUp, MemCgID: memCgID}
   210  	vma := vseg.ValuePtr()
   211  	if uintptr(ar.Start) < atomic.LoadUintptr(&vma.lastFault) {
   212  		// Detect cases where memory is accessed downwards and change memory file
   213  		// allocation order to increase the chances that pages are coalesced.
   214  		opts.Dir = pgalloc.TopDown
   215  	}
   216  	atomic.StoreUintptr(&vma.lastFault, uintptr(ar.Start))
   217  
   218  	mf := mm.mfp.MemoryFile()
   219  	// Limit the range we allocate to ar, aligned to privateAllocUnit.
   220  	maskAR := privateAligned(ar)
   221  	didUnmapAS := false
   222  	// The range in which we iterate vmas and pmas is still limited to ar, to
   223  	// ensure that we don't allocate or COW-break a pma we don't need.
   224  	pseg, pgap := mm.pmas.Find(ar.Start)
   225  	pstart := pseg
   226  	for {
   227  		// Get pmas for this vma.
   228  		vsegAR := vseg.Range().Intersect(ar)
   229  		vma := vseg.ValuePtr()
   230  	pmaLoop:
   231  		for {
   232  			switch {
   233  			case pgap.Ok() && pgap.Start() < vsegAR.End:
   234  				// Need a pma here.
   235  				optAR := vseg.Range().Intersect(pgap.Range())
   236  				if checkInvariants {
   237  					if optAR.Length() == 0 {
   238  						panic(fmt.Sprintf("vseg %v and pgap %v do not overlap", vseg, pgap))
   239  					}
   240  				}
   241  				if vma.mappable == nil {
   242  					// Private anonymous mappings get pmas by allocating.
   243  					allocAR := optAR.Intersect(maskAR)
   244  					fr, err := mf.Allocate(uint64(allocAR.Length()), opts)
   245  					if err != nil {
   246  						return pstart, pgap, err
   247  					}
   248  					if checkInvariants {
   249  						if !fr.WellFormed() || fr.Length() != uint64(allocAR.Length()) {
   250  							panic(fmt.Sprintf("Allocate(%v) returned invalid FileRange %v", allocAR.Length(), fr))
   251  						}
   252  					}
   253  					mm.addRSSLocked(allocAR)
   254  					mm.incPrivateRef(fr)
   255  					mf.IncRef(fr, memCgID)
   256  					pseg, pgap = mm.pmas.Insert(pgap, allocAR, pma{
   257  						file:           mf,
   258  						off:            fr.Start,
   259  						translatePerms: hostarch.AnyAccess,
   260  						effectivePerms: vma.effectivePerms,
   261  						maxPerms:       vma.maxPerms,
   262  						// Since we just allocated this memory and have the
   263  						// only reference, the new pma does not need
   264  						// copy-on-write.
   265  						private: true,
   266  					}).NextNonEmpty()
   267  					pstart = pmaIterator{} // iterators invalidated
   268  				} else {
   269  					// Other mappings get pmas by translating.
   270  					optMR := vseg.mappableRangeOf(optAR)
   271  					reqAR := optAR.Intersect(ar)
   272  					reqMR := vseg.mappableRangeOf(reqAR)
   273  					perms := at
   274  					if vma.private {
   275  						// This pma will be copy-on-write; don't require write
   276  						// permission, but do require read permission to
   277  						// facilitate the copy.
   278  						//
   279  						// If at.Write is true, we will need to break
   280  						// copy-on-write immediately, which occurs after
   281  						// translation below.
   282  						perms.Read = true
   283  						perms.Write = false
   284  					}
   285  					ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms)
   286  					if checkInvariants {
   287  						if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil {
   288  							panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err))
   289  						}
   290  					}
   291  					// Install a pma for each translation.
   292  					if len(ts) == 0 {
   293  						return pstart, pgap, err
   294  					}
   295  					pstart = pmaIterator{} // iterators invalidated
   296  					for _, t := range ts {
   297  						newpmaAR := vseg.addrRangeOf(t.Source)
   298  						newpma := pma{
   299  							file:           t.File,
   300  							off:            t.Offset,
   301  							translatePerms: t.Perms,
   302  							effectivePerms: vma.effectivePerms.Intersect(t.Perms),
   303  							maxPerms:       vma.maxPerms.Intersect(t.Perms),
   304  						}
   305  						if vma.private {
   306  							newpma.effectivePerms.Write = false
   307  							newpma.maxPerms.Write = false
   308  							newpma.needCOW = true
   309  						}
   310  						mm.addRSSLocked(newpmaAR)
   311  						t.File.IncRef(t.FileRange(), memCgID)
   312  						// This is valid because memmap.Mappable.Translate is
   313  						// required to return Translations in increasing
   314  						// Translation.Source order.
   315  						pseg = mm.pmas.Insert(pgap, newpmaAR, newpma)
   316  						pgap = pseg.NextGap()
   317  					}
   318  					// The error returned by Translate is only significant if
   319  					// it occurred before ar.End.
   320  					if err != nil && vseg.addrRangeOf(ts[len(ts)-1].Source).End < ar.End {
   321  						return pstart, pgap, err
   322  					}
   323  					// Rewind pseg to the first pma inserted and continue the
   324  					// loop to check if we need to break copy-on-write.
   325  					pseg, pgap = mm.findOrSeekPrevUpperBoundPMA(vseg.addrRangeOf(ts[0].Source).Start, pgap), pmaGapIterator{}
   326  					continue
   327  				}
   328  
   329  			case pseg.Ok() && pseg.Start() < vsegAR.End:
   330  				oldpma := pseg.ValuePtr()
   331  				if at.Write && mm.isPMACopyOnWriteLocked(vseg, pseg) {
   332  					// Break copy-on-write by copying.
   333  					if checkInvariants {
   334  						if !oldpma.maxPerms.Read {
   335  							panic(fmt.Sprintf("pma %v needs to be copied for writing, but is not readable: %v", pseg.Range(), oldpma))
   336  						}
   337  					}
   338  					var copyAR hostarch.AddrRange
   339  					if vma := vseg.ValuePtr(); vma.effectivePerms.Execute {
   340  						// The majority of copy-on-write breaks on executable
   341  						// pages come from:
   342  						//
   343  						//	- The ELF loader, which must zero out bytes on the
   344  						//		last page of each segment after the end of the
   345  						//		segment.
   346  						//
   347  						//	- gdb's use of ptrace to insert breakpoints.
   348  						//
   349  						// Neither of these cases has enough spatial locality
   350  						// to benefit from copying nearby pages, so if the vma
   351  						// is executable, only copy the pages required.
   352  						copyAR = pseg.Range().Intersect(ar)
   353  					} else if vma.growsDown {
   354  						// In most cases, the new process will not use most of
   355  						// its stack before exiting or invoking execve(); it is
   356  						// especially unlikely to return very far down its call
   357  						// stack, since async-signal-safety concerns in
   358  						// multithreaded programs prevent the new process from
   359  						// being able to do much. So only copy up to one page
   360  						// before and after the pages required.
   361  						stackMaskAR := ar
   362  						if newStart := stackMaskAR.Start - hostarch.PageSize; newStart < stackMaskAR.Start {
   363  							stackMaskAR.Start = newStart
   364  						}
   365  						if newEnd := stackMaskAR.End + hostarch.PageSize; newEnd > stackMaskAR.End {
   366  							stackMaskAR.End = newEnd
   367  						}
   368  						copyAR = pseg.Range().Intersect(stackMaskAR)
   369  					} else {
   370  						copyAR = pseg.Range().Intersect(maskAR)
   371  					}
   372  					// Get internal mappings from the pma to copy from.
   373  					if err := pseg.getInternalMappingsLocked(); err != nil {
   374  						return pstart, pseg.PrevGap(), err
   375  					}
   376  					// Copy contents.
   377  					fr, err := mf.Allocate(uint64(copyAR.Length()), pgalloc.AllocOpts{
   378  						Kind:    usage.Anonymous,
   379  						Mode:    pgalloc.AllocateAndWritePopulate,
   380  						MemCgID: memCgID,
   381  						Reader:  &safemem.BlockSeqReader{mm.internalMappingsLocked(pseg, copyAR)},
   382  					})
   383  					if _, ok := err.(safecopy.BusError); ok {
   384  						// If we got SIGBUS during the copy, deliver SIGBUS to
   385  						// userspace (instead of SIGSEGV) if we're breaking
   386  						// copy-on-write due to application page fault.
   387  						err = &memmap.BusError{err}
   388  					}
   389  					if fr.Length() == 0 {
   390  						return pstart, pseg.PrevGap(), err
   391  					}
   392  					// Unmap all of maskAR, not just copyAR, to minimize host
   393  					// syscalls. AddressSpace mappings must be removed before
   394  					// mm.decPrivateRef().
   395  					if !didUnmapAS {
   396  						mm.unmapASLocked(maskAR)
   397  						didUnmapAS = true
   398  					}
   399  					// Replace the pma with a copy in the part of the address
   400  					// range where copying was successful. This doesn't change
   401  					// RSS.
   402  					copyAR.End = copyAR.Start + hostarch.Addr(fr.Length())
   403  					if copyAR != pseg.Range() {
   404  						pseg = mm.pmas.Isolate(pseg, copyAR)
   405  						pstart = pmaIterator{} // iterators invalidated
   406  					}
   407  					oldpma = pseg.ValuePtr()
   408  					if oldpma.private {
   409  						mm.decPrivateRef(pseg.fileRange())
   410  					}
   411  					oldpma.file.DecRef(pseg.fileRange())
   412  					mm.incPrivateRef(fr)
   413  					mf.IncRef(fr, memCgID)
   414  					oldpma.file = mf
   415  					oldpma.off = fr.Start
   416  					oldpma.translatePerms = hostarch.AnyAccess
   417  					oldpma.effectivePerms = vma.effectivePerms
   418  					oldpma.maxPerms = vma.maxPerms
   419  					oldpma.needCOW = false
   420  					oldpma.private = true
   421  					oldpma.internalMappings = safemem.BlockSeq{}
   422  					// Try to merge the pma with its neighbors.
   423  					if prev := pseg.PrevSegment(); prev.Ok() {
   424  						if merged := mm.pmas.Merge(prev, pseg); merged.Ok() {
   425  							pseg = merged
   426  							pstart = pmaIterator{} // iterators invalidated
   427  						}
   428  					}
   429  					if next := pseg.NextSegment(); next.Ok() {
   430  						if merged := mm.pmas.Merge(pseg, next); merged.Ok() {
   431  							pseg = merged
   432  							pstart = pmaIterator{} // iterators invalidated
   433  						}
   434  					}
    435  					// The error returned by mf.Allocate is only
    436  					// significant if it occurred before ar.End.
   437  					if err != nil && pseg.End() < ar.End {
   438  						return pstart, pseg.NextGap(), err
   439  					}
   440  					// Ensure pseg and pgap are correct for the next iteration
   441  					// of the loop.
   442  					pseg, pgap = pseg.NextNonEmpty()
   443  				} else if !oldpma.translatePerms.SupersetOf(at) {
   444  					// Get new pmas (with sufficient permissions) by calling
   445  					// memmap.Mappable.Translate again.
   446  					if checkInvariants {
   447  						if oldpma.private {
   448  							panic(fmt.Sprintf("private pma %v has non-maximal pma.translatePerms: %v", pseg.Range(), oldpma))
   449  						}
   450  					}
   451  					// Allow the entire pma to be replaced.
   452  					optAR := pseg.Range()
   453  					optMR := vseg.mappableRangeOf(optAR)
   454  					reqAR := optAR.Intersect(ar)
   455  					reqMR := vseg.mappableRangeOf(reqAR)
   456  					perms := oldpma.translatePerms.Union(at)
   457  					ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms)
   458  					if checkInvariants {
   459  						if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil {
   460  							panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err))
   461  						}
   462  					}
   463  					// Remove the part of the existing pma covered by new
   464  					// Translations, then insert new pmas. This doesn't change
   465  					// RSS. Note that we don't need to call unmapASLocked: any
   466  					// existing AddressSpace mappings are still valid (though
   467  					// less permissive than the new pmas indicate) until
   468  					// Invalidate is called, and will be replaced by future
   469  					// calls to mapASLocked.
   470  					if len(ts) == 0 {
   471  						return pstart, pseg.PrevGap(), err
   472  					}
   473  					transMR := memmap.MappableRange{ts[0].Source.Start, ts[len(ts)-1].Source.End}
   474  					transAR := vseg.addrRangeOf(transMR)
   475  					pseg = mm.pmas.Isolate(pseg, transAR)
   476  					pseg.ValuePtr().file.DecRef(pseg.fileRange())
   477  					pgap = mm.pmas.Remove(pseg)
   478  					pstart = pmaIterator{} // iterators invalidated
   479  					for _, t := range ts {
   480  						newpmaAR := vseg.addrRangeOf(t.Source)
   481  						newpma := pma{
   482  							file:           t.File,
   483  							off:            t.Offset,
   484  							translatePerms: t.Perms,
   485  							effectivePerms: vma.effectivePerms.Intersect(t.Perms),
   486  							maxPerms:       vma.maxPerms.Intersect(t.Perms),
   487  						}
   488  						if vma.private {
   489  							newpma.effectivePerms.Write = false
   490  							newpma.maxPerms.Write = false
   491  							newpma.needCOW = true
   492  						}
   493  						t.File.IncRef(t.FileRange(), memCgID)
   494  						pseg = mm.pmas.Insert(pgap, newpmaAR, newpma)
   495  						pgap = pseg.NextGap()
   496  					}
   497  					// The error returned by Translate is only significant if
   498  					// it occurred before ar.End.
   499  					if err != nil && pseg.End() < ar.End {
   500  						return pstart, pgap, err
   501  					}
   502  					// Ensure pseg and pgap are correct for the next iteration
   503  					// of the loop.
   504  					if pgap.Range().Length() == 0 {
   505  						pseg, pgap = pgap.NextSegment(), pmaGapIterator{}
   506  					} else {
   507  						pseg = pmaIterator{}
   508  					}
   509  				} else {
   510  					// We have a usable pma; continue.
   511  					pseg, pgap = pseg.NextNonEmpty()
   512  				}
   513  
   514  			default:
   515  				break pmaLoop
   516  			}
   517  		}
   518  		// Go to the next vma.
   519  		if ar.End <= vseg.End() {
   520  			if pgap.Ok() {
   521  				return pstart, pgap, nil
   522  			}
   523  			return pstart, pseg.PrevGap(), nil
   524  		}
   525  		vseg = vseg.NextSegment()
   526  	}
   527  }
   528  
   529  const (
   530  	// When memory is allocated for a private pma, align the allocated address
   531  	// range to a privateAllocUnit boundary when possible. Larger values of
   532  	// privateAllocUnit may reduce page faults by allowing fewer, larger pmas
   533  	// to be mapped, but may result in larger amounts of wasted memory in the
   534  	// presence of fragmentation. privateAllocUnit must be a power-of-2
   535  	// multiple of hostarch.PageSize.
   536  	privateAllocUnit = hostarch.HugePageSize
   537  
   538  	privateAllocMask = privateAllocUnit - 1
   539  )
   540  
   541  func privateAligned(ar hostarch.AddrRange) hostarch.AddrRange {
   542  	aligned := hostarch.AddrRange{ar.Start &^ privateAllocMask, ar.End}
   543  	if end := (ar.End + privateAllocMask) &^ privateAllocMask; end >= ar.End {
   544  		aligned.End = end
   545  	}
   546  	if checkInvariants {
   547  		if !aligned.IsSupersetOf(ar) {
   548  			panic(fmt.Sprintf("aligned AddrRange %#v is not a superset of ar %#v", aligned, ar))
   549  		}
   550  	}
   551  	return aligned
   552  }
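
// As a concrete illustration of the arithmetic above (assuming
// hostarch.HugePageSize, and therefore privateAllocUnit, is 2 MiB as on
// x86-64), using plain integers in place of hostarch.Addr:
//
//	const unit = 0x200000 // 2 MiB
//	const mask = unit - 1
//	start, end := uint64(0x201000), uint64(0x203000)
//	alignedStart := start &^ mask      // 0x200000
//	alignedEnd := (end + mask) &^ mask // 0x400000
//
// The aligned range is only an upper bound on what is allocated:
// getPMAsInternalLocked intersects it with the vma and the pma gap being
// filled before calling mf.Allocate.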
   553  
   554  // isPMACopyOnWriteLocked returns true if the contents of the pma represented
   555  // by pseg must be copied to a new private pma to be written to.
   556  //
   557  // If the pma is a copy-on-write private pma, and holds the only reference on
   558  // the memory it maps, isPMACopyOnWriteLocked will take ownership of the memory
   559  // and update the pma to indicate that it does not require copy-on-write.
   560  //
   561  // Preconditions:
   562  //   - vseg.Range().IsSupersetOf(pseg.Range()).
   563  //   - mm.mappingMu must be locked.
   564  //   - mm.activeMu must be locked for writing.
   565  func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterator) bool {
   566  	pma := pseg.ValuePtr()
   567  	if !pma.needCOW {
   568  		return false
   569  	}
   570  	if !pma.private {
   571  		return true
   572  	}
   573  	// If we have the only reference on private memory to be copied, just take
   574  	// ownership of it instead of copying. If we do hold the only reference,
   575  	// additional references can only be taken by mm.Fork(), which is excluded
   576  	// by mm.activeMu, so this isn't racy.
   577  	mm.privateRefs.mu.Lock()
   578  	defer mm.privateRefs.mu.Unlock()
   579  	fr := pseg.fileRange()
   580  	// This check relies on mm.privateRefs.refs being kept fully merged.
   581  	rseg := mm.privateRefs.refs.FindSegment(fr.Start)
   582  	if rseg.Ok() && rseg.Value() == 1 && fr.End <= rseg.End() {
   583  		pma.needCOW = false
   584  		// pma.private => pma.translatePerms == hostarch.AnyAccess
   585  		vma := vseg.ValuePtr()
   586  		pma.effectivePerms = vma.effectivePerms
   587  		pma.maxPerms = vma.maxPerms
   588  		return false
   589  	}
   590  	return true
   591  }
   592  
   593  // Invalidate implements memmap.MappingSpace.Invalidate.
   594  func (mm *MemoryManager) Invalidate(ar hostarch.AddrRange, opts memmap.InvalidateOpts) {
   595  	if checkInvariants {
   596  		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
   597  			panic(fmt.Sprintf("invalid ar: %v", ar))
   598  		}
   599  	}
   600  
   601  	mm.activeMu.Lock()
   602  	defer mm.activeMu.Unlock()
   603  	if mm.captureInvalidations {
   604  		mm.capturedInvalidations = append(mm.capturedInvalidations, invalidateArgs{ar, opts})
   605  		return
   606  	}
   607  	mm.invalidateLocked(ar, opts.InvalidatePrivate, true)
   608  }
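
// A minimal sketch of a call site, for illustration: a memmap.Mappable whose
// pages are being dropped invalidates the affected range in each
// memmap.MappingSpace it is mapped into, which for a MemoryManager reduces to
// a call like the following (ar is assumed to be page-aligned and non-empty):
//
//	mm.Invalidate(ar, memmap.InvalidateOpts{
//		// Keep private pmas: they hold their own copy of the data and no
//		// longer depend on the Mappable.
//		InvalidatePrivate: false,
//	})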
   609  
   610  // invalidateLocked removes pmas and AddressSpace mappings of those pmas for
   611  // addresses in ar.
   612  //
   613  // Preconditions:
   614  //   - mm.activeMu must be locked for writing.
   615  //   - ar.Length() != 0.
   616  //   - ar must be page-aligned.
   617  func (mm *MemoryManager) invalidateLocked(ar hostarch.AddrRange, invalidatePrivate, invalidateShared bool) {
   618  	if checkInvariants {
   619  		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
   620  			panic(fmt.Sprintf("invalid ar: %v", ar))
   621  		}
   622  	}
   623  
   624  	var didUnmapAS bool
   625  	pseg := mm.pmas.LowerBoundSegment(ar.Start)
   626  	for pseg.Ok() && pseg.Start() < ar.End {
   627  		pma := pseg.ValuePtr()
   628  		if (invalidatePrivate && pma.private) || (invalidateShared && !pma.private) {
   629  			pseg = mm.pmas.Isolate(pseg, ar)
   630  			pma = pseg.ValuePtr()
   631  			if !didUnmapAS {
   632  				// Unmap all of ar, not just pseg.Range(), to minimize host
   633  				// syscalls. AddressSpace mappings must be removed before
   634  				// mm.decPrivateRef().
   635  				//
   636  				// Note that we do more than just ar here, and extrapolate
   637  				// to the end of any previous region that we may have mapped.
   638  				// This is done to ensure that lower layers can fully invalidate
   639  				// intermediate pagetable pages during the unmap.
   640  				var unmapAR hostarch.AddrRange
   641  				if prev := pseg.PrevSegment(); prev.Ok() {
   642  					unmapAR.Start = prev.End()
   643  				} else {
   644  					unmapAR.Start = mm.layout.MinAddr
   645  				}
   646  				if last := mm.pmas.LowerBoundSegment(ar.End); last.Ok() {
   647  					if last.Start() < ar.End {
   648  						unmapAR.End = ar.End
   649  					} else {
   650  						unmapAR.End = last.Start()
   651  					}
   652  				} else {
   653  					unmapAR.End = mm.layout.MaxAddr
   654  				}
   655  				mm.unmapASLocked(unmapAR)
   656  				didUnmapAS = true
   657  			}
   658  			if pma.private {
   659  				mm.decPrivateRef(pseg.fileRange())
   660  			}
   661  			mm.removeRSSLocked(pseg.Range())
   662  			pma.file.DecRef(pseg.fileRange())
   663  			pseg = mm.pmas.Remove(pseg).NextSegment()
   664  		} else {
   665  			pseg = pseg.NextSegment()
   666  		}
   667  	}
   668  }
   669  
   670  // Pin returns the memmap.File ranges currently mapped by addresses in ar in
   671  // mm, acquiring a reference on the returned ranges which the caller must
   672  // release by calling Unpin. If not all addresses are mapped, Pin returns a
   673  // non-nil error. Note that Pin may return both a non-empty slice of
   674  // PinnedRanges and a non-nil error.
   675  //
   676  // Pin does not prevent mapped ranges from changing, making it unsuitable for
   677  // most I/O. It should only be used in contexts that would use get_user_pages()
   678  // in the Linux kernel.
   679  //
   680  // Preconditions:
   681  //   - ar.Length() != 0.
   682  //   - ar must be page-aligned.
   683  func (mm *MemoryManager) Pin(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool) ([]PinnedRange, error) {
   684  	if checkInvariants {
   685  		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
   686  			panic(fmt.Sprintf("invalid ar: %v", ar))
   687  		}
   688  	}
   689  
   690  	// Ensure that we have usable vmas.
   691  	mm.mappingMu.RLock()
   692  	vseg, vend, verr := mm.getVMAsLocked(ctx, ar, at, ignorePermissions)
   693  	if vendaddr := vend.Start(); vendaddr < ar.End {
   694  		if vendaddr <= ar.Start {
   695  			mm.mappingMu.RUnlock()
   696  			return nil, verr
   697  		}
   698  		ar.End = vendaddr
   699  	}
   700  
   701  	// Ensure that we have usable pmas.
   702  	mm.activeMu.Lock()
   703  	pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at)
   704  	mm.mappingMu.RUnlock()
   705  	if pendaddr := pend.Start(); pendaddr < ar.End {
   706  		if pendaddr <= ar.Start {
   707  			mm.activeMu.Unlock()
   708  			return nil, perr
   709  		}
   710  		ar.End = pendaddr
   711  	}
   712  
   713  	memCgID := pgalloc.MemoryCgroupIDFromContext(ctx)
   714  	// Gather pmas.
   715  	var prs []PinnedRange
   716  	for pseg.Ok() && pseg.Start() < ar.End {
   717  		psar := pseg.Range().Intersect(ar)
   718  		f := pseg.ValuePtr().file
   719  		fr := pseg.fileRangeOf(psar)
   720  		f.IncRef(fr, memCgID)
   721  		prs = append(prs, PinnedRange{
   722  			Source: psar,
   723  			File:   f,
   724  			Offset: fr.Start,
   725  		})
   726  		pseg = pseg.NextSegment()
   727  	}
   728  	mm.activeMu.Unlock()
   729  
   730  	// Return the first error in order of progress through ar.
   731  	if perr != nil {
   732  		return prs, perr
   733  	}
   734  	return prs, verr
   735  }
   736  
    737  // PinnedRange is a pinned range of memory returned by MemoryManager.Pin.
   738  type PinnedRange struct {
   739  	// Source is the corresponding range of addresses.
   740  	Source hostarch.AddrRange
   741  
   742  	// File is the mapped file.
   743  	File memmap.File
   744  
   745  	// Offset is the offset into File at which this PinnedRange begins.
   746  	Offset uint64
   747  }
   748  
   749  // FileRange returns the memmap.File offsets mapped by pr.
   750  func (pr PinnedRange) FileRange() memmap.FileRange {
   751  	return memmap.FileRange{pr.Offset, pr.Offset + uint64(pr.Source.Length())}
   752  }
   753  
   754  // Unpin releases the reference held by prs.
   755  func Unpin(prs []PinnedRange) {
   756  	for i := range prs {
   757  		prs[i].File.DecRef(prs[i].FileRange())
   758  	}
   759  }
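
// A minimal usage sketch, for illustration: Pin and Unpin are intended to be
// paired, and since Pin may return both pinned ranges and an error, the caller
// releases whatever was pinned regardless. Assumes ar is page-aligned and
// non-empty.
//
//	prs, err := mm.Pin(ctx, ar, hostarch.Read, false /* ignorePermissions */)
//	defer Unpin(prs) // drop the references taken by Pin, even on partial success
//	for _, pr := range prs {
//		fr := pr.FileRange() // offsets into pr.File backing pr.Source
//		_ = fr
//	}
//	if err != nil {
//		// Only a prefix of ar was pinned.
//	}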
   760  
   761  // movePMAsLocked moves all pmas in oldAR to newAR.
   762  //
   763  // Preconditions:
   764  //   - mm.activeMu must be locked for writing.
   765  //   - oldAR.Length() != 0.
   766  //   - oldAR.Length() <= newAR.Length().
   767  //   - !oldAR.Overlaps(newAR).
   768  //   - mm.pmas.IsEmptyRange(newAR).
   769  //   - oldAR and newAR must be page-aligned.
   770  func (mm *MemoryManager) movePMAsLocked(oldAR, newAR hostarch.AddrRange) {
   771  	if checkInvariants {
   772  		if !oldAR.WellFormed() || oldAR.Length() == 0 || !oldAR.IsPageAligned() {
   773  			panic(fmt.Sprintf("invalid oldAR: %v", oldAR))
   774  		}
   775  		if !newAR.WellFormed() || newAR.Length() == 0 || !newAR.IsPageAligned() {
   776  			panic(fmt.Sprintf("invalid newAR: %v", newAR))
   777  		}
   778  		if oldAR.Length() > newAR.Length() {
   779  			panic(fmt.Sprintf("old address range %v may contain pmas that will not fit in new address range %v", oldAR, newAR))
   780  		}
   781  		if oldAR.Overlaps(newAR) {
   782  			panic(fmt.Sprintf("old and new address ranges overlap: %v, %v", oldAR, newAR))
   783  		}
   784  		// mm.pmas.IsEmptyRange is checked by mm.pmas.Insert.
   785  	}
   786  
   787  	type movedPMA struct {
   788  		oldAR hostarch.AddrRange
   789  		pma   pma
   790  	}
   791  	var movedPMAs []movedPMA
   792  	pseg := mm.pmas.LowerBoundSegment(oldAR.Start)
   793  	for pseg.Ok() && pseg.Start() < oldAR.End {
   794  		pseg = mm.pmas.Isolate(pseg, oldAR)
   795  		movedPMAs = append(movedPMAs, movedPMA{
   796  			oldAR: pseg.Range(),
   797  			pma:   pseg.Value(),
   798  		})
   799  		pseg = mm.pmas.Remove(pseg).NextSegment()
   800  		// No RSS change is needed since we're re-inserting the same pmas
   801  		// below.
   802  	}
   803  
   804  	off := newAR.Start - oldAR.Start
   805  	pgap := mm.pmas.FindGap(newAR.Start)
   806  	for i := range movedPMAs {
   807  		mpma := &movedPMAs[i]
   808  		pmaNewAR := hostarch.AddrRange{mpma.oldAR.Start + off, mpma.oldAR.End + off}
   809  		pgap = mm.pmas.Insert(pgap, pmaNewAR, mpma.pma).NextGap()
   810  	}
   811  
   812  	mm.unmapASLocked(oldAR)
   813  }
   814  
   815  // getPMAInternalMappingsLocked ensures that pmas for all addresses in ar have
   816  // cached internal mappings. It returns:
   817  //
   818  //   - An iterator to the gap after the last pma with internal mappings
   819  //     containing an address in ar. If internal mappings exist for no addresses in
   820  //     ar, the iterator is to a gap that begins before ar.Start.
   821  //
   822  //   - An error that is non-nil if internal mappings exist for only a subset of
   823  //     ar.
   824  //
   825  // Preconditions:
   826  //   - mm.activeMu must be locked for writing.
   827  //   - pseg.Range().Contains(ar.Start).
   828  //   - pmas must exist for all addresses in ar.
   829  //   - ar.Length() != 0.
   830  //
   831  // Postconditions: getPMAInternalMappingsLocked does not invalidate iterators
   832  // into mm.pmas.
   833  func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) (pmaGapIterator, error) {
   834  	if checkInvariants {
   835  		if !ar.WellFormed() || ar.Length() == 0 {
   836  			panic(fmt.Sprintf("invalid ar: %v", ar))
   837  		}
   838  		if !pseg.Range().Contains(ar.Start) {
   839  			panic(fmt.Sprintf("initial pma %v does not cover start of ar %v", pseg.Range(), ar))
   840  		}
   841  	}
   842  
   843  	for {
   844  		if err := pseg.getInternalMappingsLocked(); err != nil {
   845  			return pseg.PrevGap(), err
   846  		}
   847  		if ar.End <= pseg.End() {
   848  			return pseg.NextGap(), nil
   849  		}
   850  		pseg, _ = pseg.NextNonEmpty()
   851  	}
   852  }
   853  
   854  // getVecPMAInternalMappingsLocked ensures that pmas for all addresses in ars
   855  // have cached internal mappings. It returns the subset of ars for which
   856  // internal mappings exist. If this is not equal to ars, it returns a non-nil
   857  // error explaining why.
   858  //
   859  // Preconditions:
   860  //   - mm.activeMu must be locked for writing.
   861  //   - pmas must exist for all addresses in ar.
   862  //
   863  // Postconditions: getVecPMAInternalMappingsLocked does not invalidate iterators
   864  // into mm.pmas.
   865  func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars hostarch.AddrRangeSeq) (hostarch.AddrRangeSeq, error) {
   866  	for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
   867  		ar := arsit.Head()
   868  		if ar.Length() == 0 {
   869  			continue
   870  		}
   871  		if pend, err := mm.getPMAInternalMappingsLocked(mm.pmas.FindSegment(ar.Start), ar); err != nil {
   872  			return truncatedAddrRangeSeq(ars, arsit, pend.Start()), err
   873  		}
   874  	}
   875  	return ars, nil
   876  }
   877  
   878  // internalMappingsLocked returns internal mappings for addresses in ar.
   879  //
   880  // Preconditions:
   881  //   - mm.activeMu must be locked.
   882  //   - Internal mappings must have been previously established for all addresses
   883  //     in ar.
   884  //   - ar.Length() != 0.
   885  //   - pseg.Range().Contains(ar.Start).
   886  func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) safemem.BlockSeq {
   887  	if checkInvariants {
   888  		if !ar.WellFormed() || ar.Length() == 0 {
   889  			panic(fmt.Sprintf("invalid ar: %v", ar))
   890  		}
   891  		if !pseg.Range().Contains(ar.Start) {
   892  			panic(fmt.Sprintf("initial pma %v does not cover start of ar %v", pseg.Range(), ar))
   893  		}
   894  	}
   895  
   896  	if ar.End <= pseg.End() {
   897  		// Since only one pma is involved, we can use pma.internalMappings
   898  		// directly, avoiding a slice allocation.
   899  		offset := uint64(ar.Start - pseg.Start())
   900  		return pseg.ValuePtr().internalMappings.DropFirst64(offset).TakeFirst64(uint64(ar.Length()))
   901  	}
   902  
   903  	var ims []safemem.Block
   904  	for {
   905  		pr := pseg.Range().Intersect(ar)
   906  		for pims := pseg.ValuePtr().internalMappings.DropFirst64(uint64(pr.Start - pseg.Start())).TakeFirst64(uint64(pr.Length())); !pims.IsEmpty(); pims = pims.Tail() {
   907  			ims = append(ims, pims.Head())
   908  		}
   909  		if ar.End <= pseg.End() {
   910  			break
   911  		}
   912  		pseg = pseg.NextSegment()
   913  	}
   914  	return safemem.BlockSeqFromSlice(ims)
   915  }
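
// A minimal sketch of how the returned BlockSeq is typically consumed, for
// illustration (assuming the safemem copy helpers from upstream gVisor are
// available, and that internal mappings were established beforehand via
// getPMAInternalMappingsLocked):
//
//	ims := mm.internalMappingsLocked(pseg, ar)
//	buf := make([]byte, ar.Length())
//	n, err := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), ims)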
   916  
   917  // vecInternalMappingsLocked returns internal mappings for addresses in ars.
   918  //
   919  // Preconditions:
   920  //   - mm.activeMu must be locked.
   921  //   - Internal mappings must have been previously established for all addresses
   922  //     in ars.
   923  func (mm *MemoryManager) vecInternalMappingsLocked(ars hostarch.AddrRangeSeq) safemem.BlockSeq {
   924  	var ims []safemem.Block
   925  	for ; !ars.IsEmpty(); ars = ars.Tail() {
   926  		ar := ars.Head()
   927  		if ar.Length() == 0 {
   928  			continue
   929  		}
   930  		for pims := mm.internalMappingsLocked(mm.pmas.FindSegment(ar.Start), ar); !pims.IsEmpty(); pims = pims.Tail() {
   931  			ims = append(ims, pims.Head())
   932  		}
   933  	}
   934  	return safemem.BlockSeqFromSlice(ims)
   935  }
   936  
   937  // incPrivateRef acquires a reference on private pages in fr.
   938  func (mm *MemoryManager) incPrivateRef(fr memmap.FileRange) {
   939  	mm.privateRefs.mu.Lock()
   940  	defer mm.privateRefs.mu.Unlock()
   941  	refSet := &mm.privateRefs.refs
   942  	seg, gap := refSet.Find(fr.Start)
   943  	for {
   944  		switch {
   945  		case seg.Ok() && seg.Start() < fr.End:
   946  			seg = refSet.Isolate(seg, fr)
   947  			seg.SetValue(seg.Value() + 1)
   948  			seg, gap = seg.NextNonEmpty()
   949  		case gap.Ok() && gap.Start() < fr.End:
   950  			seg, gap = refSet.InsertWithoutMerging(gap, gap.Range().Intersect(fr), 1).NextNonEmpty()
   951  		default:
   952  			refSet.MergeAdjacent(fr)
   953  			return
   954  		}
   955  	}
   956  }
   957  
   958  // decPrivateRef releases a reference on private pages in fr.
   959  func (mm *MemoryManager) decPrivateRef(fr memmap.FileRange) {
   960  	var freed []memmap.FileRange
   961  
   962  	mm.privateRefs.mu.Lock()
   963  	refSet := &mm.privateRefs.refs
   964  	seg := refSet.LowerBoundSegment(fr.Start)
   965  	for seg.Ok() && seg.Start() < fr.End {
   966  		seg = refSet.Isolate(seg, fr)
   967  		if old := seg.Value(); old == 1 {
   968  			freed = append(freed, seg.Range())
   969  			seg = refSet.Remove(seg).NextSegment()
   970  		} else {
   971  			seg.SetValue(old - 1)
   972  			seg = seg.NextSegment()
   973  		}
   974  	}
   975  	refSet.MergeAdjacent(fr)
   976  	mm.privateRefs.mu.Unlock()
   977  
   978  	mf := mm.mfp.MemoryFile()
   979  	for _, fr := range freed {
   980  		mf.DecRef(fr)
   981  	}
   982  }
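
// An illustrative trace of how the private reference counts interact with
// copy-on-write (the fork behavior is assumed from mm.Fork; only the calls in
// this file are shown):
//
//	mm.incPrivateRef(fr)  // initial allocation: the count for fr is 1
//	// mm.Fork() duplicates the pma into the child and takes another
//	// reference: the count becomes 2 and both pmas are marked needCOW.
//	mm.decPrivateRef(fr)  // one side unmaps or invalidates: the count drops
//	                      // back to 1, so the survivor may take ownership in
//	                      // isPMACopyOnWriteLocked instead of copying.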
   983  
   984  // addRSSLocked updates the current and maximum resident set size of a
   985  // MemoryManager to reflect the insertion of a pma at ar.
   986  //
   987  // Preconditions: mm.activeMu must be locked for writing.
   988  func (mm *MemoryManager) addRSSLocked(ar hostarch.AddrRange) {
   989  	mm.curRSS += uint64(ar.Length())
   990  	if mm.curRSS > mm.maxRSS {
   991  		mm.maxRSS = mm.curRSS
   992  	}
   993  }
   994  
   995  // removeRSSLocked updates the current resident set size of a MemoryManager to
   996  // reflect the removal of a pma at ar.
   997  //
   998  // Preconditions: mm.activeMu must be locked for writing.
   999  func (mm *MemoryManager) removeRSSLocked(ar hostarch.AddrRange) {
  1000  	mm.curRSS -= uint64(ar.Length())
  1001  }
  1002  
  1003  // pmaSetFunctions implements segment.Functions for pmaSet.
  1004  type pmaSetFunctions struct{}
  1005  
  1006  func (pmaSetFunctions) MinKey() hostarch.Addr {
  1007  	return 0
  1008  }
  1009  
  1010  func (pmaSetFunctions) MaxKey() hostarch.Addr {
  1011  	return ^hostarch.Addr(0)
  1012  }
  1013  
  1014  func (pmaSetFunctions) ClearValue(pma *pma) {
  1015  	pma.file = nil
  1016  	pma.internalMappings = safemem.BlockSeq{}
  1017  }
  1018  
  1019  func (pmaSetFunctions) Merge(ar1 hostarch.AddrRange, pma1 pma, ar2 hostarch.AddrRange, pma2 pma) (pma, bool) {
  1020  	if pma1.file != pma2.file ||
  1021  		pma1.off+uint64(ar1.Length()) != pma2.off ||
  1022  		pma1.translatePerms != pma2.translatePerms ||
  1023  		pma1.effectivePerms != pma2.effectivePerms ||
  1024  		pma1.maxPerms != pma2.maxPerms ||
  1025  		pma1.needCOW != pma2.needCOW ||
  1026  		pma1.private != pma2.private {
  1027  		return pma{}, false
  1028  	}
  1029  
  1030  	// Discard internal mappings instead of trying to merge them, since merging
  1031  	// them requires an allocation and getting them again from the
  1032  	// memmap.File might not.
  1033  	pma1.internalMappings = safemem.BlockSeq{}
  1034  	return pma1, true
  1035  }
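
// A concrete example of the merge condition, for illustration: with 4 KiB
// pages, two adjacent pmas merge only if they reference contiguous offsets of
// the same file and agree on all permission and COW state, e.g.
//
//	pma1: addrs [A, A+0x1000),        file f, off 0x10000
//	pma2: addrs [A+0x1000, A+0x2000), file f, off 0x11000
//
// merge into a single pma covering [A, A+0x2000) at off 0x10000, with any
// cached internal mappings discarded rather than concatenated.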
  1036  
  1037  func (pmaSetFunctions) Split(ar hostarch.AddrRange, p pma, split hostarch.Addr) (pma, pma) {
  1038  	newlen1 := uint64(split - ar.Start)
  1039  	p2 := p
  1040  	p2.off += newlen1
  1041  	if !p.internalMappings.IsEmpty() {
  1042  		p.internalMappings = p.internalMappings.TakeFirst64(newlen1)
  1043  		p2.internalMappings = p2.internalMappings.DropFirst64(newlen1)
  1044  	}
  1045  	return p, p2
  1046  }
  1047  
  1048  // findOrSeekPrevUpperBoundPMA returns mm.pmas.UpperBoundSegment(addr), but may do
  1049  // so by scanning linearly backward from pgap.
  1050  //
  1051  // Preconditions:
  1052  //   - mm.activeMu must be locked.
  1053  //   - addr <= pgap.Start().
  1054  func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr hostarch.Addr, pgap pmaGapIterator) pmaIterator {
  1055  	if checkInvariants {
  1056  		if !pgap.Ok() {
  1057  			panic("terminal pma iterator")
  1058  		}
  1059  		if addr > pgap.Start() {
  1060  			panic(fmt.Sprintf("can't seek backward to %#x from %#x", addr, pgap.Start()))
  1061  		}
  1062  	}
  1063  	// Optimistically check if pgap.PrevSegment() is the PMA we're looking for,
  1064  	// which is the case if findOrSeekPrevUpperBoundPMA is called to find the
  1065  	// start of a range containing only a single PMA.
  1066  	if pseg := pgap.PrevSegment(); pseg.Start() <= addr {
  1067  		return pseg
  1068  	}
  1069  	return mm.pmas.UpperBoundSegment(addr)
  1070  }
  1071  
  1072  // getInternalMappingsLocked ensures that pseg.ValuePtr().internalMappings is
  1073  // non-empty.
  1074  //
  1075  // Preconditions: mm.activeMu must be locked for writing.
  1076  func (pseg pmaIterator) getInternalMappingsLocked() error {
  1077  	pma := pseg.ValuePtr()
  1078  	if pma.internalMappings.IsEmpty() {
  1079  		// This must use maxPerms (instead of perms) because some permission
  1080  		// constraints are only visible to vmas; for example, mappings of
  1081  		// read-only files have vma.maxPerms.Write unset, but this may not be
  1082  		// visible to the memmap.Mappable.
  1083  		perms := pma.maxPerms
  1084  		// We will never execute application code through an internal mapping.
  1085  		perms.Execute = false
  1086  		ims, err := pma.file.MapInternal(pseg.fileRange(), perms)
  1087  		if err != nil {
  1088  			return err
  1089  		}
  1090  		pma.internalMappings = ims
  1091  	}
  1092  	return nil
  1093  }
  1094  
  1095  func (pseg pmaIterator) fileRange() memmap.FileRange {
  1096  	return pseg.fileRangeOf(pseg.Range())
  1097  }
  1098  
  1099  // Preconditions:
  1100  //   - pseg.Range().IsSupersetOf(ar).
   1101  //   - ar.Length() != 0.
  1102  func (pseg pmaIterator) fileRangeOf(ar hostarch.AddrRange) memmap.FileRange {
  1103  	if checkInvariants {
  1104  		if !pseg.Ok() {
  1105  			panic("terminal pma iterator")
  1106  		}
  1107  		if !ar.WellFormed() || ar.Length() == 0 {
  1108  			panic(fmt.Sprintf("invalid ar: %v", ar))
  1109  		}
  1110  		if !pseg.Range().IsSupersetOf(ar) {
  1111  			panic(fmt.Sprintf("ar %v out of bounds %v", ar, pseg.Range()))
  1112  		}
  1113  	}
  1114  
  1115  	pma := pseg.ValuePtr()
  1116  	pstart := pseg.Start()
  1117  	return memmap.FileRange{pma.off + uint64(ar.Start-pstart), pma.off + uint64(ar.End-pstart)}
  1118  }