github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/mm/io.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mm

import (
	"github.com/nicocha30/gvisor-ligolo/pkg/context"
	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
	"github.com/nicocha30/gvisor-ligolo/pkg/safemem"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/platform"
	"github.com/nicocha30/gvisor-ligolo/pkg/usermem"
)

// There are two supported ways to copy data to/from application virtual
// memory:
//
// 1. Internally-mapped copying: Determine the platform.File that backs the
// copied-to/from virtual address, obtain a mapping of its pages, and read or
// write to the mapping.
//
// 2. AddressSpace copying: If platform.Platform.SupportsAddressSpaceIO() is
// true, AddressSpace permissions are applicable, and an AddressSpace is
// available, copy directly through the AddressSpace, handling faults as
// needed.
//
// (Given that internally-mapped copying requires that backing memory is always
// implemented using a host file descriptor, we could also preadv/pwritev to it
// instead. But this would incur a host syscall for each use of the mapped
// page, whereas mmap is a one-time cost.)
//
// The fixed overhead of internally-mapped copying is expected to be higher
// than that of AddressSpace copying since the former always needs to translate
// addresses, whereas the latter only needs to do so when faults occur.
// However, the throughput of internally-mapped copying is expected to be
// somewhat higher than that of AddressSpace copying due to the high cost of
// page faults and because implementations of the latter usually rely on
// safecopy, which doesn't use AVX registers. So we prefer to use AddressSpace
// copying (when available) for smaller copies, and switch to internally-mapped
// copying once a size threshold is exceeded.
const (
	// copyMapMinBytes is the size threshold for switching to internally-mapped
	// copying in CopyOut, CopyIn, and ZeroOut.
	copyMapMinBytes = 32 << 10 // 32 KB

	// rwMapMinBytes is the size threshold for switching to internally-mapped
	// copying in CopyOutFrom and CopyInTo. It's lower than copyMapMinBytes
	// since AddressSpace copying in this case requires additional buffering;
	// see CopyOutFrom for details.
	rwMapMinBytes = 512
)
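
// The helper below is an illustrative sketch, not part of the upstream file:
// it restates in one place how the thresholds above are consulted to choose
// between AddressSpace copying and internally-mapped copying, mirroring the
// checks at the top of CopyOut, CopyIn, and ZeroOut below. The name
// preferASIOForCopy is hypothetical.
func preferASIOForCopy(mm *MemoryManager, opts usermem.IOOpts, size int64) bool {
	// AddressSpace copying is chosen only when the platform supports it, an
	// AddressSpace is active, permissions are being honored, and the copy is
	// small enough that setting up internal mappings would not pay off.
	return mm.asioEnabled(opts) && size < copyMapMinBytes
}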

// CheckIORange is similar to hostarch.Addr.ToRange, but applies bounds checks
// consistent with Linux's arch/x86/include/asm/uaccess.h:access_ok().
//
// Preconditions: length >= 0.
func (mm *MemoryManager) CheckIORange(addr hostarch.Addr, length int64) (hostarch.AddrRange, bool) {
	// Note that access_ok() constrains end even if length == 0.
	ar, ok := addr.ToRange(uint64(length))
	return ar, (ok && ar.End <= mm.layout.MaxAddr)
}

// checkIOVec applies bounds checks consistent with Linux's
// arch/x86/include/asm/uaccess.h:access_ok() to ars.
func (mm *MemoryManager) checkIOVec(ars hostarch.AddrRangeSeq) bool {
	for !ars.IsEmpty() {
		ar := ars.Head()
		if _, ok := mm.CheckIORange(ar.Start, int64(ar.Length())); !ok {
			return false
		}
		ars = ars.Tail()
	}
	return true
}

func (mm *MemoryManager) asioEnabled(opts usermem.IOOpts) bool {
	return mm.haveASIO && !opts.IgnorePermissions && opts.AddressSpaceActive
}

// translateIOError converts errors to EFAULT, as is usually reported for all
// I/O errors originating from MM in Linux.
func translateIOError(ctx context.Context, err error) error {
	if err == nil {
		return nil
	}
	if logIOErrors {
		ctx.Debugf("MM I/O error: %v", err)
	}
	return linuxerr.EFAULT
}

// CopyOut implements usermem.IO.CopyOut.
func (mm *MemoryManager) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
	ar, ok := mm.CheckIORange(addr, int64(len(src)))
	if !ok {
		return 0, linuxerr.EFAULT
	}

	if len(src) == 0 {
		return 0, nil
	}

	// Do AddressSpace IO if applicable.
	if mm.asioEnabled(opts) && len(src) < copyMapMinBytes {
		return mm.asCopyOut(ctx, addr, src)
	}

	// Go through internal mappings.
	n64, err := mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
		n, err := safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
		return n, translateIOError(ctx, err)
	})
	return int(n64), err
}
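
// copyOutUsageSketch is a hypothetical, illustrative caller, not part of the
// upstream file: it shows how CopyOut is typically invoked by sentry code
// running in the context of an application task, e.g. a syscall
// implementation writing results back to application memory. The
// AddressSpaceActive option is set on the assumption that the calling task's
// AddressSpace is currently mapped.
func copyOutUsageSketch(ctx context.Context, mm *MemoryManager, addr hostarch.Addr, data []byte) error {
	// CopyOut returns the number of bytes written; on a partial copy the
	// returned error (typically EFAULT) describes why the copy stopped.
	_, err := mm.CopyOut(ctx, addr, data, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	return err
}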

func (mm *MemoryManager) asCopyOut(ctx context.Context, addr hostarch.Addr, src []byte) (int, error) {
	var done int
	for {
		n, err := mm.as.CopyOut(addr+hostarch.Addr(done), src[done:])
		done += n
		if err == nil {
			return done, nil
		}
		if f, ok := err.(platform.SegmentationFault); ok {
			ar, _ := addr.ToRange(uint64(len(src)))
			if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Write); err != nil {
				return done, err
			}
			continue
		}
		return done, translateIOError(ctx, err)
	}
}

// CopyIn implements usermem.IO.CopyIn.
func (mm *MemoryManager) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
	ar, ok := mm.CheckIORange(addr, int64(len(dst)))
	if !ok {
		return 0, linuxerr.EFAULT
	}

	if len(dst) == 0 {
		return 0, nil
	}

	// Do AddressSpace IO if applicable.
	if mm.asioEnabled(opts) && len(dst) < copyMapMinBytes {
		return mm.asCopyIn(ctx, addr, dst)
	}

	// Go through internal mappings.
	n64, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
		n, err := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), ims)
		return n, translateIOError(ctx, err)
	})
	return int(n64), err
}

func (mm *MemoryManager) asCopyIn(ctx context.Context, addr hostarch.Addr, dst []byte) (int, error) {
	var done int
	for {
		n, err := mm.as.CopyIn(addr+hostarch.Addr(done), dst[done:])
		done += n
		if err == nil {
			return done, nil
		}
		if f, ok := err.(platform.SegmentationFault); ok {
			ar, _ := addr.ToRange(uint64(len(dst)))
			if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Read); err != nil {
				return done, err
			}
			continue
		}
		return done, translateIOError(ctx, err)
	}
}

// ZeroOut implements usermem.IO.ZeroOut.
func (mm *MemoryManager) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
	ar, ok := mm.CheckIORange(addr, toZero)
	if !ok {
		return 0, linuxerr.EFAULT
	}

	if toZero == 0 {
		return 0, nil
	}

	// Do AddressSpace IO if applicable.
	if mm.asioEnabled(opts) && toZero < copyMapMinBytes {
		return mm.asZeroOut(ctx, addr, toZero)
	}

	// Go through internal mappings.
	return mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(dsts safemem.BlockSeq) (uint64, error) {
		n, err := safemem.ZeroSeq(dsts)
		return n, translateIOError(ctx, err)
	})
}

func (mm *MemoryManager) asZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64) (int64, error) {
	var done int64
	for {
		n, err := mm.as.ZeroOut(addr+hostarch.Addr(done), uintptr(toZero-done))
		done += int64(n)
		if err == nil {
			return done, nil
		}
		if f, ok := err.(platform.SegmentationFault); ok {
			ar, _ := addr.ToRange(uint64(toZero))
			if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Write); err != nil {
				return done, err
			}
			continue
		}
		return done, translateIOError(ctx, err)
	}
}

// CopyOutFrom implements usermem.IO.CopyOutFrom.
func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
	if !mm.checkIOVec(ars) {
		return 0, linuxerr.EFAULT
	}

	if ars.NumBytes() == 0 {
		return 0, nil
	}

	// Do AddressSpace IO if applicable.
	if mm.asioEnabled(opts) && ars.NumBytes() < rwMapMinBytes {
		// We have to introduce a buffered copy, instead of just passing a
		// safemem.BlockSeq representing addresses in the AddressSpace to src.
		// This is because usermem.IO.CopyOutFrom() guarantees that it calls
		// src.ReadToBlocks() at most once, which is incompatible with handling
		// faults between calls. In the future, this is probably best resolved
		// by introducing a CopyOutFrom variant or option that allows it to
		// call src.ReadToBlocks() any number of times.
		//
		// This issue applies to CopyInTo as well.
		buf := make([]byte, int(ars.NumBytes()))
		bufN, bufErr := src.ReadToBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)))
		var done int64
		for done < int64(bufN) {
			ar := ars.Head()
			cplen := int64(ar.Length())
			if cplen > int64(bufN)-done {
				cplen = int64(bufN) - done
			}
			n, err := mm.asCopyOut(ctx, ar.Start, buf[int(done):int(done+cplen)])
			done += int64(n)
			if err != nil {
				return done, err
			}
			ars = ars.Tail()
		}
		// Do not convert errors returned by src to EFAULT.
		return done, bufErr
	}

	// Go through internal mappings.
	return mm.withVecInternalMappings(ctx, ars, hostarch.Write, opts.IgnorePermissions, src.ReadToBlocks)
}

// CopyInTo implements usermem.IO.CopyInTo.
func (mm *MemoryManager) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
	if !mm.checkIOVec(ars) {
		return 0, linuxerr.EFAULT
	}

	if ars.NumBytes() == 0 {
		return 0, nil
	}

	// Do AddressSpace IO if applicable.
	if mm.asioEnabled(opts) && ars.NumBytes() < rwMapMinBytes {
		buf := make([]byte, int(ars.NumBytes()))
		var done int
		var bufErr error
		for !ars.IsEmpty() {
			ar := ars.Head()
			var n int
			n, bufErr = mm.asCopyIn(ctx, ar.Start, buf[done:done+int(ar.Length())])
			done += n
			if bufErr != nil {
				break
			}
			ars = ars.Tail()
		}
		n, err := dst.WriteFromBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:done])))
		if err != nil {
			return int64(n), err
		}
		// Do not convert errors returned by dst to EFAULT.
		return int64(n), bufErr
	}

	// Go through internal mappings.
	return mm.withVecInternalMappings(ctx, ars, hostarch.Read, opts.IgnorePermissions, dst.WriteFromBlocks)
}

// EnsurePMAsExist attempts to ensure that PMAs exist for the given addr and
// the requested length. It returns the length for which it was able to either
// initialize PMAs or ascertain that they already exist. If this length is
// smaller than the requested length, it also returns an error explaining why.
func (mm *MemoryManager) EnsurePMAsExist(ctx context.Context, addr hostarch.Addr, length int64, opts usermem.IOOpts) (int64, error) {
	ar, ok := mm.CheckIORange(addr, length)
	if !ok {
		return 0, linuxerr.EFAULT
	}
	n64, err := mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
		return uint64(ims.NumBytes()), nil
	})
	return int64(n64), err
}

// SwapUint32 implements usermem.IO.SwapUint32.
func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) {
	ar, ok := mm.CheckIORange(addr, 4)
	if !ok {
		return 0, linuxerr.EFAULT
	}

	// Do AddressSpace IO if applicable.
	if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
		for {
			old, err := mm.as.SwapUint32(addr, new)
			if err == nil {
				return old, nil
			}
			if f, ok := err.(platform.SegmentationFault); ok {
				if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.ReadWrite); err != nil {
					return 0, err
				}
				continue
			}
			return 0, translateIOError(ctx, err)
		}
	}

	// Go through internal mappings.
	var old uint32
	_, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
		if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
			// Atomicity is unachievable across mappings.
			return 0, linuxerr.EFAULT
		}
		im := ims.Head()
		var err error
		old, err = safemem.SwapUint32(im, new)
		if err != nil {
			return 0, translateIOError(ctx, err)
		}
		// Return the number of bytes read.
		return 4, nil
	})
	return old, err
}

// CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32.
func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) {
	ar, ok := mm.CheckIORange(addr, 4)
	if !ok {
		return 0, linuxerr.EFAULT
	}

	// Do AddressSpace IO if applicable.
	if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
		for {
			prev, err := mm.as.CompareAndSwapUint32(addr, old, new)
			if err == nil {
				return prev, nil
			}
			if f, ok := err.(platform.SegmentationFault); ok {
				if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.ReadWrite); err != nil {
					return 0, err
				}
				continue
			}
			return 0, translateIOError(ctx, err)
		}
	}

	// Go through internal mappings.
	var prev uint32
	_, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
		if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
			// Atomicity is unachievable across mappings.
			return 0, linuxerr.EFAULT
		}
		im := ims.Head()
		var err error
		prev, err = safemem.CompareAndSwapUint32(im, old, new)
		if err != nil {
			return 0, translateIOError(ctx, err)
		}
		// Return the number of bytes read.
		return 4, nil
	})
	return prev, err
}

// LoadUint32 implements usermem.IO.LoadUint32.
func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) {
	ar, ok := mm.CheckIORange(addr, 4)
	if !ok {
		return 0, linuxerr.EFAULT
	}

	// Do AddressSpace IO if applicable.
	if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
		for {
			val, err := mm.as.LoadUint32(addr)
			if err == nil {
				return val, nil
			}
			if f, ok := err.(platform.SegmentationFault); ok {
				if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Read); err != nil {
					return 0, err
				}
				continue
			}
			return 0, translateIOError(ctx, err)
		}
	}

	// Go through internal mappings.
	var val uint32
	_, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
		if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
			// Atomicity is unachievable across mappings.
			return 0, linuxerr.EFAULT
		}
		im := ims.Head()
		var err error
		val, err = safemem.LoadUint32(im)
		if err != nil {
			return 0, translateIOError(ctx, err)
		}
		// Return the number of bytes read.
		return 4, nil
	})
	return val, err
}

// handleASIOFault handles a page fault at address addr for an AddressSpaceIO
// operation spanning ioar.
//
// Preconditions:
//   - mm.as != nil.
//   - ioar.Length() != 0.
//   - ioar.Contains(addr).
func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr hostarch.Addr, ioar hostarch.AddrRange, at hostarch.AccessType) error {
	// Try to map all remaining pages in the I/O operation. This RoundUp can't
	// overflow because otherwise it would have been caught by CheckIORange.
	end, _ := ioar.End.RoundUp()
	ar := hostarch.AddrRange{addr.RoundDown(), end}

	// Don't bother trying existingPMAsLocked; in most cases, if we did have
	// existing pmas, we wouldn't have faulted.

	// Ensure that we have usable vmas. Here and below, only return early if we
	// can't map the first (faulting) page; failures to map later pages are
	// silently ignored. This maximizes partial success.
	mm.mappingMu.RLock()
	vseg, vend, err := mm.getVMAsLocked(ctx, ar, at, false)
	if vendaddr := vend.Start(); vendaddr < ar.End {
		if vendaddr <= ar.Start {
			mm.mappingMu.RUnlock()
			return translateIOError(ctx, err)
		}
		ar.End = vendaddr
	}

	// Ensure that we have usable pmas.
	mm.activeMu.Lock()
	pseg, pend, err := mm.getPMAsLocked(ctx, vseg, ar, at)
	mm.mappingMu.RUnlock()
	if pendaddr := pend.Start(); pendaddr < ar.End {
		if pendaddr <= ar.Start {
			mm.activeMu.Unlock()
			return translateIOError(ctx, err)
		}
		ar.End = pendaddr
	}

	// Downgrade to a read-lock on activeMu since we don't need to mutate pmas
	// anymore.
	mm.activeMu.DowngradeLock()

	err = mm.mapASLocked(pseg, ar, false)
	mm.activeMu.RUnlock()
	return translateIOError(ctx, err)
}

// withInternalMappings ensures that pmas exist for all addresses in ar,
// support access of type (at, ignorePermissions), and have internal mappings
// cached. It then calls f with mm.activeMu locked for reading, passing
// internal mappings for the subrange of ar for which this property holds.
//
// withInternalMappings takes a function returning uint64 since many safemem
// functions have this property, but returns an int64 since this is usually
// more useful for usermem.IO methods.
//
// Preconditions: 0 < ar.Length() <= math.MaxInt64.
func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
	// If pmas are already available, we can do IO without touching mm.vmas or
	// mm.mappingMu.
	mm.activeMu.RLock()
	if pseg := mm.existingPMAsLocked(ar, at, ignorePermissions, true /* needInternalMappings */); pseg.Ok() {
		n, err := f(mm.internalMappingsLocked(pseg, ar))
		mm.activeMu.RUnlock()
		// Do not convert errors returned by f to EFAULT.
		return int64(n), err
	}
	mm.activeMu.RUnlock()

	// Ensure that we have usable vmas.
	mm.mappingMu.RLock()
	vseg, vend, verr := mm.getVMAsLocked(ctx, ar, at, ignorePermissions)
	if vendaddr := vend.Start(); vendaddr < ar.End {
		if vendaddr <= ar.Start {
			mm.mappingMu.RUnlock()
			return 0, translateIOError(ctx, verr)
		}
		ar.End = vendaddr
	}

	// Ensure that we have usable pmas.
	mm.activeMu.Lock()
	pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at)
	mm.mappingMu.RUnlock()
	if pendaddr := pend.Start(); pendaddr < ar.End {
		if pendaddr <= ar.Start {
			mm.activeMu.Unlock()
			return 0, translateIOError(ctx, perr)
		}
		ar.End = pendaddr
	}
	imend, imerr := mm.getPMAInternalMappingsLocked(pseg, ar)
	mm.activeMu.DowngradeLock()
	if imendaddr := imend.Start(); imendaddr < ar.End {
		if imendaddr <= ar.Start {
			mm.activeMu.RUnlock()
			return 0, translateIOError(ctx, imerr)
		}
		ar.End = imendaddr
	}

	// Do I/O.
	un, err := f(mm.internalMappingsLocked(pseg, ar))
	mm.activeMu.RUnlock()
	n := int64(un)

	// Return the first error in order of progress through ar.
	if err != nil {
		// Do not convert errors returned by f to EFAULT.
		return n, err
	}
	if imerr != nil {
		return n, translateIOError(ctx, imerr)
	}
	if perr != nil {
		return n, translateIOError(ctx, perr)
	}
	return n, translateIOError(ctx, verr)
}

// withVecInternalMappings ensures that pmas exist for all addresses in ars,
// support access of type (at, ignorePermissions), and have internal mappings
// cached. It then calls f with mm.activeMu locked for reading, passing
// internal mappings for the subset of ars for which this property holds.
//
// Preconditions: !ars.IsEmpty().
func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
	// withInternalMappings is faster than withVecInternalMappings because of
	// iterator plumbing (this isn't generally practical in the vector case due
	// to iterator invalidation between AddrRanges). Use it if possible.
	if ars.NumRanges() == 1 {
		return mm.withInternalMappings(ctx, ars.Head(), at, ignorePermissions, f)
	}

	// If pmas are already available, we can do IO without touching mm.vmas or
	// mm.mappingMu.
	mm.activeMu.RLock()
	if mm.existingVecPMAsLocked(ars, at, ignorePermissions, true /* needInternalMappings */) {
		n, err := f(mm.vecInternalMappingsLocked(ars))
		mm.activeMu.RUnlock()
		// Do not convert errors returned by f to EFAULT.
		return int64(n), err
	}
	mm.activeMu.RUnlock()

	// Ensure that we have usable vmas.
	mm.mappingMu.RLock()
	vars, verr := mm.getVecVMAsLocked(ctx, ars, at, ignorePermissions)
	if vars.NumBytes() == 0 {
		mm.mappingMu.RUnlock()
		return 0, translateIOError(ctx, verr)
	}

	// Ensure that we have usable pmas.
	mm.activeMu.Lock()
	pars, perr := mm.getVecPMAsLocked(ctx, vars, at)
	mm.mappingMu.RUnlock()
	if pars.NumBytes() == 0 {
		mm.activeMu.Unlock()
		return 0, translateIOError(ctx, perr)
	}
	imars, imerr := mm.getVecPMAInternalMappingsLocked(pars)
	mm.activeMu.DowngradeLock()
	if imars.NumBytes() == 0 {
		mm.activeMu.RUnlock()
		return 0, translateIOError(ctx, imerr)
	}

	// Do I/O.
	un, err := f(mm.vecInternalMappingsLocked(imars))
	mm.activeMu.RUnlock()
	n := int64(un)

	// Return the first error in order of progress through ars.
	if err != nil {
		// Do not convert errors from f to EFAULT.
		return n, err
	}
	if imerr != nil {
		return n, translateIOError(ctx, imerr)
	}
	if perr != nil {
		return n, translateIOError(ctx, perr)
	}
	return n, translateIOError(ctx, verr)
}

// truncatedAddrRangeSeq returns a copy of ars, but with the end truncated to
// at most address end on AddrRange arsit.Head(). It is used in vector I/O
// paths to truncate hostarch.AddrRangeSeq when errors occur.
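//
// For example (illustrative values): if ars covers [0x1000, 0x2000) followed
// by [0x3000, 0x3800) (6144 bytes in total), arsit has advanced to the second
// AddrRange, and end == 0x3400, then the result is ars.TakeFirst64(6144 -
// 2048 + 0x400), i.e. the first AddrRange followed by [0x3000, 0x3400).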
//
// Preconditions:
//   - !arsit.IsEmpty().
//   - end <= arsit.Head().End.
func truncatedAddrRangeSeq(ars, arsit hostarch.AddrRangeSeq, end hostarch.Addr) hostarch.AddrRangeSeq {
	ar := arsit.Head()
	if end <= ar.Start {
		return ars.TakeFirst64(ars.NumBytes() - arsit.NumBytes())
	}
	return ars.TakeFirst64(ars.NumBytes() - arsit.NumBytes() + int64(end-ar.Start))
}