github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/mm/io.go

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mm
    16  
    17  import (
    18  	"github.com/metacubex/gvisor/pkg/context"
    19  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    20  	"github.com/metacubex/gvisor/pkg/hostarch"
    21  	"github.com/metacubex/gvisor/pkg/safemem"
    22  	"github.com/metacubex/gvisor/pkg/sentry/memmap"
    23  	"github.com/metacubex/gvisor/pkg/sentry/platform"
    24  	"github.com/metacubex/gvisor/pkg/usermem"
    25  )
    26  
    27  // There are two supported ways to copy data to/from application virtual
    28  // memory:
    29  //
    30  // 1. Internally-mapped copying: Determine the memmap.File that backs the
    31  // copied-to/from virtual address, obtain a mapping of its pages, and read or
    32  // write to the mapping.
    33  //
    34  // 2. AddressSpace copying: If platform.Platform.SupportsAddressSpaceIO() is
    35  // true, AddressSpace permissions are applicable, and an AddressSpace is
    36  // available, copy directly through the AddressSpace, handling faults as
    37  // needed.
    38  //
    39  // (Given that internally-mapped copying requires that backing memory is always
    40  // implemented using a host file descriptor, we could also preadv/pwritev to it
    41  // instead. But this would incur a host syscall for each use of the mapped
    42  // page, whereas mmap is a one-time cost.)
    43  //
    44  // The fixed overhead of internally-mapped copying is expected to be higher
    45  // than that of AddressSpace copying since the former always needs to translate
    46  // addresses, whereas the latter only needs to do so when faults occur.
    47  // However, the throughput of internally-mapped copying is expected to be
    48  // somewhat higher than that of AddressSpace copying due to the high cost of
    49  // page faults and because implementations of the latter usually rely on
    50  // safecopy, which doesn't use AVX registers. So we prefer to use AddressSpace
    51  // copying (when available) for smaller copies, and switch to internally-mapped
    52  // copying once a size threshold is exceeded.
    53  const (
    54  	// copyMapMinBytes is the size threshold for switching to internally-mapped
    55  	// copying in CopyOut, CopyIn, and ZeroOut.
    56  	copyMapMinBytes = 32 << 10 // 32 KB
    57  
    58  	// rwMapMinBytes is the size threshold for switching to internally-mapped
    59  	// copying in CopyOutFrom and CopyInTo. It's lower than copyMapMinBytes
    60  	// since AddressSpace copying in this case requires additional buffering;
    61  	// see CopyOutFrom for details.
    62  	rwMapMinBytes = 512
    63  )
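
        // The sketch below is illustrative only: the function name and the chosen
        // IOOpts are assumptions, not part of this package's API. It shows how a
        // caller-side copy is expressed so that CopyOut can use AddressSpace copying
        // for small buffers and switch to internally-mapped copying once len(data)
        // reaches copyMapMinBytes.
        func exampleCopyOutDispatch(ctx context.Context, mm *MemoryManager, addr hostarch.Addr, data []byte) (int, error) {
        	// AddressSpaceActive reports that the task's AddressSpace is currently
        	// mapped, which is a prerequisite for the AddressSpace copying fast path.
        	return mm.CopyOut(ctx, addr, data, usermem.IOOpts{
        		AddressSpaceActive: true,
        	})
        }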
    64  
    65  // CheckIORange is similar to hostarch.Addr.ToRange, but applies bounds checks
    66  // consistent with Linux's arch/x86/include/asm/uaccess.h:access_ok().
    67  //
    68  // Preconditions: length >= 0.
    69  func (mm *MemoryManager) CheckIORange(addr hostarch.Addr, length int64) (hostarch.AddrRange, bool) {
    70  	// Note that access_ok() constrains end even if length == 0.
    71  	ar, ok := addr.ToRange(uint64(length))
    72  	return ar, (ok && ar.End <= mm.layout.MaxAddr)
    73  }
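
        // Illustrative sketch (the helper name is an assumption): rejecting a
        // user-supplied (addr, length) pair with EFAULT when it fails the
        // access_ok-style check above, mirroring what CopyOut and friends do before
        // touching any memory.
        func exampleValidateRange(mm *MemoryManager, addr hostarch.Addr, length int64) error {
        	if _, ok := mm.CheckIORange(addr, length); !ok {
        		return linuxerr.EFAULT
        	}
        	return nil
        }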
    74  
    75  // checkIOVec applies bounds checks consistent with Linux's
    76  // arch/x86/include/asm/uaccess.h:access_ok() to ars.
    77  func (mm *MemoryManager) checkIOVec(ars hostarch.AddrRangeSeq) bool {
    78  	for !ars.IsEmpty() {
    79  		ar := ars.Head()
    80  		if _, ok := mm.CheckIORange(ar.Start, int64(ar.Length())); !ok {
    81  			return false
    82  		}
    83  		ars = ars.Tail()
    84  	}
    85  	return true
    86  }
    87  
    88  func (mm *MemoryManager) asioEnabled(opts usermem.IOOpts) bool {
    89  	return mm.haveASIO && !opts.IgnorePermissions && opts.AddressSpaceActive
    90  }
    91  
    92  // translateIOError converts errors to EFAULT, which is how Linux usually
    93  // reports I/O errors originating from the MM.
    94  func translateIOError(ctx context.Context, err error) error {
    95  	if err == nil {
    96  		return nil
    97  	}
    98  	if logIOErrors {
    99  		ctx.Debugf("MM I/O error: %v", err)
   100  	}
   101  	return linuxerr.EFAULT
   102  }
   103  
   104  // CopyOut implements usermem.IO.CopyOut.
   105  func (mm *MemoryManager) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
   106  	ar, ok := mm.CheckIORange(addr, int64(len(src)))
   107  	if !ok {
   108  		return 0, linuxerr.EFAULT
   109  	}
   110  
   111  	if len(src) == 0 {
   112  		return 0, nil
   113  	}
   114  
   115  	// Do AddressSpace IO if applicable.
   116  	if mm.asioEnabled(opts) && len(src) < copyMapMinBytes {
   117  		return mm.asCopyOut(ctx, addr, src)
   118  	}
   119  
   120  	// Go through internal mappings.
   121  	n64, err := mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
   122  		n, err := safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
   123  		return n, translateIOError(ctx, err)
   124  	})
   125  	return int(n64), err
   126  }
   127  
   128  func (mm *MemoryManager) asCopyOut(ctx context.Context, addr hostarch.Addr, src []byte) (int, error) {
   129  	var done int
   130  	for {
   131  		n, err := mm.as.CopyOut(addr+hostarch.Addr(done), src[done:])
   132  		done += n
   133  		if err == nil {
   134  			return done, nil
   135  		}
   136  		if f, ok := err.(platform.SegmentationFault); ok {
   137  			ar, _ := addr.ToRange(uint64(len(src)))
   138  			if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Write); err != nil {
   139  				return done, err
   140  			}
   141  			continue
   142  		}
   143  		return done, translateIOError(ctx, err)
   144  	}
   145  }
   146  
   147  // CopyIn implements usermem.IO.CopyIn.
   148  func (mm *MemoryManager) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
   149  	ar, ok := mm.CheckIORange(addr, int64(len(dst)))
   150  	if !ok {
   151  		return 0, linuxerr.EFAULT
   152  	}
   153  
   154  	if len(dst) == 0 {
   155  		return 0, nil
   156  	}
   157  
   158  	// Do AddressSpace IO if applicable.
   159  	if mm.asioEnabled(opts) && len(dst) < copyMapMinBytes {
   160  		return mm.asCopyIn(ctx, addr, dst)
   161  	}
   162  
   163  	// Go through internal mappings.
   164  	n64, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
   165  		n, err := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), ims)
   166  		return n, translateIOError(ctx, err)
   167  	})
   168  	return int(n64), err
   169  }
   170  
   171  func (mm *MemoryManager) asCopyIn(ctx context.Context, addr hostarch.Addr, dst []byte) (int, error) {
   172  	var done int
   173  	for {
   174  		n, err := mm.as.CopyIn(addr+hostarch.Addr(done), dst[done:])
   175  		done += n
   176  		if err == nil {
   177  			return done, nil
   178  		}
   179  		if f, ok := err.(platform.SegmentationFault); ok {
   180  			ar, _ := addr.ToRange(uint64(len(dst)))
   181  			if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Read); err != nil {
   182  				return done, err
   183  			}
   184  			continue
   185  		}
   186  		return done, translateIOError(ctx, err)
   187  	}
   188  }
   189  
   190  // ZeroOut implements usermem.IO.ZeroOut.
   191  func (mm *MemoryManager) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
   192  	ar, ok := mm.CheckIORange(addr, toZero)
   193  	if !ok {
   194  		return 0, linuxerr.EFAULT
   195  	}
   196  
   197  	if toZero == 0 {
   198  		return 0, nil
   199  	}
   200  
   201  	// Do AddressSpace IO if applicable.
   202  	if mm.asioEnabled(opts) && toZero < copyMapMinBytes {
   203  		return mm.asZeroOut(ctx, addr, toZero)
   204  	}
   205  
   206  	// Go through internal mappings.
   207  	return mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(dsts safemem.BlockSeq) (uint64, error) {
   208  		n, err := safemem.ZeroSeq(dsts)
   209  		return n, translateIOError(ctx, err)
   210  	})
   211  }
   212  
   213  func (mm *MemoryManager) asZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64) (int64, error) {
   214  	var done int64
   215  	for {
   216  		n, err := mm.as.ZeroOut(addr+hostarch.Addr(done), uintptr(toZero-done))
   217  		done += int64(n)
   218  		if err == nil {
   219  			return done, nil
   220  		}
   221  		if f, ok := err.(platform.SegmentationFault); ok {
   222  			ar, _ := addr.ToRange(uint64(toZero))
   223  			if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Write); err != nil {
   224  				return done, err
   225  			}
   226  			continue
   227  		}
   228  		return done, translateIOError(ctx, err)
   229  	}
   230  }
   231  
   232  // CopyOutFrom implements usermem.IO.CopyOutFrom.
   233  func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
   234  	if !mm.checkIOVec(ars) {
   235  		return 0, linuxerr.EFAULT
   236  	}
   237  
   238  	if ars.NumBytes() == 0 {
   239  		return 0, nil
   240  	}
   241  
   242  	// Do AddressSpace IO if applicable.
   243  	if mm.asioEnabled(opts) && ars.NumBytes() < rwMapMinBytes {
   244  		// We have to introduce a buffered copy, instead of just passing a
   245  		// safemem.BlockSeq representing addresses in the AddressSpace to src.
   246  		// This is because usermem.IO.CopyOutFrom() guarantees that it calls
   247  		// src.ReadToBlocks() at most once, which is incompatible with handling
   248  		// faults between calls. In the future, this is probably best resolved
   249  		// by introducing a CopyOutFrom variant or option that allows it to
   250  		// call src.ReadToBlocks() any number of times.
   251  		//
   252  		// This issue applies to CopyInTo as well.
   253  		buf := make([]byte, int(ars.NumBytes()))
   254  		bufN, bufErr := src.ReadToBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)))
   255  		var done int64
   256  		for done < int64(bufN) {
   257  			ar := ars.Head()
   258  			cplen := int64(ar.Length())
   259  			if cplen > int64(bufN)-done {
   260  				cplen = int64(bufN) - done
   261  			}
   262  			n, err := mm.asCopyOut(ctx, ar.Start, buf[int(done):int(done+cplen)])
   263  			done += int64(n)
   264  			if err != nil {
   265  				return done, err
   266  			}
   267  			ars = ars.Tail()
   268  		}
   269  		// Do not convert errors returned by src to EFAULT.
   270  		return done, bufErr
   271  	}
   272  
   273  	// Go through internal mappings.
   274  	return mm.withVecInternalMappings(ctx, ars, hostarch.Write, opts.IgnorePermissions, src.ReadToBlocks)
   275  }
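
        // zeroFillReader is an illustrative safemem.Reader for the sketch below; the
        // type and function names here are assumptions, not part of this package. It
        // fills destination blocks with zero bytes.
        type zeroFillReader struct{}

        // ReadToBlocks implements safemem.Reader.ReadToBlocks.
        func (zeroFillReader) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
        	return safemem.ZeroSeq(dsts)
        }

        // Illustrative sketch: zero-filling a single user address range through
        // CopyOutFrom. hostarch.AddrRangeSeqOf is assumed here to build a one-element
        // AddrRangeSeq; totals below rwMapMinBytes take the buffered AddressSpace
        // path described above, larger ones go through internal mappings.
        func exampleCopyOutFrom(ctx context.Context, mm *MemoryManager, ar hostarch.AddrRange) (int64, error) {
        	return mm.CopyOutFrom(ctx, hostarch.AddrRangeSeqOf(ar), zeroFillReader{}, usermem.IOOpts{
        		AddressSpaceActive: true,
        	})
        }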
   276  
   277  // CopyInTo implements usermem.IO.CopyInTo.
   278  func (mm *MemoryManager) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
   279  	if !mm.checkIOVec(ars) {
   280  		return 0, linuxerr.EFAULT
   281  	}
   282  
   283  	if ars.NumBytes() == 0 {
   284  		return 0, nil
   285  	}
   286  
   287  	// Do AddressSpace IO if applicable.
   288  	if mm.asioEnabled(opts) && ars.NumBytes() < rwMapMinBytes {
   289  		buf := make([]byte, int(ars.NumBytes()))
   290  		var done int
   291  		var bufErr error
   292  		for !ars.IsEmpty() {
   293  			ar := ars.Head()
   294  			var n int
   295  			n, bufErr = mm.asCopyIn(ctx, ar.Start, buf[done:done+int(ar.Length())])
   296  			done += n
   297  			if bufErr != nil {
   298  				break
   299  			}
   300  			ars = ars.Tail()
   301  		}
   302  		n, err := dst.WriteFromBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:done])))
   303  		if err != nil {
   304  			return int64(n), err
   305  		}
   306  		// Do not convert errors returned by dst to EFAULT.
   307  		return int64(n), bufErr
   308  	}
   309  
   310  	// Go through internal mappings.
   311  	return mm.withVecInternalMappings(ctx, ars, hostarch.Read, opts.IgnorePermissions, dst.WriteFromBlocks)
   312  }
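
        // countingWriter is an illustrative safemem.Writer for the sketch below; the
        // type and function names here are assumptions, not part of this package. It
        // discards its input and records only how many bytes were "written".
        type countingWriter struct {
        	n uint64
        }

        // WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
        func (w *countingWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
        	n := uint64(srcs.NumBytes())
        	w.n += n
        	return n, nil
        }

        // Illustrative sketch: draining a single user address range into a
        // safemem.Writer via CopyInTo. As in the CopyOutFrom sketch,
        // hostarch.AddrRangeSeqOf is assumed here to build a one-element AddrRangeSeq.
        func exampleCopyInTo(ctx context.Context, mm *MemoryManager, ar hostarch.AddrRange) (int64, error) {
        	var w countingWriter
        	return mm.CopyInTo(ctx, hostarch.AddrRangeSeqOf(ar), &w, usermem.IOOpts{
        		AddressSpaceActive: true,
        	})
        }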
   313  
   314  // EnsurePMAsExist attempts to ensure that PMAs exist for the given addr and
   315  // the requested length. It returns the length for which it was able to either
   316  // initialize PMAs or ascertain that PMAs already exist. If this length is
   317  // smaller than the requested length, it also returns an error explaining why.
   318  func (mm *MemoryManager) EnsurePMAsExist(ctx context.Context, addr hostarch.Addr, length int64, opts usermem.IOOpts) (int64, error) {
   319  	ar, ok := mm.CheckIORange(addr, length)
   320  	if !ok {
   321  		return 0, linuxerr.EFAULT
   322  	}
   323  	n64, err := mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
   324  		return uint64(ims.NumBytes()), nil
   325  	})
   326  	return int64(n64), err
   327  }
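
        // Illustrative sketch (the function name is an assumption): pre-faulting a
        // buffer before a long-running operation so that later writes through
        // internal mappings are unlikely to block on demand paging; a short result
        // surfaces the error explaining why PMAs stop early.
        func examplePrefault(ctx context.Context, mm *MemoryManager, addr hostarch.Addr, length int64) error {
        	n, err := mm.EnsurePMAsExist(ctx, addr, length, usermem.IOOpts{})
        	if n < length {
        		return err
        	}
        	return nil
        }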
   328  
   329  // SwapUint32 implements usermem.IO.SwapUint32.
   330  func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) {
   331  	ar, ok := mm.CheckIORange(addr, 4)
   332  	if !ok {
   333  		return 0, linuxerr.EFAULT
   334  	}
   335  
   336  	// Do AddressSpace IO if applicable.
   337  	if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
   338  		for {
   339  			old, err := mm.as.SwapUint32(addr, new)
   340  			if err == nil {
   341  				return old, nil
   342  			}
   343  			if f, ok := err.(platform.SegmentationFault); ok {
   344  				if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.ReadWrite); err != nil {
   345  					return 0, err
   346  				}
   347  				continue
   348  			}
   349  			return 0, translateIOError(ctx, err)
   350  		}
   351  	}
   352  
   353  	// Go through internal mappings.
   354  	var old uint32
   355  	_, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
   356  		if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
   357  			// Atomicity is unachievable across mappings.
   358  			return 0, linuxerr.EFAULT
   359  		}
   360  		im := ims.Head()
   361  		var err error
   362  		old, err = safemem.SwapUint32(im, new)
   363  		if err != nil {
   364  			return 0, translateIOError(ctx, err)
   365  		}
   366  		// Return the number of bytes read.
   367  		return 4, nil
   368  	})
   369  	return old, err
   370  }
   371  
   372  // CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32.
   373  func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) {
   374  	ar, ok := mm.CheckIORange(addr, 4)
   375  	if !ok {
   376  		return 0, linuxerr.EFAULT
   377  	}
   378  
   379  	// Do AddressSpace IO if applicable.
   380  	if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
   381  		for {
   382  			prev, err := mm.as.CompareAndSwapUint32(addr, old, new)
   383  			if err == nil {
   384  				return prev, nil
   385  			}
   386  			if f, ok := err.(platform.SegmentationFault); ok {
   387  				if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.ReadWrite); err != nil {
   388  					return 0, err
   389  				}
   390  				continue
   391  			}
   392  			return 0, translateIOError(ctx, err)
   393  		}
   394  	}
   395  
   396  	// Go through internal mappings.
   397  	var prev uint32
   398  	_, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
   399  		if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
   400  			// Atomicity is unachievable across mappings.
   401  			return 0, linuxerr.EFAULT
   402  		}
   403  		im := ims.Head()
   404  		var err error
   405  		prev, err = safemem.CompareAndSwapUint32(im, old, new)
   406  		if err != nil {
   407  			return 0, translateIOError(ctx, err)
   408  		}
   409  		// Return the number of bytes read.
   410  		return 4, nil
   411  	})
   412  	return prev, err
   413  }
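
        // Illustrative sketch (the function name is an assumption): a lock-free
        // increment of a 32-bit word in application memory built from LoadUint32 and
        // CompareAndSwapUint32, retrying while concurrent writers race with us.
        func exampleAtomicIncrement(ctx context.Context, mm *MemoryManager, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) {
        	for {
        		old, err := mm.LoadUint32(ctx, addr, opts)
        		if err != nil {
        			return 0, err
        		}
        		prev, err := mm.CompareAndSwapUint32(ctx, addr, old, old+1, opts)
        		if err != nil {
        			return 0, err
        		}
        		if prev == old {
        			// The swap took effect; return the new value.
        			return old + 1, nil
        		}
        		// Another writer changed the word between the load and the CAS; retry.
        	}
        }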
   414  
   415  // LoadUint32 implements usermem.IO.LoadUint32.
   416  func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) {
   417  	ar, ok := mm.CheckIORange(addr, 4)
   418  	if !ok {
   419  		return 0, linuxerr.EFAULT
   420  	}
   421  
   422  	// Do AddressSpace IO if applicable.
   423  	if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
   424  		for {
   425  			val, err := mm.as.LoadUint32(addr)
   426  			if err == nil {
   427  				return val, nil
   428  			}
   429  			if f, ok := err.(platform.SegmentationFault); ok {
   430  				if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Read); err != nil {
   431  					return 0, err
   432  				}
   433  				continue
   434  			}
   435  			return 0, translateIOError(ctx, err)
   436  		}
   437  	}
   438  
   439  	// Go through internal mappings.
   440  	var val uint32
   441  	_, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
   442  		if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
   443  			// Atomicity is unachievable across mappings.
   444  			return 0, linuxerr.EFAULT
   445  		}
   446  		im := ims.Head()
   447  		var err error
   448  		val, err = safemem.LoadUint32(im)
   449  		if err != nil {
   450  			return 0, translateIOError(ctx, err)
   451  		}
   452  		// Return the number of bytes read.
   453  		return 4, nil
   454  	})
   455  	return val, err
   456  }
   457  
   458  // handleASIOFault handles a page fault at address addr for an AddressSpaceIO
   459  // operation spanning ioar.
   460  //
   461  // Preconditions:
   462  //   - mm.as != nil.
   463  //   - ioar.Length() != 0.
   464  //   - ioar.Contains(addr).
   465  func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr hostarch.Addr, ioar hostarch.AddrRange, at hostarch.AccessType) error {
   466  	// Try to map all remaining pages in the I/O operation. This RoundUp can't
   467  	// overflow because otherwise it would have been caught by CheckIORange.
   468  	end, _ := ioar.End.RoundUp()
   469  	ar := hostarch.AddrRange{addr.RoundDown(), end}
   470  
   471  	// Don't bother trying existingPMAsLocked; in most cases, if we did have
   472  	// existing pmas, we wouldn't have faulted.
   473  
   474  	// Ensure that we have usable vmas. Here and below, only return early if we
   475  	// can't map the first (faulting) page; failures to map later pages are
   476  	// silently ignored. This maximizes partial success.
   477  	mm.mappingMu.RLock()
   478  	vseg, vend, err := mm.getVMAsLocked(ctx, ar, at, false)
   479  	if vendaddr := vend.Start(); vendaddr < ar.End {
   480  		if vendaddr <= ar.Start {
   481  			mm.mappingMu.RUnlock()
   482  			return translateIOError(ctx, err)
   483  		}
   484  		ar.End = vendaddr
   485  	}
   486  
   487  	// Ensure that we have usable pmas.
   488  	mm.activeMu.Lock()
   489  	pseg, pend, err := mm.getPMAsLocked(ctx, vseg, ar, at)
   490  	mm.mappingMu.RUnlock()
   491  	if pendaddr := pend.Start(); pendaddr < ar.End {
   492  		if pendaddr <= ar.Start {
   493  			mm.activeMu.Unlock()
   494  			return translateIOError(ctx, err)
   495  		}
   496  		ar.End = pendaddr
   497  	}
   498  
   499  	// Downgrade to a read-lock on activeMu since we don't need to mutate pmas
   500  	// anymore.
   501  	mm.activeMu.DowngradeLock()
   502  
   503  	err = mm.mapASLocked(pseg, ar, memmap.PlatformEffectDefault)
   504  	mm.activeMu.RUnlock()
   505  	return translateIOError(ctx, err)
   506  }
   507  
   508  // withInternalMappings ensures that pmas exist for all addresses in ar,
   509  // support access of type (at, ignorePermissions), and have internal mappings
   510  // cached. It then calls f with mm.activeMu locked for reading, passing
   511  // internal mappings for the subrange of ar for which this property holds.
   512  //
   513  // withInternalMappings takes a function returning uint64 since many safemem
   514  // functions have this property, but returns an int64 since this is usually
   515  // more useful for usermem.IO methods.
   516  //
   517  // Preconditions: 0 < ar.Length() <= math.MaxInt64.
   518  func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
   519  	// If pmas are already available, we can do IO without touching mm.vmas or
   520  	// mm.mappingMu.
   521  	mm.activeMu.RLock()
   522  	if pseg := mm.existingPMAsLocked(ar, at, ignorePermissions, true /* needInternalMappings */); pseg.Ok() {
   523  		n, err := f(mm.internalMappingsLocked(pseg, ar))
   524  		mm.activeMu.RUnlock()
   525  		// Do not convert errors returned by f to EFAULT.
   526  		return int64(n), err
   527  	}
   528  	mm.activeMu.RUnlock()
   529  
   530  	// Ensure that we have usable vmas.
   531  	mm.mappingMu.RLock()
   532  	vseg, vend, verr := mm.getVMAsLocked(ctx, ar, at, ignorePermissions)
   533  	if vendaddr := vend.Start(); vendaddr < ar.End {
   534  		if vendaddr <= ar.Start {
   535  			mm.mappingMu.RUnlock()
   536  			return 0, translateIOError(ctx, verr)
   537  		}
   538  		ar.End = vendaddr
   539  	}
   540  
   541  	// Ensure that we have usable pmas.
   542  	mm.activeMu.Lock()
   543  	pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at)
   544  	mm.mappingMu.RUnlock()
   545  	if pendaddr := pend.Start(); pendaddr < ar.End {
   546  		if pendaddr <= ar.Start {
   547  			mm.activeMu.Unlock()
   548  			return 0, translateIOError(ctx, perr)
   549  		}
   550  		ar.End = pendaddr
   551  	}
   552  	imend, imerr := mm.getPMAInternalMappingsLocked(pseg, ar)
   553  	mm.activeMu.DowngradeLock()
   554  	if imendaddr := imend.Start(); imendaddr < ar.End {
   555  		if imendaddr <= ar.Start {
   556  			mm.activeMu.RUnlock()
   557  			return 0, translateIOError(ctx, imerr)
   558  		}
   559  		ar.End = imendaddr
   560  	}
   561  
   562  	// Do I/O.
   563  	un, err := f(mm.internalMappingsLocked(pseg, ar))
   564  	mm.activeMu.RUnlock()
   565  	n := int64(un)
   566  
   567  	// Return the first error in order of progress through ar.
   568  	if err != nil {
   569  		// Do not convert errors returned by f to EFAULT.
   570  		return n, err
   571  	}
   572  	if imerr != nil {
   573  		return n, translateIOError(ctx, imerr)
   574  	}
   575  	if perr != nil {
   576  		return n, translateIOError(ctx, perr)
   577  	}
   578  	return n, translateIOError(ctx, verr)
   579  }
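
        // Illustrative sketch (the function name is an assumption): reading 8 bytes
        // of application memory directly through withInternalMappings, which is
        // essentially how CopyIn is built. The callback receives the cached internal
        // mappings and reports how many bytes it consumed.
        func exampleReadEightBytes(ctx context.Context, mm *MemoryManager, addr hostarch.Addr) ([8]byte, error) {
        	var buf [8]byte
        	ar, ok := mm.CheckIORange(addr, int64(len(buf)))
        	if !ok {
        		return buf, linuxerr.EFAULT
        	}
        	_, err := mm.withInternalMappings(ctx, ar, hostarch.Read, false /* ignorePermissions */, func(ims safemem.BlockSeq) (uint64, error) {
        		n, err := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:])), ims)
        		return n, translateIOError(ctx, err)
        	})
        	return buf, err
        }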
   580  
   581  // withVecInternalMappings ensures that pmas exist for all addresses in ars,
   582  // support access of type (at, ignorePermissions), and have internal mappings
   583  // cached. It then calls f with mm.activeMu locked for reading, passing
   584  // internal mappings for the subset of ars for which this property holds.
   585  //
   586  // Preconditions: !ars.IsEmpty().
   587  func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
   588  	// withInternalMappings is faster than withVecInternalMappings because of
   589  	// iterator plumbing (this isn't generally practical in the vector case due
   590  	// to iterator invalidation between AddrRanges). Use it if possible.
   591  	if ars.NumRanges() == 1 {
   592  		return mm.withInternalMappings(ctx, ars.Head(), at, ignorePermissions, f)
   593  	}
   594  
   595  	// If pmas are already available, we can do IO without touching mm.vmas or
   596  	// mm.mappingMu.
   597  	mm.activeMu.RLock()
   598  	if mm.existingVecPMAsLocked(ars, at, ignorePermissions, true /* needInternalMappings */) {
   599  		n, err := f(mm.vecInternalMappingsLocked(ars))
   600  		mm.activeMu.RUnlock()
   601  		// Do not convert errors returned by f to EFAULT.
   602  		return int64(n), err
   603  	}
   604  	mm.activeMu.RUnlock()
   605  
   606  	// Ensure that we have usable vmas.
   607  	mm.mappingMu.RLock()
   608  	vars, verr := mm.getVecVMAsLocked(ctx, ars, at, ignorePermissions)
   609  	if vars.NumBytes() == 0 {
   610  		mm.mappingMu.RUnlock()
   611  		return 0, translateIOError(ctx, verr)
   612  	}
   613  
   614  	// Ensure that we have usable pmas.
   615  	mm.activeMu.Lock()
   616  	pars, perr := mm.getVecPMAsLocked(ctx, vars, at)
   617  	mm.mappingMu.RUnlock()
   618  	if pars.NumBytes() == 0 {
   619  		mm.activeMu.Unlock()
   620  		return 0, translateIOError(ctx, perr)
   621  	}
   622  	imars, imerr := mm.getVecPMAInternalMappingsLocked(pars)
   623  	mm.activeMu.DowngradeLock()
   624  	if imars.NumBytes() == 0 {
   625  		mm.activeMu.RUnlock()
   626  		return 0, translateIOError(ctx, imerr)
   627  	}
   628  
   629  	// Do I/O.
   630  	un, err := f(mm.vecInternalMappingsLocked(imars))
   631  	mm.activeMu.RUnlock()
   632  	n := int64(un)
   633  
   634  	// Return the first error in order of progress through ars.
   635  	if err != nil {
   636  		// Do not convert errors from f to EFAULT.
   637  		return n, err
   638  	}
   639  	if imerr != nil {
   640  		return n, translateIOError(ctx, imerr)
   641  	}
   642  	if perr != nil {
   643  		return n, translateIOError(ctx, perr)
   644  	}
   645  	return n, translateIOError(ctx, verr)
   646  }
   647  
   648  // truncatedAddrRangeSeq returns a copy of ars, but with the end truncated to
   649  // at most address end within arsit.Head(). It is used in vector I/O paths to
   650  // truncate a hostarch.AddrRangeSeq when an error occurs.
   651  //
   652  // Preconditions:
   653  //   - !arsit.IsEmpty().
   654  //   - end <= arsit.Head().End.
   655  func truncatedAddrRangeSeq(ars, arsit hostarch.AddrRangeSeq, end hostarch.Addr) hostarch.AddrRangeSeq {
   656  	ar := arsit.Head()
   657  	if end <= ar.Start {
   658  		return ars.TakeFirst64(ars.NumBytes() - arsit.NumBytes())
   659  	}
   660  	return ars.TakeFirst64(ars.NumBytes() - arsit.NumBytes() + int64(end-ar.Start))
   661  }