github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/memmap/memmap.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package memmap defines semantics for memory mappings.
    16  package memmap
    17  
    18  import (
    19  	"fmt"
    20  
    21  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    22  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/safemem"
    24  )
    25  
    26  // Mappable represents a memory-mappable object, a mutable mapping from uint64
    27  // offsets to (File, uint64 File offset) pairs.
    28  //
    29  // See mm/mm.go for Mappable's place in the lock order.
    30  //
    31  // Unless a method states otherwise, all Mappable methods have the following preconditions:
    32  //   - hostarch.AddrRanges and MappableRanges must be non-empty (Length() != 0).
    33  //   - hostarch.Addrs and Mappable offsets must be page-aligned.
    34  type Mappable interface {
    35  	// AddMapping notifies the Mappable of a mapping from addresses ar in ms to
    36  	// offsets [offset, offset+ar.Length()) in this Mappable.
    37  	//
    38  	// The writable flag indicates whether the backing data for a Mappable can
    39  	// be modified through the mapping. Effectively, this means a shared mapping
    40  	// where Translate may be called with at.Write == true. This is a property
    41  	// established at mapping creation and must remain constant throughout the
    42  	// lifetime of the mapping.
    43  	//
    44  	// Preconditions: offset+ar.Length() does not overflow.
    45  	AddMapping(ctx context.Context, ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error
    46  
    47  	// RemoveMapping notifies the Mappable of the removal of a mapping from
    48  	// addresses ar in ms to offsets [offset, offset+ar.Length()) in this
    49  	// Mappable.
    50  	//
    51  	// Preconditions:
    52  	//	* offset+ar.Length() does not overflow.
    53  	//	* The removed mapping must exist. writable must match the
    54  	//		corresponding call to AddMapping.
    55  	RemoveMapping(ctx context.Context, ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool)
    56  
    57  	// CopyMapping notifies the Mappable of an attempt to copy a mapping in ms
    58  	// from srcAR to dstAR. For most Mappables, this is equivalent to
    59  	// AddMapping. Note that it is possible that srcAR.Length() != dstAR.Length(),
    60  	// and also that srcAR.Length() == 0 (an exception to the non-empty rule).
    61  	//
    62  	// CopyMapping is only called when a mapping is copied within a given
    63  	// MappingSpace; it is analogous to Linux's vm_operations_struct::mremap.
    64  	//
    65  	// Preconditions:
    66  	//	* offset+srcAR.Length() and offset+dstAR.Length() do not overflow.
    67  	//	* The mapping at srcAR must exist. writable must match the
    68  	//		corresponding call to AddMapping.
    69  	CopyMapping(ctx context.Context, ms MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error
    70  
    71  	// Translate returns the Mappable's current mappings for at least the range
    72  	// of offsets specified by required, and at most the range of offsets
    73  	// specified by optional. at is the set of access types that may be
    74  	// performed using the returned Translations. If not all required offsets
    75  	// are translated, it returns a non-nil error explaining why.
    76  	//
    77  	// Translations are valid until invalidated by a callback to
    78  	// MappingSpace.Invalidate or until the caller removes its mapping of the
    79  	// translated range. Mappable implementations must ensure that at least one
    80  	// reference is held on all pages in a File that may be the result
    81  	// of a valid Translation.
    82  	//
    83  	// Preconditions:
    84  	//	* required.Length() > 0.
    85  	//	* optional.IsSupersetOf(required).
    86  	//	* required and optional must be page-aligned.
    87  	//	* The caller must have established a mapping for all of the queried
    88  	//		offsets via a previous call to AddMapping.
    89  	//	* The caller is responsible for ensuring that calls to Translate
    90  	//		synchronize with invalidation.
    91  	//
    92  	// Postconditions: See CheckTranslateResult.
    93  	Translate(ctx context.Context, required, optional MappableRange, at hostarch.AccessType) ([]Translation, error)
    94  
    95  	// InvalidateUnsavable requests that the Mappable invalidate Translations
    96  	// that cannot be preserved across save/restore.
    97  	//
    98  	// Invariant: InvalidateUnsavable never races with concurrent calls to any
    99  	// other Mappable methods.
   100  	InvalidateUnsavable(ctx context.Context) error
   101  }
   102  
   103  // Translation is one element of the result returned by Mappable.Translate.
   104  type Translation struct {
   105  	// Source is the translated range in the Mappable.
   106  	Source MappableRange
   107  
   108  	// File is the mapped file.
   109  	File File
   110  
   111  	// Offset is the offset into File at which this Translation begins.
   112  	Offset uint64
   113  
   114  	// Perms is the set of permissions for which platform.AddressSpace.MapFile
   115  	// and platform.AddressSpace.MapInternal on this Translation is permitted.
   116  	Perms hostarch.AccessType
   117  }
   118  
   119  // FileRange returns the FileRange represented by t.
   120  func (t Translation) FileRange() FileRange {
   121  	return FileRange{t.Offset, t.Offset + t.Source.Length()}
   122  }
   123  
   124  // CheckTranslateResult returns an error if (ts, terr) does not satisfy all
   125  // postconditions for Mappable.Translate(required, optional, at).
   126  //
   127  // Preconditions: Same as Mappable.Translate.
   128  func CheckTranslateResult(required, optional MappableRange, at hostarch.AccessType, ts []Translation, terr error) error {
   129  	// Verify that the inputs to Mappable.Translate were valid.
   130  	if !required.WellFormed() || required.Length() == 0 {
   131  		panic(fmt.Sprintf("invalid required range: %v", required))
   132  	}
   133  	if !hostarch.Addr(required.Start).IsPageAligned() || !hostarch.Addr(required.End).IsPageAligned() {
   134  		panic(fmt.Sprintf("unaligned required range: %v", required))
   135  	}
   136  	if !optional.IsSupersetOf(required) {
   137  		panic(fmt.Sprintf("optional range %v is not a superset of required range %v", optional, required))
   138  	}
   139  	if !hostarch.Addr(optional.Start).IsPageAligned() || !hostarch.Addr(optional.End).IsPageAligned() {
   140  		panic(fmt.Sprintf("unaligned optional range: %v", optional))
   141  	}
   142  
   143  	// The first Translation must include required.Start.
   144  	if len(ts) != 0 && !ts[0].Source.Contains(required.Start) {
   145  		return fmt.Errorf("first Translation %+v does not cover start of required range %v", ts[0], required)
   146  	}
   147  	for i, t := range ts {
   148  		if !t.Source.WellFormed() || t.Source.Length() == 0 {
   149  			return fmt.Errorf("Translation %+v has invalid Source", t)
   150  		}
   151  		if !hostarch.Addr(t.Source.Start).IsPageAligned() || !hostarch.Addr(t.Source.End).IsPageAligned() {
   152  			return fmt.Errorf("Translation %+v has unaligned Source", t)
   153  		}
   154  		if t.File == nil {
   155  			return fmt.Errorf("Translation %+v has nil File", t)
   156  		}
   157  		if !hostarch.Addr(t.Offset).IsPageAligned() {
   158  			return fmt.Errorf("Translation %+v has unaligned Offset", t)
   159  		}
   160  		// Translations must be contiguous and in increasing order of
   161  		// Translation.Source.
   162  		if i > 0 && ts[i-1].Source.End != t.Source.Start {
   163  			return fmt.Errorf("Translation %+v and Translation %+v are not contiguous", ts[i-1], t)
   164  		}
   165  		// At least part of each Translation must be required.
   166  		if t.Source.Intersect(required).Length() == 0 {
   167  			return fmt.Errorf("Translation %+v lies entirely outside required range %v", t, required)
   168  		}
   169  		// Translations must be constrained to the optional range.
   170  		if !optional.IsSupersetOf(t.Source) {
   171  			return fmt.Errorf("Translation %+v lies outside optional range %v", t, optional)
   172  		}
   173  		// Each Translation must permit a superset of requested accesses.
   174  		if !t.Perms.SupersetOf(at) {
   175  			return fmt.Errorf("Translation %+v does not permit all requested access types %v", t, at)
   176  		}
   177  	}
   178  	// If the set of Translations does not cover the entire required range,
   179  	// Translate must return a non-nil error explaining why.
   180  	if terr == nil {
   181  		if len(ts) == 0 {
   182  			return fmt.Errorf("no Translations and no error")
   183  		}
   184  		if t := ts[len(ts)-1]; !t.Source.Contains(required.End - 1) {
   185  			return fmt.Errorf("last Translation %+v does not reach end of required range %v, but Translate returned no error", t, required)
   186  		}
   187  	}
   188  	return nil
   189  }
   190  
   191  // BusError may be returned by implementations of Mappable.Translate for errors
   192  // that should result in SIGBUS delivery if they cause application page fault
   193  // handling to fail.
   194  type BusError struct {
   195  	// Err is the original error.
   196  	Err error
   197  }
   198  
   199  // Error implements error.Error.
   200  func (b *BusError) Error() string {
   201  	return fmt.Sprintf("BusError: %v", b.Err.Error())
   202  }
   203  
   204  // MappableRange represents a range of uint64 offsets into a Mappable.
   205  //
   206  // type MappableRange <generated using go_generics>
   207  
   208  // String implements fmt.Stringer.String.
   209  func (mr MappableRange) String() string {
   210  	return fmt.Sprintf("[%#x, %#x)", mr.Start, mr.End)
   211  }
   212  
   213  // MappingSpace represents a mutable mapping from hostarch.Addrs to (Mappable,
   214  // uint64 offset) pairs.
   215  type MappingSpace interface {
   216  	// Invalidate is called to notify the MappingSpace that values returned by
   217  	// previous calls to Mappable.Translate for offsets mapped by addresses in
   218  	// ar are no longer valid. opts carries additional options; see InvalidateOpts.
   219  	//
   220  	// Invalidate must not take any locks preceding mm.MemoryManager.activeMu
   221  	// in the lock order.
   222  	//
   223  	// Preconditions:
   224  	//	* ar.Length() != 0.
   225  	//	* ar must be page-aligned.
   226  	Invalidate(ar hostarch.AddrRange, opts InvalidateOpts)
   227  }
   228  
   229  // InvalidateOpts holds options for MappingSpace.Invalidate.
   230  type InvalidateOpts struct {
   231  	// InvalidatePrivate is true if private pages in the invalidated region
   232  	// should also be discarded, causing their data to be lost.
   233  	InvalidatePrivate bool
   234  }
   235  
   236  // MappingIdentity controls the lifetime of a Mappable, and provides
   237  // information about the Mappable for /proc/[pid]/maps. It is distinct from
   238  // Mappable because all Mappables that are coherent must compare equal to
   239  // support the implementation of shared futexes, but different
   240  // MappingIdentities may represent the same Mappable, in the same way that
   241  // multiple fs.Files may represent the same fs.Inode. (This similarity is not
   242  // coincidental; fs.File implements MappingIdentity, and some
   243  // fs.InodeOperations implement Mappable.)
   244  type MappingIdentity interface {
   245  	// IncRef increments the MappingIdentity's reference count.
   246  	IncRef()
   247  
   248  	// DecRef decrements the MappingIdentity's reference count.
   249  	DecRef(ctx context.Context)
   250  
   251  	// MappedName returns the application-visible name shown in
   252  	// /proc/[pid]/maps.
   253  	MappedName(ctx context.Context) string
   254  
   255  	// DeviceID returns the device number shown in /proc/[pid]/maps.
   256  	DeviceID() uint64
   257  
   258  	// InodeID returns the inode number shown in /proc/[pid]/maps.
   259  	InodeID() uint64
   260  
   261  	// Msync syncs the offsets in mr, with the same semantics as
   262  	// fs.FileOperations.Fsync(ctx, int64(mr.Start), int64(mr.End-1),
   263  	// fs.SyncData). (Fsync takes an inclusive end, but mr.End is exclusive,
   264  	// hence mr.End-1.) Msync is defined rather than Fsync so that
   265  	// implementors don't need to depend on the fs package for fs.SyncType.
   266  	Msync(ctx context.Context, mr MappableRange) error
   267  }
   268  
   269  // MLockMode specifies the memory locking behavior of a memory mapping.
   270  type MLockMode int
   271  
   272  // Note that the ordering of MLockModes is significant (their values are
   273  // assigned by iota below); see mm.MemoryManager.defMLockMode.
   274  const (
   275  	// MLockNone specifies that a mapping has no memory locking behavior.
   276  	//
   277  	// This must be the zero value for MLockMode.
   278  	MLockNone MLockMode = iota
   279  
   280  	// MLockEager specifies that a mapping is memory-locked, as by mlock() or
   281  	// similar. Pages in the mapping should be made, and kept, resident in
   282  	// physical memory as soon as possible.
   283  	//
   284  	// As of this writing, MLockEager does not cause memory-locking to be
   285  	// requested from the host; it only affects the sentry's memory management
   286  	// behavior.
   287  	//
   288  	// MLockEager is analogous to Linux's VM_LOCKED.
   289  	MLockEager
   290  
   291  	// MLockLazy specifies that a mapping is memory-locked, as by mlock() or
   292  	// similar. Pages in the mapping should be kept resident in physical memory
   293  	// once they have been made resident due to e.g. a page fault.
   294  	//
   295  	// As of this writing, MLockLazy does not cause memory-locking to be
   296  	// requested from the host; in fact, it has virtually no effect, except for
   297  	// interactions between mlocked pages and other syscalls.
   298  	//
   299  	// MLockLazy is analogous to Linux's VM_LOCKED | VM_LOCKONFAULT.
   300  	MLockLazy
   301  )
   302  
   303  // MMapOpts specifies a request to create a memory mapping.
   304  type MMapOpts struct {
   305  	// Length is the length of the mapping.
   306  	Length uint64
   307  
   308  	// MappingIdentity controls the lifetime of Mappable, and provides
   309  	// properties of the mapping shown in /proc/[pid]/maps. If MMapOpts is used
   310  	// to successfully create a memory mapping, a reference is taken on
   311  	// MappingIdentity.
   312  	MappingIdentity MappingIdentity
   313  
   314  	// Mappable is the Mappable to be mapped. If Mappable is nil, the mapping
   315  	// is anonymous. If Mappable is not nil, it must remain valid as long as a
   316  	// reference is held on MappingIdentity.
   317  	Mappable Mappable
   318  
   319  	// Offset is the offset into Mappable to map. If Mappable is nil, Offset is
   320  	// ignored.
   321  	Offset uint64
   322  
   323  	// Addr is the suggested address for the mapping.
   324  	Addr hostarch.Addr
   325  
   326  	// Fixed specifies whether this is a fixed mapping (it must be located at
   327  	// Addr).
   328  	Fixed bool
   329  
   330  	// Unmap specifies whether existing mappings in the range being mapped may
   331  	// be replaced. If Unmap is true, Fixed must be true.
   332  	Unmap bool
   333  
   334  	// If Map32Bit is true, all addresses in the created mapping must fit in a
   335  	// 32-bit integer. (Note that the "end address" of the mapping, i.e. the
   336  	// address of the first byte *after* the mapping, need not fit in a 32-bit
   337  	// integer.) Map32Bit is ignored if Fixed is true.
   338  	Map32Bit bool
   339  
   340  	// Perms is the set of permissions to be applied to this mapping.
   341  	Perms hostarch.AccessType
   342  
   343  	// MaxPerms limits the set of permissions that may ever apply to this
   344  	// mapping. If Mappable is not nil, all memmap.Translations returned by
   345  	// Mappable.Translate must support all accesses in MaxPerms.
   346  	//
   347  	// Preconditions: MaxPerms should be an effective AccessType, as
   348  	// access cannot be limited beyond effective AccessTypes.
   349  	MaxPerms hostarch.AccessType
   350  
   351  	// Private is true if writes to the mapping should be propagated to a copy
   352  	// that is exclusive to the MemoryManager.
   353  	Private bool
   354  
   355  	// GrowsDown is true if the mapping should be automatically expanded
   356  	// downward on guard page faults.
   357  	GrowsDown bool
   358  
   359  	// Precommit is true if the platform should eagerly commit resources to the
   360  	// mapping (see platform.AddressSpace.MapFile).
   361  	Precommit bool
   362  
   363  	// MLockMode specifies the memory locking behavior of the mapping.
   364  	MLockMode MLockMode
   365  
   366  	// Hint is the name used for the mapping in /proc/[pid]/maps. If Hint is
   367  	// empty, MappingIdentity.MappedName() will be used instead.
   368  	//
   369  	// TODO(jamieliu): Replace entirely with MappingIdentity?
   370  	Hint string
   371  
   372  	// Force means to skip validation checks of Addr and Length. It can be
   373  	// used to create special mappings below mm.layout.MinAddr and
   374  	// mm.layout.MaxAddr. It has to be used with caution.
   375  	//
   376  	// If Force is true, Unmap and Fixed must be true.
   377  	Force bool
   378  
   379  	// SentryOwnedContent indicates the sentry exclusively controls the
   380  	// underlying memory backing the mapping thus the memory content is
   381  	// guaranteed not to be modified outside the sentry's purview.
   382  	SentryOwnedContent bool
   383  }
   384  
   385  // File represents a host file that may be mapped into a platform.AddressSpace.
   386  type File interface {
   387  	// All pages in a File are reference-counted.
   388  
   389  	// IncRef increments the reference count on all pages in fr and
   390  	// associates each page with a memCgID (memory cgroup id) to which it
   391  	// belongs. memCgID will not be changed if the page already exists.
   392  	//
   393  	// Preconditions:
   394  	//	* fr.Start and fr.End must be page-aligned.
   395  	//	* fr.Length() > 0.
   396  	//	* At least one reference must be held on all pages in fr. (The File
   397  	//		interface does not provide a way to acquire an initial reference;
   398  	//		implementors may define mechanisms for doing so.)
   399  	IncRef(fr FileRange, memCgID uint32)
   400  
   401  	// DecRef decrements the reference count on all pages in fr.
   402  	//
   403  	// Preconditions:
   404  	//	* fr.Start and fr.End must be page-aligned.
   405  	//	* fr.Length() > 0.
   406  	//	* At least one reference must be held on all pages in fr.
   407  	DecRef(fr FileRange)
   408  
   409  	// MapInternal returns a mapping of the given file offsets in the invoking
   410  	// process' address space for reading and writing.
   411  	//
   412  	// Note that fr.Start and fr.End need not be page-aligned.
   413  	//
   414  	// Preconditions:
   415  	//	* fr.Length() > 0.
   416  	//	* At least one reference must be held on all pages in fr.
   417  	//
   418  	// Postconditions: The returned mapping is valid as long as at least one
   419  	// reference is held on the mapped pages.
   420  	MapInternal(fr FileRange, at hostarch.AccessType) (safemem.BlockSeq, error)
   421  
   422  	// FD returns the file descriptor represented by the File.
   423  	//
   424  	// The only permitted operation on the returned file descriptor is to map
   425  	// pages from it consistent with the requirements of AddressSpace.MapFile.
   426  	FD() int
   427  }
   428  
   429  // FileRange represents a range of uint64 offsets into a File.
   430  //
   431  // type FileRange <generated using go_generics>
   432  
   433  // String implements fmt.Stringer.String.
   434  func (fr FileRange) String() string {
   435  	return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End)
   436  }