gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/memmap/memmap.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package memmap defines semantics for memory mappings. 16 package memmap 17 18 import ( 19 "fmt" 20 21 "gvisor.dev/gvisor/pkg/context" 22 "gvisor.dev/gvisor/pkg/hostarch" 23 "gvisor.dev/gvisor/pkg/safemem" 24 ) 25 26 // Mappable represents a memory-mappable object, a mutable mapping from uint64 27 // offsets to (File, uint64 File offset) pairs. 28 // 29 // See mm/mm.go for Mappable's place in the lock order. 30 // 31 // All Mappable methods have the following preconditions: 32 // - hostarch.AddrRanges and MappableRanges must be non-empty (Length() != 0). 33 // - hostarch.Addrs and Mappable offsets must be page-aligned. 34 type Mappable interface { 35 // AddMapping notifies the Mappable of a mapping from addresses ar in ms to 36 // offsets [offset, offset+ar.Length()) in this Mappable. 37 // 38 // The writable flag indicates whether the backing data for a Mappable can 39 // be modified through the mapping. Effectively, this means a shared mapping 40 // where Translate may be called with at.Write == true. This is a property 41 // established at mapping creation and must remain constant throughout the 42 // lifetime of the mapping. 43 // 44 // Preconditions: offset+ar.Length() does not overflow. 
45 AddMapping(ctx context.Context, ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error 46 47 // RemoveMapping notifies the Mappable of the removal of a mapping from 48 // addresses ar in ms to offsets [offset, offset+ar.Length()) in this 49 // Mappable. 50 // 51 // Preconditions: 52 // * offset+ar.Length() does not overflow. 53 // * The removed mapping must exist. writable must match the 54 // corresponding call to AddMapping. 55 RemoveMapping(ctx context.Context, ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) 56 57 // CopyMapping notifies the Mappable of an attempt to copy a mapping in ms 58 // from srcAR to dstAR. For most Mappables, this is equivalent to 59 // AddMapping. Note that it is possible that srcAR.Length() != dstAR.Length(), 60 // and also that srcAR.Length() == 0. 61 // 62 // CopyMapping is only called when a mapping is copied within a given 63 // MappingSpace; it is analogous to Linux's vm_operations_struct::mremap. 64 // 65 // Preconditions: 66 // * offset+srcAR.Length() and offset+dstAR.Length() do not overflow. 67 // * The mapping at srcAR must exist. writable must match the 68 // corresponding call to AddMapping. 69 CopyMapping(ctx context.Context, ms MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error 70 71 // Translate returns the Mappable's current mappings for at least the range 72 // of offsets specified by required, and at most the range of offsets 73 // specified by optional. at is the set of access types that may be 74 // performed using the returned Translations. If not all required offsets 75 // are translated, it returns a non-nil error explaining why. 76 // 77 // Translations are valid until invalidated by a callback to 78 // MappingSpace.Invalidate or until the caller removes its mapping of the 79 // translated range. 
Mappable implementations must ensure that at least one 80 // reference is held on all pages in a File that may be the result 81 // of a valid Translation. 82 // 83 // Preconditions: 84 // * required.Length() > 0. 85 // * optional.IsSupersetOf(required). 86 // * required and optional must be page-aligned. 87 // * The caller must have established a mapping for all of the queried 88 // offsets via a previous call to AddMapping. 89 // * The caller is responsible for ensuring that calls to Translate 90 // synchronize with invalidation. 91 // 92 // Postconditions: See CheckTranslateResult. 93 Translate(ctx context.Context, required, optional MappableRange, at hostarch.AccessType) ([]Translation, error) 94 95 // InvalidateUnsavable requests that the Mappable invalidate Translations 96 // that cannot be preserved across save/restore. 97 // 98 // Invariant: InvalidateUnsavable never races with concurrent calls to any 99 // other Mappable methods. 100 InvalidateUnsavable(ctx context.Context) error 101 } 102 103 // Translations are returned by Mappable.Translate. 104 type Translation struct { 105 // Source is the translated range in the Mappable. 106 Source MappableRange 107 108 // File is the mapped file. 109 File File 110 111 // Offset is the offset into File at which this Translation begins. 112 Offset uint64 113 114 // Perms is the set of permissions for which platform.AddressSpace.MapFile 115 // and platform.AddressSpace.MapInternal on this Translation is permitted. 116 Perms hostarch.AccessType 117 } 118 119 // FileRange returns the FileRange represented by t. 120 func (t Translation) FileRange() FileRange { 121 return FileRange{t.Offset, t.Offset + t.Source.Length()} 122 } 123 124 // CheckTranslateResult returns an error if (ts, terr) does not satisfy all 125 // postconditions for Mappable.Translate(required, optional, at). 126 // 127 // Preconditions: Same as Mappable.Translate. 
128 func CheckTranslateResult(required, optional MappableRange, at hostarch.AccessType, ts []Translation, terr error) error { 129 // Verify that the inputs to Mappable.Translate were valid. 130 if !required.WellFormed() || required.Length() == 0 { 131 panic(fmt.Sprintf("invalid required range: %v", required)) 132 } 133 if !hostarch.Addr(required.Start).IsPageAligned() || !hostarch.Addr(required.End).IsPageAligned() { 134 panic(fmt.Sprintf("unaligned required range: %v", required)) 135 } 136 if !optional.IsSupersetOf(required) { 137 panic(fmt.Sprintf("optional range %v is not a superset of required range %v", optional, required)) 138 } 139 if !hostarch.Addr(optional.Start).IsPageAligned() || !hostarch.Addr(optional.End).IsPageAligned() { 140 panic(fmt.Sprintf("unaligned optional range: %v", optional)) 141 } 142 143 // The first Translation must include required.Start. 144 if len(ts) != 0 && !ts[0].Source.Contains(required.Start) { 145 return fmt.Errorf("first Translation %+v does not cover start of required range %v", ts[0], required) 146 } 147 for i, t := range ts { 148 if !t.Source.WellFormed() || t.Source.Length() == 0 { 149 return fmt.Errorf("Translation %+v has invalid Source", t) 150 } 151 if !hostarch.Addr(t.Source.Start).IsPageAligned() || !hostarch.Addr(t.Source.End).IsPageAligned() { 152 return fmt.Errorf("Translation %+v has unaligned Source", t) 153 } 154 if t.File == nil { 155 return fmt.Errorf("Translation %+v has nil File", t) 156 } 157 if !hostarch.Addr(t.Offset).IsPageAligned() { 158 return fmt.Errorf("Translation %+v has unaligned Offset", t) 159 } 160 // Translations must be contiguous and in increasing order of 161 // Translation.Source. 162 if i > 0 && ts[i-1].Source.End != t.Source.Start { 163 return fmt.Errorf("Translation %+v and Translation %+v are not contiguous", ts[i-1], t) 164 } 165 // At least part of each Translation must be required. 
166 if t.Source.Intersect(required).Length() == 0 { 167 return fmt.Errorf("Translation %+v lies entirely outside required range %v", t, required) 168 } 169 // Translations must be constrained to the optional range. 170 if !optional.IsSupersetOf(t.Source) { 171 return fmt.Errorf("Translation %+v lies outside optional range %v", t, optional) 172 } 173 // Each Translation must permit a superset of requested accesses. 174 if !t.Perms.SupersetOf(at) { 175 return fmt.Errorf("Translation %+v does not permit all requested access types %v", t, at) 176 } 177 } 178 // If the set of Translations does not cover the entire required range, 179 // Translate must return a non-nil error explaining why. 180 if terr == nil { 181 if len(ts) == 0 { 182 return fmt.Errorf("no Translations and no error") 183 } 184 if t := ts[len(ts)-1]; !t.Source.Contains(required.End - 1) { 185 return fmt.Errorf("last Translation %+v does not reach end of required range %v, but Translate returned no error", t, required) 186 } 187 } 188 return nil 189 } 190 191 // BusError may be returned by implementations of Mappable.Translate for errors 192 // that should result in SIGBUS delivery if they cause application page fault 193 // handling to fail. 194 type BusError struct { 195 // Err is the original error. 196 Err error 197 } 198 199 // Error implements error.Error. 200 func (b *BusError) Error() string { 201 return fmt.Sprintf("BusError: %v", b.Err.Error()) 202 } 203 204 // MappableRange represents a range of uint64 offsets into a Mappable. 205 // 206 // type MappableRange <generated using go_generics> 207 208 // String implements fmt.Stringer.String. 209 func (mr MappableRange) String() string { 210 return fmt.Sprintf("[%#x, %#x)", mr.Start, mr.End) 211 } 212 213 // MappingSpace represents a mutable mapping from hostarch.Addrs to (Mappable, 214 // uint64 offset) pairs. 
type MappingSpace interface {
	// Invalidate is called to notify the MappingSpace that values returned by
	// previous calls to Mappable.Translate for offsets mapped by addresses in
	// ar are no longer valid.
	//
	// Invalidate must not take any locks preceding mm.MemoryManager.activeMu
	// in the lock order.
	//
	// Preconditions:
	//   - ar.Length() != 0.
	//   - ar must be page-aligned.
	Invalidate(ar hostarch.AddrRange, opts InvalidateOpts)
}

// InvalidateOpts holds options to MappingSpace.Invalidate.
type InvalidateOpts struct {
	// InvalidatePrivate is true if private pages in the invalidated region
	// should also be discarded, causing their data to be lost.
	InvalidatePrivate bool
}

// MappingIdentity controls the lifetime of a Mappable, and provides
// information about the Mappable for /proc/[pid]/maps. It is distinct from
// Mappable because all Mappables that are coherent must compare equal to
// support the implementation of shared futexes, but different
// MappingIdentities may represent the same Mappable, in the same way that
// multiple fs.Files may represent the same fs.Inode. (This similarity is not
// coincidental; fs.File implements MappingIdentity, and some
// fs.InodeOperations implement Mappable.)
type MappingIdentity interface {
	// IncRef increments the MappingIdentity's reference count.
	IncRef()

	// DecRef decrements the MappingIdentity's reference count.
	DecRef(ctx context.Context)

	// MappedName returns the application-visible name shown in
	// /proc/[pid]/maps.
	MappedName(ctx context.Context) string

	// DeviceID returns the device number shown in /proc/[pid]/maps.
	DeviceID() uint64

	// InodeID returns the inode number shown in /proc/[pid]/maps.
	InodeID() uint64

	// Msync has the same semantics as fs.FileOperations.Fsync(ctx,
	// int64(mr.Start), int64(mr.End-1), fs.SyncData).
	// (fs.FileOperations.Fsync() takes an inclusive end, but mr.End is
	// exclusive, hence mr.End-1.) It is defined rather than Fsync so that
	// implementors don't need to depend on the fs package for fs.SyncType.
	Msync(ctx context.Context, mr MappableRange) error
}

// MLockMode specifies the memory locking behavior of a memory mapping.
type MLockMode int

// Note that the ordering of MLockModes is significant; see
// mm.MemoryManager.defMLockMode.
const (
	// MLockNone specifies that a mapping has no memory locking behavior.
	//
	// This must be the zero value for MLockMode.
	MLockNone MLockMode = iota

	// MLockEager specifies that a mapping is memory-locked, as by mlock() or
	// similar. Pages in the mapping should be made, and kept, resident in
	// physical memory as soon as possible.
	//
	// As of this writing, MLockEager does not cause memory-locking to be
	// requested from the host; it only affects the sentry's memory management
	// behavior.
	//
	// MLockEager is analogous to Linux's VM_LOCKED.
	MLockEager

	// MLockLazy specifies that a mapping is memory-locked, as by mlock() or
	// similar. Pages in the mapping should be kept resident in physical memory
	// once they have been made resident due to e.g. a page fault.
	//
	// As of this writing, MLockLazy does not cause memory-locking to be
	// requested from the host; in fact, it has virtually no effect, except for
	// interactions between mlocked pages and other syscalls.
	//
	// MLockLazy is analogous to Linux's VM_LOCKED | VM_LOCKONFAULT.
	MLockLazy
)

// MMapOpts specifies a request to create a memory mapping.
type MMapOpts struct {
	// Length is the length of the mapping.
	Length uint64

	// MappingIdentity controls the lifetime of Mappable, and provides
	// properties of the mapping shown in /proc/[pid]/maps. If MMapOpts is used
	// to successfully create a memory mapping, a reference is taken on
	// MappingIdentity.
	MappingIdentity MappingIdentity

	// Mappable is the Mappable to be mapped. If Mappable is nil, the mapping
	// is anonymous. If Mappable is not nil, it must remain valid as long as a
	// reference is held on MappingIdentity.
	Mappable Mappable

	// Offset is the offset into Mappable to map. If Mappable is nil, Offset is
	// ignored.
	Offset uint64

	// Addr is the suggested address for the mapping.
	Addr hostarch.Addr

	// Fixed specifies whether this is a fixed mapping (it must be located at
	// Addr).
	Fixed bool

	// Unmap specifies whether existing mappings in the range being mapped may
	// be replaced. If Unmap is true, Fixed must be true.
	Unmap bool

	// If Map32Bit is true, all addresses in the created mapping must fit in a
	// 32-bit integer. (Note that the "end address" of the mapping, i.e. the
	// address of the first byte *after* the mapping, need not fit in a 32-bit
	// integer.) Map32Bit is ignored if Fixed is true.
	Map32Bit bool

	// Perms is the set of permissions to be applied to this mapping.
	Perms hostarch.AccessType

	// MaxPerms limits the set of permissions that may ever apply to this
	// mapping. If Mappable is not nil, all memmap.Translations returned by
	// Mappable.Translate must support all accesses in MaxPerms.
	//
	// Preconditions: MaxPerms should be an effective AccessType, as
	// access cannot be limited beyond effective AccessTypes.
	MaxPerms hostarch.AccessType

	// Private is true if writes to the mapping should be propagated to a copy
	// that is exclusive to the MemoryManager.
	Private bool

	// GrowsDown is true if the mapping should be automatically expanded
	// downward on guard page faults.
	GrowsDown bool

	// PlatformEffect specifies the behavior requested from the platform for
	// the mapping; see the PlatformEffect* constants for the possible values.
	PlatformEffect MMapPlatformEffect

	// MLockMode specifies the memory locking behavior of the mapping.
	MLockMode MLockMode

	// Hint is the name used for the mapping in /proc/[pid]/maps. If Hint is
	// empty, MappingIdentity.MappedName() will be used instead.
	//
	// TODO(jamieliu): Replace entirely with MappingIdentity?
	Hint string

	// Force means to skip validation checks of Addr and Length. It can be
	// used to create special mappings below mm.layout.MinAddr and
	// mm.layout.MaxAddr. It has to be used with caution.
	//
	// NOTE(review): "below ... mm.layout.MaxAddr" possibly means "above
	// mm.layout.MaxAddr" -- confirm against the mm package.
	//
	// If Force is true, Unmap and Fixed must be true.
	Force bool

	// SentryOwnedContent indicates the sentry exclusively controls the
	// underlying memory backing the mapping thus the memory content is
	// guaranteed not to be modified outside the sentry's purview.
	SentryOwnedContent bool
}

// MMapPlatformEffect is the type of MMapOpts.PlatformEffect.
type MMapPlatformEffect uint8

// Possible values for MMapOpts.PlatformEffect:
const (
	// PlatformEffectDefault indicates that no specific behavior is requested
	// from the platform.
	PlatformEffectDefault MMapPlatformEffect = iota

	// PlatformEffectPopulate indicates that platform mappings should be
	// established for all pages in the mapping.
	PlatformEffectPopulate

	// PlatformEffectCommit is like PlatformEffectPopulate, but also requests
	// that the platform eagerly commit resources to the mapping, as in
	// platform.AddressSpace.MapFile(precommit=true).
	PlatformEffectCommit
)

// File represents a host file that may be mapped into a platform.AddressSpace.
type File interface {
	// All pages in a File are reference-counted.

	// IncRef increments the reference count on all pages in fr and
	// associates each page with a memCgID (memory cgroup id) to which it
	// belongs. memCgID will not be changed if the page already exists.
	//
	// Preconditions:
	//   - fr.Start and fr.End must be page-aligned.
	//   - fr.Length() > 0.
	//   - At least one reference must be held on all pages in fr. (The File
	//     interface does not provide a way to acquire an initial reference;
	//     implementors may define mechanisms for doing so.)
	IncRef(fr FileRange, memCgID uint32)

	// DecRef decrements the reference count on all pages in fr.
	//
	// Preconditions:
	//   - fr.Start and fr.End must be page-aligned.
	//   - fr.Length() > 0.
	//   - At least one reference must be held on all pages in fr.
	DecRef(fr FileRange)

	// MapInternal returns a mapping of the given file offsets in the invoking
	// process' address space for reading and writing.
	//
	// Note that fr.Start and fr.End need not be page-aligned.
	//
	// Preconditions:
	//   - fr.Length() > 0.
	//   - At least one reference must be held on all pages in fr.
	//
	// Postconditions: The returned mapping is valid as long as at least one
	// reference is held on the mapped pages.
	MapInternal(fr FileRange, at hostarch.AccessType) (safemem.BlockSeq, error)

	// BufferReadAt reads len(dst) bytes from the file into dst, starting at
	// file offset off. It returns the number of bytes read. Like
	// io.ReaderAt.ReadAt(), it never returns a short read with a nil error.
	//
	// Implementations of File for which MapInternal() never returns
	// BufferedIOFallbackErr can embed NoBufferedIOFallback to obtain an
	// appropriate implementation of BufferReadAt.
	//
	// Preconditions:
	//   - MapInternal() returned a BufferedIOFallbackErr.
	//   - At least one reference must be held on all read pages.
	BufferReadAt(off uint64, dst []byte) (uint64, error)

	// BufferWriteAt writes len(src) bytes src to the file, starting at file
	// offset off. It returns the number of bytes written. Like
	// io.WriterAt.WriteAt(), it never returns a short write with a nil error.
	//
	// Implementations of File for which MapInternal() never returns
	// BufferedIOFallbackErr can embed NoBufferedIOFallback to obtain an
	// appropriate implementation of BufferWriteAt.
	//
	// Preconditions:
	//   - MapInternal() returned a BufferedIOFallbackErr.
	//   - At least one reference must be held on all written pages.
	BufferWriteAt(off uint64, src []byte) (uint64, error)

	// FD returns the file descriptor represented by the File.
	//
	// The only permitted operation on the returned file descriptor is to map
	// pages from it consistent with the requirements of AddressSpace.MapFile.
	FD() int
}

// BufferedIOFallbackErr is returned (by value) by implementations of
// File.MapInternal() that cannot succeed, but can still support memory-mapped
// I/O by falling back to buffered reads and writes.
type BufferedIOFallbackErr struct{}

// Error implements error.Error.
func (BufferedIOFallbackErr) Error() string {
	return "memmap.File.MapInternal() is unsupported, fall back to buffered R/W for internally-mapped I/O"
}

// NoBufferedIOFallback implements File.BufferReadAt() and BufferWriteAt() for
// implementations of File for which MapInternal() never returns
// BufferedIOFallbackErr.
type NoBufferedIOFallback struct{}

// BufferReadAt implements File.BufferReadAt. It always panics: per the File
// contract it is only called after MapInternal() returned
// BufferedIOFallbackErr, which embedders of this type never do.
func (NoBufferedIOFallback) BufferReadAt(off uint64, dst []byte) (uint64, error) {
	panic("unimplemented: memmap.File.MapInternal() should not have returned BufferedIOFallbackErr")
}

// BufferWriteAt implements File.BufferWriteAt.
493 func (NoBufferedIOFallback) BufferWriteAt(off uint64, src []byte) (uint64, error) { 494 panic("unimplemented: memmap.File.MapInternal() should not have returned BufferedIOFallbackErr") 495 } 496 497 // FileRange represents a range of uint64 offsets into a File. 498 // 499 // type FileRange <generated using go_generics> 500 501 // String implements fmt.Stringer.String. 502 func (fr FileRange) String() string { 503 return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End) 504 }