github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/platform/platform.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package platform provides a Platform abstraction.
//
// See Platform for more information.
package platform

import (
	"fmt"
	"os"

	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
	"github.com/MerlinKodo/gvisor/pkg/context"
	"github.com/MerlinKodo/gvisor/pkg/hostarch"
	"github.com/MerlinKodo/gvisor/pkg/seccomp"
	"github.com/MerlinKodo/gvisor/pkg/sentry/arch"
	"github.com/MerlinKodo/gvisor/pkg/sentry/hostmm"
	"github.com/MerlinKodo/gvisor/pkg/sentry/memmap"
	"github.com/MerlinKodo/gvisor/pkg/usermem"
)

// Platform provides abstractions for execution contexts (Context,
// AddressSpace).
type Platform interface {
	// SupportsAddressSpaceIO returns true if AddressSpaces returned by this
	// Platform support AddressSpaceIO methods.
	//
	// The value returned by SupportsAddressSpaceIO is guaranteed to remain
	// unchanged over the lifetime of the Platform.
	SupportsAddressSpaceIO() bool

	// CooperativelySchedulesAddressSpace returns true if the Platform has a
	// limited number of AddressSpaces, such that mm.MemoryManager.Deactivate
	// should call AddressSpace.Release when there are no goroutines that
	// require the mm.MemoryManager to have an active AddressSpace.
	//
	// The value returned by CooperativelySchedulesAddressSpace is guaranteed
	// to remain unchanged over the lifetime of the Platform.
	CooperativelySchedulesAddressSpace() bool

	// DetectsCPUPreemption returns true if Contexts returned by the Platform
	// can reliably return ErrContextCPUPreempted.
	DetectsCPUPreemption() bool

	// HaveGlobalMemoryBarrier returns true if the GlobalMemoryBarrier method
	// is supported.
	HaveGlobalMemoryBarrier() bool

	// OwnsPageTables returns true if the Platform implementation manages any
	// page tables directly (rather than via host mmap(2), etc.). As of this
	// writing, this property is relevant because the AddressSpace interface
	// does not support specification of memory type (cacheability), such that
	// host FDs specifying memory types (e.g. device drivers) can only set them
	// correctly in host-managed page tables.
	OwnsPageTables() bool

	// MapUnit returns the alignment used for optional mappings into this
	// platform's AddressSpaces. Higher values indicate lower per-page costs
	// for AddressSpace.MapFile. As a special case, a MapUnit of 0 indicates
	// that the cost of AddressSpace.MapFile is effectively independent of the
	// number of pages mapped. If MapUnit is non-zero, it must be a power-of-2
	// multiple of hostarch.PageSize.
	MapUnit() uint64

	// MinUserAddress returns the minimum mappable address on this
	// platform.
	MinUserAddress() hostarch.Addr

	// MaxUserAddress returns the maximum mappable address on this
	// platform.
	MaxUserAddress() hostarch.Addr

	// NewAddressSpace returns a new memory context for this platform.
	//
	// If mappingsID is not nil, the platform may assume that (1) all calls
	// to NewAddressSpace with the same mappingsID represent the same
	// (mutable) set of mappings, and (2) the set of mappings has not
	// changed since the last time AddressSpace.Release was called on an
	// AddressSpace returned by a call to NewAddressSpace with the same
	// mappingsID.
	//
	// If a new AddressSpace cannot be created immediately, a nil
	// AddressSpace is returned, along with a channel that is closed when
	// the caller should retry a call to NewAddressSpace.
	//
	// In general, this blocking behavior only occurs when
	// CooperativelySchedulesAddressSpace (above) returns false.
	NewAddressSpace(mappingsID any) (AddressSpace, <-chan struct{}, error)

	// NewContext returns a new execution context.
	NewContext(context.Context) Context

	// PreemptAllCPUs causes all concurrent calls to Context.Switch(), as well
	// as the first following call to Context.Switch() for each Context, to
	// return ErrContextCPUPreempted.
	//
	// PreemptAllCPUs is only supported if DetectsCPUPreemption() == true.
	// Platforms for which this does not hold may panic if PreemptAllCPUs is
	// called.
	PreemptAllCPUs() error

	// GlobalMemoryBarrier blocks until all threads running application code
	// (via Context.Switch) and all task goroutines "have passed through a
	// state where all memory accesses to user-space addresses match program
	// order between entry to and return from [GlobalMemoryBarrier]", as for
	// membarrier(2).
	//
	// Preconditions: HaveGlobalMemoryBarrier() == true.
	GlobalMemoryBarrier() error

	// SyscallFilters returns syscalls made exclusively by this platform.
	SyscallFilters() seccomp.SyscallRules
}
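
// The helper below is an illustrative sketch only (it is not part of the real
// sentry, and the name newAddressSpaceBlocking is hypothetical). It shows the
// intended use of the retry channel documented on NewAddressSpace above: when
// no AddressSpace is immediately available, wait until the channel is closed
// and retry.
func newAddressSpaceBlocking(p Platform, mappingsID any) (AddressSpace, error) {
	for {
		as, c, err := p.NewAddressSpace(mappingsID)
		if err != nil {
			return nil, err
		}
		if as != nil {
			return as, nil
		}
		// A nil AddressSpace with a non-nil channel means "retry after the
		// channel is closed".
		<-c
	}
}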

// NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and
// dependent methods for Platforms that do not support this feature.
type NoCPUPreemptionDetection struct{}

// DetectsCPUPreemption implements Platform.DetectsCPUPreemption.
func (NoCPUPreemptionDetection) DetectsCPUPreemption() bool {
	return false
}

// PreemptAllCPUs implements Platform.PreemptAllCPUs.
func (NoCPUPreemptionDetection) PreemptAllCPUs() error {
	panic("This platform does not support CPU preemption detection")
}

// UseHostGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier and
// Platform.GlobalMemoryBarrier by invoking equivalent functionality on the
// host.
type UseHostGlobalMemoryBarrier struct{}

// HaveGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier.
func (UseHostGlobalMemoryBarrier) HaveGlobalMemoryBarrier() bool {
	return hostmm.HaveGlobalMemoryBarrier()
}

// GlobalMemoryBarrier implements Platform.GlobalMemoryBarrier.
func (UseHostGlobalMemoryBarrier) GlobalMemoryBarrier() error {
	return hostmm.GlobalMemoryBarrier()
}
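
// examplePlatformBase is an illustrative sketch only (it is not a real
// platform). It shows how Platform implementations typically compose the
// helper types defined in this file: embedding NoCPUPreemptionDetection and
// UseHostGlobalMemoryBarrier supplies the corresponding Platform methods, so
// a concrete platform only has to implement the remaining ones itself.
type examplePlatformBase struct {
	NoCPUPreemptionDetection
	UseHostGlobalMemoryBarrier
}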

// UseHostProcessMemoryBarrier implements Platform.HaveGlobalMemoryBarrier and
// Platform.GlobalMemoryBarrier by invoking a process-local memory barrier.
// This is faster than UseHostGlobalMemoryBarrier, but is only appropriate for
// platforms for which application code executes while using the sentry's
// mm_struct.
type UseHostProcessMemoryBarrier struct{}

// HaveGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier.
func (UseHostProcessMemoryBarrier) HaveGlobalMemoryBarrier() bool {
	// Fall back to a global memory barrier if a process-local one isn't
	// available.
	return hostmm.HaveProcessMemoryBarrier() || hostmm.HaveGlobalMemoryBarrier()
}

// GlobalMemoryBarrier implements Platform.GlobalMemoryBarrier.
func (UseHostProcessMemoryBarrier) GlobalMemoryBarrier() error {
	if hostmm.HaveProcessMemoryBarrier() {
		return hostmm.ProcessMemoryBarrier()
	}
	return hostmm.GlobalMemoryBarrier()
}

// DoesOwnPageTables implements Platform.OwnsPageTables in the positive.
type DoesOwnPageTables struct{}

// OwnsPageTables implements Platform.OwnsPageTables.
func (DoesOwnPageTables) OwnsPageTables() bool {
	return true
}

// DoesNotOwnPageTables implements Platform.OwnsPageTables in the negative.
type DoesNotOwnPageTables struct{}

// OwnsPageTables implements Platform.OwnsPageTables.
func (DoesNotOwnPageTables) OwnsPageTables() bool {
	return false
}

// MemoryManager represents an abstraction above the platform address space
// which manages memory mappings and their contents.
type MemoryManager interface {
	// usermem.IO provides access to the contents of a virtual memory space.
	usermem.IO
	// MMap establishes a memory mapping.
	MMap(ctx context.Context, opts memmap.MMapOpts) (hostarch.Addr, error)
	// AddressSpace returns the AddressSpace bound to mm.
	AddressSpace() AddressSpace
	// FindVMAByName finds a vma with the specified name.
	FindVMAByName(ar hostarch.AddrRange, hint string) (hostarch.Addr, uint64, error)
}
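
// anonymousMMap is an illustrative sketch only: the helper name is
// hypothetical, and the memmap.MMapOpts fields used here (Length, Private,
// Perms, MaxPerms) are assumptions about that type rather than something
// documented in this file. It shows the general shape of a MemoryManager.MMap
// call establishing a private anonymous mapping.
func anonymousMMap(ctx context.Context, mm MemoryManager, length uint64) (hostarch.Addr, error) {
	return mm.MMap(ctx, memmap.MMapOpts{
		Length:   length,              // Assumed field: mapping length in bytes.
		Private:  true,                // Assumed field: not shared with other mappings.
		Perms:    hostarch.ReadWrite,  // Assumed field: requested access permissions.
		MaxPerms: hostarch.AnyAccess,  // Assumed field: maximum permissions allowed later.
	})
}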

// Context represents the execution context for a single thread.
type Context interface {
	// Switch resumes execution of the thread specified by the arch.Context64
	// in the provided address space. This call will block while the thread
	// is executing.
	//
	// If cpu is non-negative, and it is not the number of the CPU that the
	// thread executes on, Context should return ErrContextCPUPreempted. cpu
	// can only be non-negative if Platform.DetectsCPUPreemption() is true;
	// Contexts from Platforms for which this does not hold may ignore cpu, or
	// panic if cpu is non-negative.
	//
	// Switch may return one of the following special errors:
	//
	// - nil: The Context invoked a system call.
	//
	// - ErrContextSignal: The Context was interrupted by a signal. The
	// returned *linux.SignalInfo contains information about the signal. If
	// linux.SignalInfo.Signo == SIGSEGV, the returned hostarch.AccessType
	// contains the access type of the triggering fault. The caller owns
	// the returned SignalInfo.
	//
	// - ErrContextInterrupt: The Context was interrupted by a call to
	// Interrupt(). Switch() may return ErrContextInterrupt spuriously. In
	// particular, most implementations of Interrupt() will cause the first
	// following call to Switch() to return ErrContextInterrupt if there is no
	// concurrent call to Switch().
	//
	// - ErrContextCPUPreempted: See the definition of that error for details.
	Switch(ctx context.Context, mm MemoryManager, ac *arch.Context64, cpu int32) (*linux.SignalInfo, hostarch.AccessType, error)

	// PullFullState() pulls the full state of the application thread.
	//
	// A platform can support lazy loading/restoring of a thread state,
	// which includes registers and floating point state.
	//
	// For example, when the Sentry handles a system call, it may have only
	// the syscall arguments without other registers and floating point
	// state. In this case, if the Sentry needs to construct a signal frame
	// to call a signal handler, it must call PullFullState() to load all
	// registers and FPU state.
	//
	// Preconditions: The caller must be running on the task goroutine.
	PullFullState(as AddressSpace, ac *arch.Context64) error

	// FullStateChanged() indicates that a thread state has been changed by
	// the Sentry. This happens, for example, on rt_sigreturn and execve.
	//
	// First, it indicates that the Sentry has the full state of the thread
	// and PullFullState() has to do nothing if it is called after
	// FullStateChanged().
	//
	// Second, it forces restoring the full state of the application
	// thread. A platform can support lazy loading/restoring of a thread
	// state. This means that if the Sentry has not changed a thread state,
	// the platform may not restore it.
	//
	// Preconditions: The caller must be running on the task goroutine.
	FullStateChanged()

	// Interrupt interrupts a concurrent call to Switch(), causing it to return
	// ErrContextInterrupt.
	Interrupt()

	// Release() releases any resources associated with this context.
	Release()

	// PrepareSleep() is called when the thread switches to the
	// interruptible sleep state.
	PrepareSleep()
}

var (
	// ErrContextSignal is returned by Context.Switch() to indicate that the
	// Context was interrupted by a signal.
	ErrContextSignal = fmt.Errorf("interrupted by signal")

	// ErrContextInterrupt is returned by Context.Switch() to indicate that the
	// Context was interrupted by a call to Context.Interrupt().
	ErrContextInterrupt = fmt.Errorf("interrupted by platform.Context.Interrupt()")

	// ErrContextCPUPreempted is returned by Context.Switch() to indicate that
	// one of the following occurred:
	//
	// - The CPU executing the Context is not the CPU passed to
	// Context.Switch().
	//
	// - The CPU executing the Context may have executed another Context since
	// the last time it executed this one; or the CPU has previously executed
	// another Context, and has never executed this one.
	//
	// - Platform.PreemptAllCPUs() was called since the last return from
	// Context.Switch().
	ErrContextCPUPreempted = fmt.Errorf("interrupted by CPU preemption")
)

// SignalInterrupt is a signal reserved for use by implementations of
// Context.Interrupt(). The sentry guarantees that it will ignore delivery of
// this signal both to Contexts and to the sentry itself, under the assumption
// that they originate from races with Context.Interrupt().
//
// NOTE(b/23420492): The Go runtime only guarantees that a small subset
// of signals will always be unblocked on all threads, one of which
// is SIGCHLD.
const SignalInterrupt = linux.SIGCHLD
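
// runAppThread is an illustrative sketch only (it is not the sentry's real
// task run loop, and the helper name is hypothetical). It shows how a caller
// is expected to drive Context.Switch and dispatch on the special error
// values defined above.
func runAppThread(ctx context.Context, c Context, mm MemoryManager, ac *arch.Context64) error {
	for {
		si, at, err := c.Switch(ctx, mm, ac, -1) // cpu < 0: preemption detection unused.
		switch err {
		case nil:
			// The thread invoked a system call; handle it and resume.
		case ErrContextSignal:
			// si describes the signal; for SIGSEGV, at holds the access type
			// of the faulting access.
			_, _ = si, at
		case ErrContextInterrupt:
			// Interrupt() was called (possibly spuriously); check for pending
			// work and resume.
		case ErrContextCPUPreempted:
			// Only possible when the platform detects CPU preemption; see
			// ErrContextCPUPreempted above.
		default:
			// Unexpected platform error.
			return err
		}
	}
}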

// AddressSpace represents a virtual address space in which a Context can
// execute.
type AddressSpace interface {
	// MapFile creates a shared mapping of offsets fr from f at address addr.
	// Any existing overlapping mappings are silently replaced.
	//
	// If precommit is true, the platform should eagerly commit resources (e.g.
	// physical memory) to the mapping. The precommit flag is advisory and
	// implementations may choose to ignore it.
	//
	// Preconditions:
	// * addr and fr must be page-aligned.
	// * fr.Length() > 0.
	// * at.Any() == true.
	// * At least one reference must be held on all pages in fr, and must
	//   continue to be held as long as pages are mapped.
	MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error

	// Unmap unmaps the given range.
	//
	// Preconditions:
	// * addr is page-aligned.
	// * length > 0.
	Unmap(addr hostarch.Addr, length uint64)

	// Release releases this address space. After releasing, a new AddressSpace
	// must be acquired via platform.NewAddressSpace().
	Release()

	// PreFork() is called before creating a copy of AddressSpace. This
	// guarantees that this address space will be in a consistent state.
	PreFork()

	// PostFork() is called after creating a copy of AddressSpace.
	PostFork()

	// AddressSpaceIO methods are supported iff the associated platform's
	// Platform.SupportsAddressSpaceIO() == true. AddressSpaces for which this
	// does not hold may panic if AddressSpaceIO methods are invoked.
	AddressSpaceIO
}
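
// copyOutViaAddressSpace is an illustrative sketch only (the helper name is
// hypothetical). It shows that AddressSpaceIO calls such as CopyOut are only
// valid when the owning Platform reports SupportsAddressSpaceIO, and that a
// failed access can surface as a SegmentationFault error (assumed here to be
// returned unwrapped).
func copyOutViaAddressSpace(p Platform, as AddressSpace, addr hostarch.Addr, src []byte) (int, error) {
	if !p.SupportsAddressSpaceIO() {
		return 0, fmt.Errorf("platform does not support AddressSpaceIO")
	}
	n, err := as.CopyOut(addr, src)
	if sf, ok := err.(SegmentationFault); ok {
		// The copy stopped at an unmapped or insufficiently mapped page.
		return n, fmt.Errorf("partial copy of %d bytes: %v", n, sf)
	}
	return n, err
}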

// AddressSpaceIO supports IO through the memory mappings installed in an
// AddressSpace.
//
// AddressSpaceIO implementors are responsible for ensuring that address ranges
// are application-mappable.
type AddressSpaceIO interface {
	// CopyOut copies len(src) bytes from src to the memory mapped at addr. It
	// returns the number of bytes copied. If the number of bytes copied is <
	// len(src), it returns a non-nil error explaining why.
	CopyOut(addr hostarch.Addr, src []byte) (int, error)

	// CopyIn copies len(dst) bytes from the memory mapped at addr to dst.
	// It returns the number of bytes copied. If the number of bytes copied is
	// < len(dst), it returns a non-nil error explaining why.
	CopyIn(addr hostarch.Addr, dst []byte) (int, error)

	// ZeroOut sets toZero bytes to 0, starting at addr. It returns the number
	// of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a
	// non-nil error explaining why.
	ZeroOut(addr hostarch.Addr, toZero uintptr) (uintptr, error)

	// SwapUint32 atomically sets the uint32 value at addr to new and returns
	// the previous value.
	//
	// Preconditions: addr must be aligned to a 4-byte boundary.
	SwapUint32(addr hostarch.Addr, new uint32) (uint32, error)

	// CompareAndSwapUint32 atomically compares the uint32 value at addr to
	// old; if they are equal, the value in memory is replaced by new. In
	// either case, the previous value stored in memory is returned.
	//
	// Preconditions: addr must be aligned to a 4-byte boundary.
	CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error)

	// LoadUint32 atomically loads the uint32 value at addr and returns it.
	//
	// Preconditions: addr must be aligned to a 4-byte boundary.
	LoadUint32(addr hostarch.Addr) (uint32, error)
}

// NoAddressSpaceIO implements AddressSpaceIO methods by panicking.
type NoAddressSpaceIO struct{}

// CopyOut implements AddressSpaceIO.CopyOut.
func (NoAddressSpaceIO) CopyOut(addr hostarch.Addr, src []byte) (int, error) {
	panic("This platform does not support AddressSpaceIO")
}

// CopyIn implements AddressSpaceIO.CopyIn.
func (NoAddressSpaceIO) CopyIn(addr hostarch.Addr, dst []byte) (int, error) {
	panic("This platform does not support AddressSpaceIO")
}

// ZeroOut implements AddressSpaceIO.ZeroOut.
func (NoAddressSpaceIO) ZeroOut(addr hostarch.Addr, toZero uintptr) (uintptr, error) {
	panic("This platform does not support AddressSpaceIO")
}

// SwapUint32 implements AddressSpaceIO.SwapUint32.
func (NoAddressSpaceIO) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) {
	panic("This platform does not support AddressSpaceIO")
}

// CompareAndSwapUint32 implements AddressSpaceIO.CompareAndSwapUint32.
func (NoAddressSpaceIO) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) {
	panic("This platform does not support AddressSpaceIO")
}

// LoadUint32 implements AddressSpaceIO.LoadUint32.
func (NoAddressSpaceIO) LoadUint32(addr hostarch.Addr) (uint32, error) {
	panic("This platform does not support AddressSpaceIO")
}

// SegmentationFault is an error returned by AddressSpaceIO methods when IO
// fails due to access of an unmapped page, or a mapped page with insufficient
// permissions.
type SegmentationFault struct {
	// Addr is the address at which the fault occurred.
	Addr hostarch.Addr
}

// Error implements error.Error.
func (f SegmentationFault) Error() string {
	return fmt.Sprintf("segmentation fault at %#x", f.Addr)
}

// Requirements is used to specify platform specific requirements.
type Requirements struct {
	// RequiresCurrentPIDNS indicates that the sandbox has to be started in the
	// current pid namespace.
	RequiresCurrentPIDNS bool
	// RequiresCapSysPtrace indicates that the sandbox has to be started with
	// the CAP_SYS_PTRACE capability.
	RequiresCapSysPtrace bool
}

// Constructor represents a platform type.
type Constructor interface {
	// New returns a new platform instance.
	//
	// Arguments:
	//
	// * deviceFile - the device file (e.g. /dev/kvm for the KVM platform).
	New(deviceFile *os.File) (Platform, error)

	// OpenDevice opens the path to the device used by the platform.
	// Passing in an empty string will use the default path for the device,
	// e.g. "/dev/kvm" for the KVM platform.
	OpenDevice(devicePath string) (*os.File, error)

	// Requirements returns platform specific requirements.
	Requirements() Requirements
}
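
// exampleConstructor is an illustrative sketch only (it is not a real,
// registered platform). It shows the minimal shape of a Constructor; real
// platforms such as KVM and ptrace implement this interface in their own
// packages and register themselves by name via Register below.
type exampleConstructor struct{}

// New implements Constructor.New.
func (exampleConstructor) New(deviceFile *os.File) (Platform, error) {
	return nil, fmt.Errorf("exampleConstructor is illustrative only")
}

// OpenDevice implements Constructor.OpenDevice.
func (exampleConstructor) OpenDevice(devicePath string) (*os.File, error) {
	return nil, nil // This example platform needs no device file.
}

// Requirements implements Constructor.Requirements.
func (exampleConstructor) Requirements() Requirements {
	return Requirements{}
}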

// platforms contains all available platform types.
var platforms = map[string]Constructor{}

// Register registers a new platform type.
func Register(name string, platform Constructor) {
	platforms[name] = platform
}

// List lists available platforms.
func List() (available []string) {
	for name := range platforms {
		available = append(available, name)
	}
	return
}

// Lookup looks up the platform constructor by name.
func Lookup(name string) (Constructor, error) {
	p, ok := platforms[name]
	if !ok {
		return nil, fmt.Errorf("unknown platform: %v", name)
	}
	return p, nil
}
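
// newPlatformByName is an illustrative sketch only (the helper name is
// hypothetical; external callers would go through Lookup from their own
// packages). It shows the intended construction flow: look up a registered
// Constructor by name, open its device file (an empty path selects the
// platform's default, e.g. "/dev/kvm" for KVM), and build the Platform.
func newPlatformByName(name, devicePath string) (Platform, error) {
	ctor, err := Lookup(name)
	if err != nil {
		return nil, err
	}
	deviceFile, err := ctor.OpenDevice(devicePath)
	if err != nil {
		return nil, err
	}
	return ctor.New(deviceFile)
}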