// Source: github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/platform/platform.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package platform provides a Platform abstraction.
//
// See Platform for more information.
package platform

import (
	"fmt"
	"os"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/seccomp"
	"github.com/SagerNet/gvisor/pkg/sentry/arch"
	"github.com/SagerNet/gvisor/pkg/sentry/hostmm"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/usermem"
)

// Platform provides abstractions for execution contexts (Context,
// AddressSpace).
//
// Several methods are capability queries (SupportsAddressSpaceIO,
// DetectsCPUPreemption, HaveGlobalMemoryBarrier) that gate whether other
// methods may be called; see the individual method comments.
type Platform interface {
	// SupportsAddressSpaceIO returns true if AddressSpaces returned by this
	// Platform support AddressSpaceIO methods.
	//
	// The value returned by SupportsAddressSpaceIO is guaranteed to remain
	// unchanged over the lifetime of the Platform.
	SupportsAddressSpaceIO() bool

	// CooperativelySchedulesAddressSpace returns true if the Platform has a
	// limited number of AddressSpaces, such that mm.MemoryManager.Deactivate
	// should call AddressSpace.Release when there are no goroutines that
	// require the mm.MemoryManager to have an active AddressSpace.
	//
	// The value returned by CooperativelySchedulesAddressSpace is guaranteed
	// to remain unchanged over the lifetime of the Platform.
	CooperativelySchedulesAddressSpace() bool

	// DetectsCPUPreemption returns true if Contexts returned by the Platform
	// can reliably return ErrContextCPUPreempted.
	DetectsCPUPreemption() bool

	// HaveGlobalMemoryBarrier returns true if the GlobalMemoryBarrier method
	// is supported.
	HaveGlobalMemoryBarrier() bool

	// MapUnit returns the alignment used for optional mappings into this
	// platform's AddressSpaces. Higher values indicate lower per-page costs
	// for AddressSpace.MapFile. As a special case, a MapUnit of 0 indicates
	// that the cost of AddressSpace.MapFile is effectively independent of the
	// number of pages mapped. If MapUnit is non-zero, it must be a power-of-2
	// multiple of hostarch.PageSize.
	MapUnit() uint64

	// MinUserAddress returns the minimum mappable address on this
	// platform.
	MinUserAddress() hostarch.Addr

	// MaxUserAddress returns the maximum mappable address on this
	// platform.
	MaxUserAddress() hostarch.Addr

	// NewAddressSpace returns a new memory context for this platform.
	//
	// If mappingsID is not nil, the platform may assume that (1) all calls
	// to NewAddressSpace with the same mappingsID represent the same
	// (mutable) set of mappings, and (2) the set of mappings has not
	// changed since the last time AddressSpace.Release was called on an
	// AddressSpace returned by a call to NewAddressSpace with the same
	// mappingsID.
	//
	// If a new AddressSpace cannot be created immediately, a nil
	// AddressSpace is returned, along with a channel that is closed when
	// the caller should retry a call to NewAddressSpace.
	//
	// In general, this blocking behavior only occurs when
	// CooperativelySchedulesAddressSpace (above) returns false.
	NewAddressSpace(mappingsID interface{}) (AddressSpace, <-chan struct{}, error)

	// NewContext returns a new execution context.
	NewContext() Context

	// PreemptAllCPUs causes all concurrent calls to Context.Switch(), as well
	// as the first following call to Context.Switch() for each Context, to
	// return ErrContextCPUPreempted.
	//
	// PreemptAllCPUs is only supported if DetectsCPUPreemption() == true.
	// Platforms for which this does not hold may panic if PreemptAllCPUs is
	// called.
	PreemptAllCPUs() error

	// GlobalMemoryBarrier blocks until all threads running application code
	// (via Context.Switch) and all task goroutines "have passed through a
	// state where all memory accesses to user-space addresses match program
	// order between entry to and return from [GlobalMemoryBarrier]", as for
	// membarrier(2).
	//
	// Preconditions: HaveGlobalMemoryBarrier() == true.
	GlobalMemoryBarrier() error

	// SyscallFilters returns syscalls made exclusively by this platform.
	SyscallFilters() seccomp.SyscallRules
}

// NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and
// dependent methods for Platforms that do not support this feature.
type NoCPUPreemptionDetection struct{}

// DetectsCPUPreemption implements Platform.DetectsCPUPreemption. It always
// reports false.
func (NoCPUPreemptionDetection) DetectsCPUPreemption() bool {
	return false
}

// PreemptAllCPUs implements Platform.PreemptAllCPUs. It never returns: it
// panics unconditionally, since preemption detection is unsupported.
func (NoCPUPreemptionDetection) PreemptAllCPUs() error {
	panic("This platform does not support CPU preemption detection")
}

// UseHostGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier and
// Platform.GlobalMemoryBarrier by invoking equivalent functionality on the
// host.
type UseHostGlobalMemoryBarrier struct{}

// HaveGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier.
139 func (UseHostGlobalMemoryBarrier) HaveGlobalMemoryBarrier() bool { 140 return hostmm.HaveGlobalMemoryBarrier() 141 } 142 143 // GlobalMemoryBarrier implements Platform.GlobalMemoryBarrier. 144 func (UseHostGlobalMemoryBarrier) GlobalMemoryBarrier() error { 145 return hostmm.GlobalMemoryBarrier() 146 } 147 148 // UseHostProcessMemoryBarrier implements Platform.HaveGlobalMemoryBarrier and 149 // Platform.GlobalMemoryBarrier by invoking a process-local memory barrier. 150 // This is faster than UseHostGlobalMemoryBarrier, but is only appropriate for 151 // platforms for which application code executes while using the sentry's 152 // mm_struct. 153 type UseHostProcessMemoryBarrier struct{} 154 155 // HaveGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier. 156 func (UseHostProcessMemoryBarrier) HaveGlobalMemoryBarrier() bool { 157 // Fall back to a global memory barrier if a process-local one isn't 158 // available. 159 return hostmm.HaveProcessMemoryBarrier() || hostmm.HaveGlobalMemoryBarrier() 160 } 161 162 // GlobalMemoryBarrier implements Platform.GlobalMemoryBarrier. 163 func (UseHostProcessMemoryBarrier) GlobalMemoryBarrier() error { 164 if hostmm.HaveProcessMemoryBarrier() { 165 return hostmm.ProcessMemoryBarrier() 166 } 167 return hostmm.GlobalMemoryBarrier() 168 } 169 170 // MemoryManager represents an abstraction above the platform address space 171 // which manages memory mappings and their contents. 172 type MemoryManager interface { 173 //usermem.IO provides access to the contents of a virtual memory space. 174 usermem.IO 175 // MMap establishes a memory mapping. 176 MMap(ctx context.Context, opts memmap.MMapOpts) (hostarch.Addr, error) 177 // AddressSpace returns the AddressSpace bound to mm. 178 AddressSpace() AddressSpace 179 } 180 181 // Context represents the execution context for a single thread. 
type Context interface {
	// Switch resumes execution of the thread specified by the arch.Context
	// in the provided address space. This call will block while the thread
	// is executing.
	//
	// If cpu is non-negative, and it is not the number of the CPU that the
	// thread executes on, Context should return ErrContextCPUPreempted. cpu
	// can only be non-negative if Platform.DetectsCPUPreemption() is true;
	// Contexts from Platforms for which this does not hold may ignore cpu, or
	// panic if cpu is non-negative.
	//
	// Switch may return one of the following special errors:
	//
	// - nil: The Context invoked a system call.
	//
	// - ErrContextSignal: The Context was interrupted by a signal. The
	// returned *linux.SignalInfo contains information about the signal. If
	// linux.SignalInfo.Signo == SIGSEGV, the returned hostarch.AccessType
	// contains the access type of the triggering fault. The caller owns
	// the returned SignalInfo.
	//
	// - ErrContextInterrupt: The Context was interrupted by a call to
	// Interrupt(). Switch() may return ErrContextInterrupt spuriously. In
	// particular, most implementations of Interrupt() will cause the first
	// following call to Switch() to return ErrContextInterrupt if there is no
	// concurrent call to Switch().
	//
	// - ErrContextCPUPreempted: See the definition of that error for details.
	Switch(ctx context.Context, mm MemoryManager, ac arch.Context, cpu int32) (*linux.SignalInfo, hostarch.AccessType, error)

	// PullFullState pulls the full state of the application thread.
	//
	// A platform can support lazy loading/restoring of a thread state,
	// which includes registers and floating point state.
	//
	// For example, when the Sentry handles a system call, it may have only
	// the syscall arguments, without the other registers or the floating
	// point state. If the Sentry then needs to construct a signal frame to
	// call a signal handler, it must call PullFullState() to load all
	// registers and FPU state.
	//
	// Preconditions: The caller must be running on the task goroutine.
	PullFullState(as AddressSpace, ac arch.Context)

	// FullStateChanged indicates that a thread state has been changed by
	// the Sentry. This happens in case of rt_sigreturn, execve, etc.
	//
	// First, it indicates that the Sentry has the full state of the thread
	// and PullFullState() has to do nothing if it is called after
	// FullStateChanged().
	//
	// Second, it forces restoring the full state of the application
	// thread. A platform can support lazy loading/restoring of a thread
	// state. This means that if the Sentry has not changed a thread state,
	// the platform may not restore it.
	//
	// Preconditions: The caller must be running on the task goroutine.
	FullStateChanged()

	// Interrupt interrupts a concurrent call to Switch(), causing it to return
	// ErrContextInterrupt.
	Interrupt()

	// Release releases any resources associated with this context.
	Release()
}

// Sentinel errors returned by Context.Switch().
var (
	// ErrContextSignal is returned by Context.Switch() to indicate that the
	// Context was interrupted by a signal.
	ErrContextSignal = fmt.Errorf("interrupted by signal")

	// ErrContextSignalCPUID is equivalent to ErrContextSignal, except that
	// a check should be done for execution of the CPUID instruction. If
	// the current instruction pointer is a CPUID instruction, then this
	// should be emulated appropriately. If not, then the given signal
	// should be handled as for ErrContextSignal.
	ErrContextSignalCPUID = fmt.Errorf("interrupted by signal, possible CPUID")

	// ErrContextInterrupt is returned by Context.Switch() to indicate that the
	// Context was interrupted by a call to Context.Interrupt().
	ErrContextInterrupt = fmt.Errorf("interrupted by platform.Context.Interrupt()")

	// ErrContextCPUPreempted is returned by Context.Switch() to indicate that
	// one of the following occurred:
	//
	// - The CPU executing the Context is not the CPU passed to
	// Context.Switch().
	//
	// - The CPU executing the Context may have executed another Context since
	// the last time it executed this one; or the CPU has previously executed
	// another Context, and has never executed this one.
	//
	// - Platform.PreemptAllCPUs() was called since the last return from
	// Context.Switch().
	ErrContextCPUPreempted = fmt.Errorf("interrupted by CPU preemption")
)

// SignalInterrupt is a signal reserved for use by implementations of
// Context.Interrupt(). The sentry guarantees that it will ignore delivery of
// this signal both to Contexts and to the sentry itself, under the assumption
// that they originate from races with Context.Interrupt().
//
// NOTE(b/23420492): The Go runtime only guarantees that a small subset
// of signals will always be unblocked on all threads, one of which
// is SIGCHLD.
const SignalInterrupt = linux.SIGCHLD

// AddressSpace represents a virtual address space in which a Context can
// execute.
type AddressSpace interface {
	// MapFile creates a shared mapping of offsets fr from f at address addr.
	// Any existing overlapping mappings are silently replaced.
	//
	// If precommit is true, the platform should eagerly commit resources (e.g.
	// physical memory) to the mapping. The precommit flag is advisory and
	// implementations may choose to ignore it.
	//
	// Preconditions:
	// * addr and fr must be page-aligned.
	// * fr.Length() > 0.
	// * at.Any() == true.
	// * At least one reference must be held on all pages in fr, and must
	//   continue to be held as long as pages are mapped.
	MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error

	// Unmap unmaps the given range.
	//
	// Preconditions:
	// * addr is page-aligned.
	// * length > 0.
	Unmap(addr hostarch.Addr, length uint64)

	// Release releases this address space. After releasing, a new AddressSpace
	// must be acquired via platform.NewAddressSpace().
	Release()

	// PreFork is called before creating a copy of AddressSpace. This
	// guarantees that this address space will be in a consistent state.
	PreFork()

	// PostFork is called after creating a copy of AddressSpace.
	PostFork()

	// AddressSpaceIO methods are supported iff the associated platform's
	// Platform.SupportsAddressSpaceIO() == true. AddressSpaces for which this
	// does not hold may panic if AddressSpaceIO methods are invoked.
	AddressSpaceIO
}

// AddressSpaceIO supports IO through the memory mappings installed in an
// AddressSpace.
//
// AddressSpaceIO implementors are responsible for ensuring that address ranges
// are application-mappable.
type AddressSpaceIO interface {
	// CopyOut copies len(src) bytes from src to the memory mapped at addr. It
	// returns the number of bytes copied. If the number of bytes copied is <
	// len(src), it returns a non-nil error explaining why.
	CopyOut(addr hostarch.Addr, src []byte) (int, error)

	// CopyIn copies len(dst) bytes from the memory mapped at addr to dst.
	// It returns the number of bytes copied. If the number of bytes copied is
	// < len(dst), it returns a non-nil error explaining why.
	CopyIn(addr hostarch.Addr, dst []byte) (int, error)

	// ZeroOut sets toZero bytes to 0, starting at addr. It returns the number
	// of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a
	// non-nil error explaining why.
	ZeroOut(addr hostarch.Addr, toZero uintptr) (uintptr, error)

	// SwapUint32 atomically sets the uint32 value at addr to new and returns
	// the previous value.
	//
	// Preconditions: addr must be aligned to a 4-byte boundary.
	SwapUint32(addr hostarch.Addr, new uint32) (uint32, error)

	// CompareAndSwapUint32 atomically compares the uint32 value at addr to
	// old; if they are equal, the value in memory is replaced by new. In
	// either case, the previous value stored in memory is returned.
	//
	// Preconditions: addr must be aligned to a 4-byte boundary.
	CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error)

	// LoadUint32 atomically loads the uint32 value at addr and returns it.
	//
	// Preconditions: addr must be aligned to a 4-byte boundary.
	LoadUint32(addr hostarch.Addr) (uint32, error)
}

// NoAddressSpaceIO implements AddressSpaceIO methods by panicking.
type NoAddressSpaceIO struct{}

// CopyOut implements AddressSpaceIO.CopyOut. It panics unconditionally.
func (NoAddressSpaceIO) CopyOut(addr hostarch.Addr, src []byte) (int, error) {
	panic("This platform does not support AddressSpaceIO")
}

// CopyIn implements AddressSpaceIO.CopyIn. It panics unconditionally.
func (NoAddressSpaceIO) CopyIn(addr hostarch.Addr, dst []byte) (int, error) {
	panic("This platform does not support AddressSpaceIO")
}

// ZeroOut implements AddressSpaceIO.ZeroOut. It panics unconditionally.
func (NoAddressSpaceIO) ZeroOut(addr hostarch.Addr, toZero uintptr) (uintptr, error) {
	panic("This platform does not support AddressSpaceIO")
}

// SwapUint32 implements AddressSpaceIO.SwapUint32. It panics unconditionally.
func (NoAddressSpaceIO) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) {
	panic("This platform does not support AddressSpaceIO")
}

// CompareAndSwapUint32 implements AddressSpaceIO.CompareAndSwapUint32.
396 func (NoAddressSpaceIO) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) { 397 panic("This platform does not support AddressSpaceIO") 398 } 399 400 // LoadUint32 implements AddressSpaceIO.LoadUint32. 401 func (NoAddressSpaceIO) LoadUint32(addr hostarch.Addr) (uint32, error) { 402 panic("This platform does not support AddressSpaceIO") 403 } 404 405 // SegmentationFault is an error returned by AddressSpaceIO methods when IO 406 // fails due to access of an unmapped page, or a mapped page with insufficient 407 // permissions. 408 type SegmentationFault struct { 409 // Addr is the address at which the fault occurred. 410 Addr hostarch.Addr 411 } 412 413 // Error implements error.Error. 414 func (f SegmentationFault) Error() string { 415 return fmt.Sprintf("segmentation fault at %#x", f.Addr) 416 } 417 418 // Requirements is used to specify platform specific requirements. 419 type Requirements struct { 420 // RequiresCurrentPIDNS indicates that the sandbox has to be started in the 421 // current pid namespace. 422 RequiresCurrentPIDNS bool 423 // RequiresCapSysPtrace indicates that the sandbox has to be started with 424 // the CAP_SYS_PTRACE capability. 425 RequiresCapSysPtrace bool 426 } 427 428 // Constructor represents a platform type. 429 type Constructor interface { 430 // New returns a new platform instance. 431 // 432 // Arguments: 433 // 434 // * deviceFile - the device file (e.g. /dev/kvm for the KVM platform). 435 New(deviceFile *os.File) (Platform, error) 436 OpenDevice() (*os.File, error) 437 438 // Requirements returns platform specific requirements. 439 Requirements() Requirements 440 } 441 442 // platforms contains all available platform types. 443 var platforms = map[string]Constructor{} 444 445 // Register registers a new platform type. 446 func Register(name string, platform Constructor) { 447 platforms[name] = platform 448 } 449 450 // Lookup looks up the platform constructor by name. 
451 func Lookup(name string) (Constructor, error) { 452 p, ok := platforms[name] 453 if !ok { 454 return nil, fmt.Errorf("unknown platform: %v", name) 455 } 456 return p, nil 457 }