gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/kernel/task_image.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kernel 16 17 import ( 18 "fmt" 19 20 "gvisor.dev/gvisor/pkg/abi/linux/errno" 21 "gvisor.dev/gvisor/pkg/context" 22 "gvisor.dev/gvisor/pkg/hostarch" 23 "gvisor.dev/gvisor/pkg/sentry/arch" 24 "gvisor.dev/gvisor/pkg/sentry/kernel/futex" 25 "gvisor.dev/gvisor/pkg/sentry/loader" 26 "gvisor.dev/gvisor/pkg/sentry/mm" 27 "gvisor.dev/gvisor/pkg/syserr" 28 ) 29 30 var errNoSyscalls = syserr.New("no syscall table found", errno.ENOEXEC) 31 32 // Auxmap contains miscellaneous data for the task. 33 type Auxmap map[string]any 34 35 // TaskImage is the subset of a task's data that is provided by the loader. 36 // 37 // +stateify savable 38 type TaskImage struct { 39 // Name is the thread name set by the prctl(PR_SET_NAME) system call. 40 Name string 41 42 // Arch is the architecture-specific context (registers, etc.) 43 Arch *arch.Context64 44 45 // MemoryManager is the task's address space. 46 MemoryManager *mm.MemoryManager 47 48 // fu implements futexes in the address space. 49 fu *futex.Manager 50 51 // st is the task's syscall table. 52 st *SyscallTable `state:".(syscallTableInfo)"` 53 54 // fileCaps is the image's extended attribute named security.capability. 55 fileCaps string 56 } 57 58 // FileCaps return the task image's security.capability extended attribute. 59 func (image *TaskImage) FileCaps() string { 60 return image.fileCaps 61 } 62 63 // release releases all resources held by the TaskImage. release is called by 64 // the task when it execs into a new TaskImage. 65 func (image *TaskImage) release(ctx context.Context) { 66 // Nil out pointers so that if the task is saved after release, it doesn't 67 // follow the pointers to possibly now-invalid objects. 68 if image.MemoryManager != nil { 69 image.MemoryManager.DecUsers(ctx) 70 image.MemoryManager = nil 71 } 72 image.fu = nil 73 } 74 75 // Fork returns a duplicate of image. The copied TaskImage always has an 76 // independent arch.Context64. If shareAddressSpace is true, the copied 77 // TaskImage shares an address space with the original; otherwise, the copied 78 // TaskImage has an independent address space that is initially a duplicate 79 // of the original's. 80 func (image *TaskImage) Fork(ctx context.Context, k *Kernel, shareAddressSpace bool) (*TaskImage, error) { 81 newImage := &TaskImage{ 82 Name: image.Name, 83 Arch: image.Arch.Fork(), 84 st: image.st, 85 } 86 if shareAddressSpace { 87 newImage.MemoryManager = image.MemoryManager 88 if newImage.MemoryManager != nil { 89 if !newImage.MemoryManager.IncUsers() { 90 // Shouldn't be possible since image.MemoryManager should be a 91 // counted user. 92 panic(fmt.Sprintf("TaskImage.Fork called with userless TaskImage.MemoryManager")) 93 } 94 } 95 newImage.fu = image.fu 96 } else { 97 newMM, err := image.MemoryManager.Fork(ctx) 98 if err != nil { 99 return nil, err 100 } 101 newImage.MemoryManager = newMM 102 newImage.fu = k.futexes.Fork() 103 } 104 return newImage, nil 105 } 106 107 // Arch returns t's arch.Context64. 108 // 109 // Preconditions: The caller must be running on the task goroutine, or t.mu 110 // must be locked. 111 func (t *Task) Arch() *arch.Context64 { 112 return t.image.Arch 113 } 114 115 // MemoryManager returns t's MemoryManager. MemoryManager does not take an 116 // additional reference on the returned MM. 117 // 118 // Preconditions: The caller must be running on the task goroutine, or t.mu 119 // must be locked. 120 func (t *Task) MemoryManager() *mm.MemoryManager { 121 return t.image.MemoryManager 122 } 123 124 // SyscallTable returns t's syscall table. 125 // 126 // Preconditions: The caller must be running on the task goroutine, or t.mu 127 // must be locked. 128 func (t *Task) SyscallTable() *SyscallTable { 129 return t.image.st 130 } 131 132 // Stack returns the userspace stack. 133 // 134 // Preconditions: The caller must be running on the task goroutine, or t.mu 135 // must be locked. 136 func (t *Task) Stack() *arch.Stack { 137 return &arch.Stack{ 138 Arch: t.Arch(), 139 IO: t.MemoryManager(), 140 Bottom: hostarch.Addr(t.Arch().Stack()), 141 } 142 } 143 144 // LoadTaskImage loads a specified file into a new TaskImage. 145 // 146 // args.MemoryManager does not need to be set by the caller. 147 func (k *Kernel) LoadTaskImage(ctx context.Context, args loader.LoadArgs) (*TaskImage, *syserr.Error) { 148 // Prepare a new user address space to load into. 149 m := mm.NewMemoryManager(k, k.mf, k.SleepForAddressSpaceActivation) 150 defer m.DecUsers(ctx) 151 args.MemoryManager = m 152 153 info, err := loader.Load(ctx, args, k.extraAuxv, k.vdso) 154 if err != nil { 155 return nil, err 156 } 157 158 // Lookup our new syscall table. 159 st, ok := LookupSyscallTable(info.OS, info.Arch.Arch()) 160 if !ok { 161 // No syscall table found. This means that the ELF binary does not match 162 // the architecture. 163 return nil, errNoSyscalls 164 } 165 166 if !m.IncUsers() { 167 panic("Failed to increment users count on new MM") 168 } 169 return &TaskImage{ 170 Name: info.Name, 171 Arch: info.Arch, 172 MemoryManager: m, 173 fu: k.futexes.Fork(), 174 st: st, 175 fileCaps: info.FileCaps, 176 }, nil 177 }