gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/loader/loader.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package loader loads an executable file into a MemoryManager. 16 package loader 17 18 import ( 19 "bytes" 20 "fmt" 21 "io" 22 "path" 23 24 "gvisor.dev/gvisor/pkg/abi" 25 "gvisor.dev/gvisor/pkg/abi/linux" 26 "gvisor.dev/gvisor/pkg/abi/linux/errno" 27 "gvisor.dev/gvisor/pkg/context" 28 "gvisor.dev/gvisor/pkg/cpuid" 29 "gvisor.dev/gvisor/pkg/errors/linuxerr" 30 "gvisor.dev/gvisor/pkg/fspath" 31 "gvisor.dev/gvisor/pkg/hostarch" 32 "gvisor.dev/gvisor/pkg/rand" 33 "gvisor.dev/gvisor/pkg/sentry/arch" 34 "gvisor.dev/gvisor/pkg/sentry/kernel/auth" 35 "gvisor.dev/gvisor/pkg/sentry/mm" 36 "gvisor.dev/gvisor/pkg/sentry/vfs" 37 "gvisor.dev/gvisor/pkg/syserr" 38 "gvisor.dev/gvisor/pkg/usermem" 39 ) 40 41 const ( 42 securityCapability = linux.XATTR_SECURITY_PREFIX + "capability" 43 ) 44 45 // LoadArgs holds specifications for an executable file to be loaded. 46 type LoadArgs struct { 47 // MemoryManager is the memory manager to load the executable into. 48 MemoryManager *mm.MemoryManager 49 50 // RemainingTraversals is the maximum number of symlinks to follow to 51 // resolve Filename. This counter is passed by reference to keep it 52 // updated throughout the call stack. 53 RemainingTraversals *uint 54 55 // ResolveFinal indicates whether the final link of Filename should be 56 // resolved, if it is a symlink. 57 ResolveFinal bool 58 59 // Filename is the path for the executable. 60 Filename string 61 62 // File is an open FD of the executable. If File is not nil, then File will 63 // be loaded and Filename will be ignored. 64 // 65 // The caller is responsible for checking that the user can execute this file. 66 File *vfs.FileDescription 67 68 // Root is the current filesystem root. 69 Root vfs.VirtualDentry 70 71 // WorkingDir is the current working directory. 72 WorkingDir vfs.VirtualDentry 73 74 // If AfterOpen is not nil, it is called after every successful call to 75 // Opener.OpenPath(). 76 AfterOpen func(f *vfs.FileDescription) 77 78 // CloseOnExec indicates that the executable (or one of its parent 79 // directories) was opened with O_CLOEXEC. If the executable is an 80 // interpreter script, then cause an ENOENT error to occur, since the 81 // script would otherwise be inaccessible to the interpreter. 82 CloseOnExec bool 83 84 // Argv is the vector of arguments to pass to the executable. 85 Argv []string 86 87 // Envv is the vector of environment variables to pass to the 88 // executable. 89 Envv []string 90 91 // Features specifies the CPU feature set for the executable. 92 Features cpuid.FeatureSet 93 } 94 95 // openPath opens args.Filename and checks that it is valid for loading. 96 // 97 // openPath returns an *fs.Dirent and *fs.File for args.Filename, which is not 98 // installed in the Task FDTable. The caller takes ownership of both. 99 // 100 // args.Filename must be a readable, executable, regular file. 101 func openPath(ctx context.Context, args LoadArgs) (*vfs.FileDescription, error) { 102 if args.Filename == "" { 103 ctx.Infof("cannot open empty name") 104 return nil, linuxerr.ENOENT 105 } 106 107 // TODO(gvisor.dev/issue/160): Linux requires only execute permission, 108 // not read. However, our backing filesystems may prevent us from reading 109 // the file without read permission. Additionally, a task with a 110 // non-readable executable has additional constraints on access via 111 // ptrace and procfs. 112 opts := vfs.OpenOptions{ 113 Flags: linux.O_RDONLY, 114 FileExec: true, 115 } 116 vfsObj := args.Root.Mount().Filesystem().VirtualFilesystem() 117 creds := auth.CredentialsFromContext(ctx) 118 path := fspath.Parse(args.Filename) 119 pop := &vfs.PathOperation{ 120 Root: args.Root, 121 Start: args.WorkingDir, 122 Path: path, 123 FollowFinalSymlink: args.ResolveFinal, 124 } 125 if path.Absolute { 126 pop.Start = args.Root 127 } 128 fd, err := vfsObj.OpenAt(ctx, creds, pop, &opts) 129 if err != nil { 130 return nil, err 131 } 132 if args.AfterOpen != nil { 133 args.AfterOpen(fd) 134 } 135 return fd, nil 136 } 137 138 // checkIsRegularFile prevents us from trying to execute a directory, pipe, etc. 139 func checkIsRegularFile(ctx context.Context, fd *vfs.FileDescription, filename string) error { 140 stat, err := fd.Stat(ctx, vfs.StatOptions{}) 141 if err != nil { 142 return err 143 } 144 if t := linux.FileMode(stat.Mode).FileType(); t != linux.ModeRegular { 145 ctx.Infof("%q is not a regular file: %v", filename, t) 146 return linuxerr.EACCES 147 } 148 return nil 149 } 150 151 // allocStack allocates and maps a stack in to any available part of the address space. 152 func allocStack(ctx context.Context, m *mm.MemoryManager, a *arch.Context64) (*arch.Stack, error) { 153 ar, err := m.MapStack(ctx) 154 if err != nil { 155 return nil, err 156 } 157 return &arch.Stack{Arch: a, IO: m, Bottom: ar.End}, nil 158 } 159 160 const ( 161 // maxLoaderAttempts is the maximum number of attempts to try to load 162 // an interpreter scripts, to prevent loops. 6 (initial + 5 changes) is 163 // what the Linux kernel allows (fs/exec.c:search_binary_handler). 164 maxLoaderAttempts = 6 165 ) 166 167 // loadExecutable loads an executable that is pointed to by args.File. The 168 // caller is responsible for checking that the user can execute this file. 169 // If nil, the path args.Filename is resolved and loaded (check that the user 170 // can execute this file is done here in this case). If the executable is an 171 // interpreter script rather than an ELF, the binary of the corresponding 172 // interpreter will be loaded. 173 // 174 // It returns: 175 // - loadedELF, description of the loaded binary 176 // - arch.Context64 matching the binary arch 177 // - fs.Dirent of the binary file 178 // - Possibly updated args.Argv 179 func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, *arch.Context64, *vfs.FileDescription, []string, error) { 180 for i := 0; i < maxLoaderAttempts; i++ { 181 if args.File == nil { 182 var err error 183 args.File, err = openPath(ctx, args) 184 if err != nil { 185 ctx.Infof("Error opening %s: %v", args.Filename, err) 186 return loadedELF{}, nil, nil, nil, err 187 } 188 // Ensure file is release in case the code loops or errors out. 189 defer args.File.DecRef(ctx) 190 } else { 191 if err := checkIsRegularFile(ctx, args.File, args.Filename); err != nil { 192 return loadedELF{}, nil, nil, nil, err 193 } 194 } 195 196 // Check the header. Is this an ELF or interpreter script? 197 var hdr [4]uint8 198 // N.B. We assume that reading from a regular file cannot block. 199 _, err := args.File.ReadFull(ctx, usermem.BytesIOSequence(hdr[:]), 0) 200 // Allow unexpected EOF, as a valid executable could be only three bytes 201 // (e.g., #!a). 202 if err != nil && err != io.ErrUnexpectedEOF { 203 if err == io.EOF { 204 err = linuxerr.ENOEXEC 205 } 206 return loadedELF{}, nil, nil, nil, err 207 } 208 209 switch { 210 case bytes.Equal(hdr[:], []byte(elfMagic)): 211 loaded, ac, err := loadELF(ctx, args) 212 if err != nil { 213 ctx.Infof("Error loading ELF: %v", err) 214 return loadedELF{}, nil, nil, nil, err 215 } 216 // An ELF is always terminal. Hold on to file. 217 args.File.IncRef() 218 return loaded, ac, args.File, args.Argv, err 219 220 case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)): 221 if args.CloseOnExec { 222 return loadedELF{}, nil, nil, nil, linuxerr.ENOENT 223 } 224 args.Filename, args.Argv, err = parseInterpreterScript(ctx, args.Filename, args.File, args.Argv) 225 if err != nil { 226 ctx.Infof("Error loading interpreter script: %v", err) 227 return loadedELF{}, nil, nil, nil, err 228 } 229 // Refresh the traversal limit for the interpreter. 230 *args.RemainingTraversals = linux.MaxSymlinkTraversals 231 232 default: 233 ctx.Infof("Unknown magic: %v", hdr) 234 return loadedELF{}, nil, nil, nil, linuxerr.ENOEXEC 235 } 236 // Set to nil in case we loop on a Interpreter Script. 237 args.File = nil 238 } 239 240 return loadedELF{}, nil, nil, nil, linuxerr.ELOOP 241 } 242 243 // ImageInfo represents the information for the loaded image. 244 type ImageInfo struct { 245 // The target operating system of the image. 246 OS abi.OS 247 // AMD64 context. 248 Arch *arch.Context64 249 // The base name of the binary. 250 Name string 251 // The binary's file capability. 252 FileCaps string 253 } 254 255 // Load loads args.File into a MemoryManager. If args.File is nil, the path 256 // args.Filename is resolved and loaded instead. 257 // 258 // If Load returns ErrSwitchFile it should be called again with the returned 259 // path and argv. 260 // 261 // Preconditions: 262 // - The Task MemoryManager is empty. 263 // - Load is called on the Task goroutine. 264 func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *VDSO) (ImageInfo, *syserr.Error) { 265 // Load the executable itself. 266 loaded, ac, file, newArgv, err := loadExecutable(ctx, args) 267 if err != nil { 268 return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux()) 269 } 270 defer file.DecRef(ctx) 271 xattr, err := file.GetXattr(ctx, &vfs.GetXattrOptions{Name: securityCapability, Size: linux.XATTR_CAPS_SZ_3}) 272 switch { 273 case linuxerr.Equals(linuxerr.ENODATA, err), linuxerr.Equals(linuxerr.ENOTSUP, err): 274 xattr = "" 275 case err != nil: 276 return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("failed to read file capabilities of %s: %v", args.Filename, err), syserr.FromError(err).ToLinux()) 277 } 278 279 // Load the VDSO. 280 vdsoAddr, err := loadVDSO(ctx, args.MemoryManager, vdso, loaded) 281 if err != nil { 282 return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("error loading VDSO: %v", err), syserr.FromError(err).ToLinux()) 283 } 284 285 // Setup the heap. brk starts at the next page after the end of the 286 // executable. Userspace can assume that the remainder of the page after 287 // loaded.end is available for its use. 288 e, ok := loaded.end.RoundUp() 289 if !ok { 290 return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("brk overflows: %#x", loaded.end), errno.ENOEXEC) 291 } 292 args.MemoryManager.BrkSetup(ctx, e) 293 294 // Allocate our stack. 295 stack, err := allocStack(ctx, args.MemoryManager, ac) 296 if err != nil { 297 return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to allocate stack: %v", err), syserr.FromError(err).ToLinux()) 298 } 299 300 // Push the original filename to the stack, for AT_EXECFN. 301 if _, err := stack.PushNullTerminatedByteSlice([]byte(args.Filename)); err != nil { 302 return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to push exec filename: %v", err), syserr.FromError(err).ToLinux()) 303 } 304 execfn := stack.Bottom 305 306 // Push 16 random bytes on the stack which AT_RANDOM will point to. 307 var b [16]byte 308 if _, err := rand.Read(b[:]); err != nil { 309 return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to read random bytes: %v", err), syserr.FromError(err).ToLinux()) 310 } 311 if _, err = stack.PushNullTerminatedByteSlice(b[:]); err != nil { 312 return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to push random bytes: %v", err), syserr.FromError(err).ToLinux()) 313 } 314 random := stack.Bottom 315 316 c := auth.CredentialsFromContext(ctx) 317 318 // Add generic auxv entries. 319 auxv := append(loaded.auxv, arch.Auxv{ 320 arch.AuxEntry{linux.AT_UID, hostarch.Addr(c.RealKUID.In(c.UserNamespace).OrOverflow())}, 321 arch.AuxEntry{linux.AT_EUID, hostarch.Addr(c.EffectiveKUID.In(c.UserNamespace).OrOverflow())}, 322 arch.AuxEntry{linux.AT_GID, hostarch.Addr(c.RealKGID.In(c.UserNamespace).OrOverflow())}, 323 arch.AuxEntry{linux.AT_EGID, hostarch.Addr(c.EffectiveKGID.In(c.UserNamespace).OrOverflow())}, 324 // The conditions that require AT_SECURE = 1 never arise. See 325 // kernel.Task.updateCredsForExecLocked. 326 arch.AuxEntry{linux.AT_SECURE, 0}, 327 arch.AuxEntry{linux.AT_CLKTCK, linux.CLOCKS_PER_SEC}, 328 arch.AuxEntry{linux.AT_EXECFN, execfn}, 329 arch.AuxEntry{linux.AT_RANDOM, random}, 330 arch.AuxEntry{linux.AT_PAGESZ, hostarch.PageSize}, 331 arch.AuxEntry{linux.AT_SYSINFO_EHDR, vdsoAddr}, 332 }...) 333 auxv = append(auxv, extraAuxv...) 334 335 sl, err := stack.Load(newArgv, args.Envv, auxv) 336 if err != nil { 337 return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to load stack: %v", err), syserr.FromError(err).ToLinux()) 338 } 339 340 m := args.MemoryManager 341 m.SetArgvStart(sl.ArgvStart) 342 m.SetArgvEnd(sl.ArgvEnd) 343 m.SetEnvvStart(sl.EnvvStart) 344 m.SetEnvvEnd(sl.EnvvEnd) 345 m.SetAuxv(auxv) 346 m.SetExecutable(ctx, file) 347 m.SetVDSOSigReturn(uint64(vdsoAddr) + vdsoSigreturnOffset - vdsoPrelink) 348 349 ac.SetIP(uintptr(loaded.entry)) 350 ac.SetStack(uintptr(stack.Bottom)) 351 352 name := path.Base(args.Filename) 353 if len(name) > linux.TASK_COMM_LEN-1 { 354 name = name[:linux.TASK_COMM_LEN-1] 355 } 356 357 return ImageInfo{ 358 OS: loaded.os, 359 Arch: ac, 360 Name: name, 361 FileCaps: xattr, 362 }, nil 363 }