gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/loader/loader.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package loader loads an executable file into a MemoryManager.
    16  package loader
    17  
    18  import (
    19  	"bytes"
    20  	"fmt"
    21  	"io"
    22  	"path"
    23  
    24  	"gvisor.dev/gvisor/pkg/abi"
    25  	"gvisor.dev/gvisor/pkg/abi/linux"
    26  	"gvisor.dev/gvisor/pkg/abi/linux/errno"
    27  	"gvisor.dev/gvisor/pkg/context"
    28  	"gvisor.dev/gvisor/pkg/cpuid"
    29  	"gvisor.dev/gvisor/pkg/errors/linuxerr"
    30  	"gvisor.dev/gvisor/pkg/fspath"
    31  	"gvisor.dev/gvisor/pkg/hostarch"
    32  	"gvisor.dev/gvisor/pkg/rand"
    33  	"gvisor.dev/gvisor/pkg/sentry/arch"
    34  	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
    35  	"gvisor.dev/gvisor/pkg/sentry/mm"
    36  	"gvisor.dev/gvisor/pkg/sentry/vfs"
    37  	"gvisor.dev/gvisor/pkg/syserr"
    38  	"gvisor.dev/gvisor/pkg/usermem"
    39  )
    40  
const (
	// securityCapability is the name of the extended attribute that Load
	// reads from the executable to obtain its file capabilities. It is
	// formed from the security xattr prefix followed by "capability".
	securityCapability = linux.XATTR_SECURITY_PREFIX + "capability"
)
    44  
// LoadArgs holds specifications for an executable file to be loaded.
//
// LoadArgs is passed by value to Load and to the internal helpers, so the
// changes the loader makes to Filename, File and Argv while following
// interpreter scripts are not visible to the caller. The exception is
// RemainingTraversals, which is a pointer and is written through.
type LoadArgs struct {
	// MemoryManager is the memory manager to load the executable into.
	MemoryManager *mm.MemoryManager

	// RemainingTraversals is the maximum number of symlinks to follow to
	// resolve Filename. This counter is passed by reference to keep it
	// updated throughout the call stack.
	//
	// When the executable turns out to be an interpreter script, the
	// pointed-to value is reset to linux.MaxSymlinkTraversals before the
	// interpreter is opened; the pointer is dereferenced unconditionally on
	// that path, so it must not be nil.
	RemainingTraversals *uint

	// ResolveFinal indicates whether the final link of Filename should be
	// resolved, if it is a symlink.
	ResolveFinal bool

	// Filename is the path for the executable. Load also pushes this string
	// to the new stack for AT_EXECFN and derives ImageInfo.Name from its
	// base name.
	Filename string

	// File is an open FD of the executable. If File is not nil, then File will
	// be loaded and Filename will be ignored.
	//
	// The caller is responsible for checking that the user can execute this file.
	File *vfs.FileDescription

	// Root is the current filesystem root. Absolute Filenames are resolved
	// starting from it.
	Root vfs.VirtualDentry

	// WorkingDir is the current working directory. Relative Filenames are
	// resolved starting from it.
	WorkingDir vfs.VirtualDentry

	// If AfterOpen is not nil, it is called with the newly opened
	// FileDescription after every successful open performed by openPath,
	// i.e. for the executable itself and for each interpreter reached
	// through an interpreter script.
	AfterOpen func(f *vfs.FileDescription)

	// CloseOnExec indicates that the executable (or one of its parent
	// directories) was opened with O_CLOEXEC. If the executable is an
	// interpreter script, then cause an ENOENT error to occur, since the
	// script would otherwise be inaccessible to the interpreter.
	CloseOnExec bool

	// Argv is the vector of arguments to pass to the executable. It may be
	// rewritten internally when an interpreter script is followed.
	Argv []string

	// Envv is the vector of environment variables to pass to the
	// executable.
	Envv []string

	// Features specifies the CPU feature set for the executable.
	Features cpuid.FeatureSet
}
    94  
    95  // openPath opens args.Filename and checks that it is valid for loading.
    96  //
    97  // openPath returns an *fs.Dirent and *fs.File for args.Filename, which is not
    98  // installed in the Task FDTable. The caller takes ownership of both.
    99  //
   100  // args.Filename must be a readable, executable, regular file.
   101  func openPath(ctx context.Context, args LoadArgs) (*vfs.FileDescription, error) {
   102  	if args.Filename == "" {
   103  		ctx.Infof("cannot open empty name")
   104  		return nil, linuxerr.ENOENT
   105  	}
   106  
   107  	// TODO(gvisor.dev/issue/160): Linux requires only execute permission,
   108  	// not read. However, our backing filesystems may prevent us from reading
   109  	// the file without read permission. Additionally, a task with a
   110  	// non-readable executable has additional constraints on access via
   111  	// ptrace and procfs.
   112  	opts := vfs.OpenOptions{
   113  		Flags:    linux.O_RDONLY,
   114  		FileExec: true,
   115  	}
   116  	vfsObj := args.Root.Mount().Filesystem().VirtualFilesystem()
   117  	creds := auth.CredentialsFromContext(ctx)
   118  	path := fspath.Parse(args.Filename)
   119  	pop := &vfs.PathOperation{
   120  		Root:               args.Root,
   121  		Start:              args.WorkingDir,
   122  		Path:               path,
   123  		FollowFinalSymlink: args.ResolveFinal,
   124  	}
   125  	if path.Absolute {
   126  		pop.Start = args.Root
   127  	}
   128  	fd, err := vfsObj.OpenAt(ctx, creds, pop, &opts)
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  	if args.AfterOpen != nil {
   133  		args.AfterOpen(fd)
   134  	}
   135  	return fd, nil
   136  }
   137  
   138  // checkIsRegularFile prevents us from trying to execute a directory, pipe, etc.
   139  func checkIsRegularFile(ctx context.Context, fd *vfs.FileDescription, filename string) error {
   140  	stat, err := fd.Stat(ctx, vfs.StatOptions{})
   141  	if err != nil {
   142  		return err
   143  	}
   144  	if t := linux.FileMode(stat.Mode).FileType(); t != linux.ModeRegular {
   145  		ctx.Infof("%q is not a regular file: %v", filename, t)
   146  		return linuxerr.EACCES
   147  	}
   148  	return nil
   149  }
   150  
   151  // allocStack allocates and maps a stack in to any available part of the address space.
   152  func allocStack(ctx context.Context, m *mm.MemoryManager, a *arch.Context64) (*arch.Stack, error) {
   153  	ar, err := m.MapStack(ctx)
   154  	if err != nil {
   155  		return nil, err
   156  	}
   157  	return &arch.Stack{Arch: a, IO: m, Bottom: ar.End}, nil
   158  }
   159  
const (
	// maxLoaderAttempts is the maximum number of files loadExecutable will
	// try while following interpreter scripts, to prevent loops. 6 (initial
	// + 5 changes) is what the Linux kernel allows
	// (fs/exec.c:search_binary_handler). Once the limit is exhausted
	// without reaching an ELF, loading fails with ELOOP.
	maxLoaderAttempts = 6
)
   166  
   167  // loadExecutable loads an executable that is pointed to by args.File. The
   168  // caller is responsible for checking that the user can execute this file.
   169  // If nil, the path args.Filename is resolved and loaded (check that the user
   170  // can execute this file is done here in this case). If the executable is an
   171  // interpreter script rather than an ELF, the binary of the corresponding
   172  // interpreter will be loaded.
   173  //
   174  // It returns:
   175  //   - loadedELF, description of the loaded binary
   176  //   - arch.Context64 matching the binary arch
   177  //   - fs.Dirent of the binary file
   178  //   - Possibly updated args.Argv
   179  func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, *arch.Context64, *vfs.FileDescription, []string, error) {
   180  	for i := 0; i < maxLoaderAttempts; i++ {
   181  		if args.File == nil {
   182  			var err error
   183  			args.File, err = openPath(ctx, args)
   184  			if err != nil {
   185  				ctx.Infof("Error opening %s: %v", args.Filename, err)
   186  				return loadedELF{}, nil, nil, nil, err
   187  			}
   188  			// Ensure file is release in case the code loops or errors out.
   189  			defer args.File.DecRef(ctx)
   190  		} else {
   191  			if err := checkIsRegularFile(ctx, args.File, args.Filename); err != nil {
   192  				return loadedELF{}, nil, nil, nil, err
   193  			}
   194  		}
   195  
   196  		// Check the header. Is this an ELF or interpreter script?
   197  		var hdr [4]uint8
   198  		// N.B. We assume that reading from a regular file cannot block.
   199  		_, err := args.File.ReadFull(ctx, usermem.BytesIOSequence(hdr[:]), 0)
   200  		// Allow unexpected EOF, as a valid executable could be only three bytes
   201  		// (e.g., #!a).
   202  		if err != nil && err != io.ErrUnexpectedEOF {
   203  			if err == io.EOF {
   204  				err = linuxerr.ENOEXEC
   205  			}
   206  			return loadedELF{}, nil, nil, nil, err
   207  		}
   208  
   209  		switch {
   210  		case bytes.Equal(hdr[:], []byte(elfMagic)):
   211  			loaded, ac, err := loadELF(ctx, args)
   212  			if err != nil {
   213  				ctx.Infof("Error loading ELF: %v", err)
   214  				return loadedELF{}, nil, nil, nil, err
   215  			}
   216  			// An ELF is always terminal. Hold on to file.
   217  			args.File.IncRef()
   218  			return loaded, ac, args.File, args.Argv, err
   219  
   220  		case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)):
   221  			if args.CloseOnExec {
   222  				return loadedELF{}, nil, nil, nil, linuxerr.ENOENT
   223  			}
   224  			args.Filename, args.Argv, err = parseInterpreterScript(ctx, args.Filename, args.File, args.Argv)
   225  			if err != nil {
   226  				ctx.Infof("Error loading interpreter script: %v", err)
   227  				return loadedELF{}, nil, nil, nil, err
   228  			}
   229  			// Refresh the traversal limit for the interpreter.
   230  			*args.RemainingTraversals = linux.MaxSymlinkTraversals
   231  
   232  		default:
   233  			ctx.Infof("Unknown magic: %v", hdr)
   234  			return loadedELF{}, nil, nil, nil, linuxerr.ENOEXEC
   235  		}
   236  		// Set to nil in case we loop on a Interpreter Script.
   237  		args.File = nil
   238  	}
   239  
   240  	return loadedELF{}, nil, nil, nil, linuxerr.ELOOP
   241  }
   242  
// ImageInfo represents the information for the loaded image. It is the
// result returned by Load.
type ImageInfo struct {
	// The target operating system of the image.
	OS abi.OS
	// The architecture context matching the loaded binary, with its
	// instruction pointer and stack pointer already set by Load.
	Arch *arch.Context64
	// The base name of the binary, truncated to at most
	// linux.TASK_COMM_LEN-1 bytes.
	Name string
	// The binary's file capability, as read from its security capability
	// extended attribute. Empty when the attribute is absent or extended
	// attributes are not supported.
	FileCaps string
}
   254  
   255  // Load loads args.File into a MemoryManager. If args.File is nil, the path
   256  // args.Filename is resolved and loaded instead.
   257  //
   258  // If Load returns ErrSwitchFile it should be called again with the returned
   259  // path and argv.
   260  //
   261  // Preconditions:
   262  //   - The Task MemoryManager is empty.
   263  //   - Load is called on the Task goroutine.
   264  func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *VDSO) (ImageInfo, *syserr.Error) {
   265  	// Load the executable itself.
   266  	loaded, ac, file, newArgv, err := loadExecutable(ctx, args)
   267  	if err != nil {
   268  		return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux())
   269  	}
   270  	defer file.DecRef(ctx)
   271  	xattr, err := file.GetXattr(ctx, &vfs.GetXattrOptions{Name: securityCapability, Size: linux.XATTR_CAPS_SZ_3})
   272  	switch {
   273  	case linuxerr.Equals(linuxerr.ENODATA, err), linuxerr.Equals(linuxerr.ENOTSUP, err):
   274  		xattr = ""
   275  	case err != nil:
   276  		return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("failed to read file capabilities of %s: %v", args.Filename, err), syserr.FromError(err).ToLinux())
   277  	}
   278  
   279  	// Load the VDSO.
   280  	vdsoAddr, err := loadVDSO(ctx, args.MemoryManager, vdso, loaded)
   281  	if err != nil {
   282  		return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("error loading VDSO: %v", err), syserr.FromError(err).ToLinux())
   283  	}
   284  
   285  	// Setup the heap. brk starts at the next page after the end of the
   286  	// executable. Userspace can assume that the remainder of the page after
   287  	// loaded.end is available for its use.
   288  	e, ok := loaded.end.RoundUp()
   289  	if !ok {
   290  		return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("brk overflows: %#x", loaded.end), errno.ENOEXEC)
   291  	}
   292  	args.MemoryManager.BrkSetup(ctx, e)
   293  
   294  	// Allocate our stack.
   295  	stack, err := allocStack(ctx, args.MemoryManager, ac)
   296  	if err != nil {
   297  		return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to allocate stack: %v", err), syserr.FromError(err).ToLinux())
   298  	}
   299  
   300  	// Push the original filename to the stack, for AT_EXECFN.
   301  	if _, err := stack.PushNullTerminatedByteSlice([]byte(args.Filename)); err != nil {
   302  		return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to push exec filename: %v", err), syserr.FromError(err).ToLinux())
   303  	}
   304  	execfn := stack.Bottom
   305  
   306  	// Push 16 random bytes on the stack which AT_RANDOM will point to.
   307  	var b [16]byte
   308  	if _, err := rand.Read(b[:]); err != nil {
   309  		return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to read random bytes: %v", err), syserr.FromError(err).ToLinux())
   310  	}
   311  	if _, err = stack.PushNullTerminatedByteSlice(b[:]); err != nil {
   312  		return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to push random bytes: %v", err), syserr.FromError(err).ToLinux())
   313  	}
   314  	random := stack.Bottom
   315  
   316  	c := auth.CredentialsFromContext(ctx)
   317  
   318  	// Add generic auxv entries.
   319  	auxv := append(loaded.auxv, arch.Auxv{
   320  		arch.AuxEntry{linux.AT_UID, hostarch.Addr(c.RealKUID.In(c.UserNamespace).OrOverflow())},
   321  		arch.AuxEntry{linux.AT_EUID, hostarch.Addr(c.EffectiveKUID.In(c.UserNamespace).OrOverflow())},
   322  		arch.AuxEntry{linux.AT_GID, hostarch.Addr(c.RealKGID.In(c.UserNamespace).OrOverflow())},
   323  		arch.AuxEntry{linux.AT_EGID, hostarch.Addr(c.EffectiveKGID.In(c.UserNamespace).OrOverflow())},
   324  		// The conditions that require AT_SECURE = 1 never arise. See
   325  		// kernel.Task.updateCredsForExecLocked.
   326  		arch.AuxEntry{linux.AT_SECURE, 0},
   327  		arch.AuxEntry{linux.AT_CLKTCK, linux.CLOCKS_PER_SEC},
   328  		arch.AuxEntry{linux.AT_EXECFN, execfn},
   329  		arch.AuxEntry{linux.AT_RANDOM, random},
   330  		arch.AuxEntry{linux.AT_PAGESZ, hostarch.PageSize},
   331  		arch.AuxEntry{linux.AT_SYSINFO_EHDR, vdsoAddr},
   332  	}...)
   333  	auxv = append(auxv, extraAuxv...)
   334  
   335  	sl, err := stack.Load(newArgv, args.Envv, auxv)
   336  	if err != nil {
   337  		return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("Failed to load stack: %v", err), syserr.FromError(err).ToLinux())
   338  	}
   339  
   340  	m := args.MemoryManager
   341  	m.SetArgvStart(sl.ArgvStart)
   342  	m.SetArgvEnd(sl.ArgvEnd)
   343  	m.SetEnvvStart(sl.EnvvStart)
   344  	m.SetEnvvEnd(sl.EnvvEnd)
   345  	m.SetAuxv(auxv)
   346  	m.SetExecutable(ctx, file)
   347  	m.SetVDSOSigReturn(uint64(vdsoAddr) + vdsoSigreturnOffset - vdsoPrelink)
   348  
   349  	ac.SetIP(uintptr(loaded.entry))
   350  	ac.SetStack(uintptr(stack.Bottom))
   351  
   352  	name := path.Base(args.Filename)
   353  	if len(name) > linux.TASK_COMM_LEN-1 {
   354  		name = name[:linux.TASK_COMM_LEN-1]
   355  	}
   356  
   357  	return ImageInfo{
   358  		OS:       loaded.os,
   359  		Arch:     ac,
   360  		Name:     name,
   361  		FileCaps: xattr,
   362  	}, nil
   363  }