gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/kernel/task_image.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"gvisor.dev/gvisor/pkg/abi/linux/errno"
    21  	"gvisor.dev/gvisor/pkg/context"
    22  	"gvisor.dev/gvisor/pkg/hostarch"
    23  	"gvisor.dev/gvisor/pkg/sentry/arch"
    24  	"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
    25  	"gvisor.dev/gvisor/pkg/sentry/loader"
    26  	"gvisor.dev/gvisor/pkg/sentry/mm"
    27  	"gvisor.dev/gvisor/pkg/syserr"
    28  )
    29  
// errNoSyscalls is returned by LoadTaskImage when no syscall table is
// registered for the OS and architecture of the loaded image. It maps to
// ENOEXEC.
var errNoSyscalls = syserr.New("no syscall table found", errno.ENOEXEC)
    31  
// Auxmap contains miscellaneous data for the task. It is a string-keyed map
// whose values may be of any type.
type Auxmap map[string]any
    34  
// TaskImage is the subset of a task's data that is provided by the loader.
//
// A TaskImage is created by Kernel.LoadTaskImage or duplicated by
// TaskImage.Fork, and its resources are dropped by TaskImage.release.
//
// +stateify savable
type TaskImage struct {
	// Name is the thread name set by the prctl(PR_SET_NAME) system call.
	// LoadTaskImage initializes it from the loader's result.
	Name string

	// Arch is the architecture-specific context (registers, etc.). Fork gives
	// every copy its own context via Arch.Fork.
	Arch *arch.Context64

	// MemoryManager is the task's address space. The image counts as a user
	// of the MemoryManager; release drops that user count and then sets this
	// field to nil.
	MemoryManager *mm.MemoryManager

	// fu implements futexes in the address space. Fork reuses the same
	// manager when the address space is shared and forks a new one from the
	// kernel otherwise. release sets this field to nil.
	fu *futex.Manager

	// st is the task's syscall table. Fork copies the pointer, so a forked
	// image uses the same table as the original.
	st *SyscallTable `state:".(syscallTableInfo)"`

	// fileCaps is the image's extended attribute named security.capability.
	// It is set by LoadTaskImage and is not copied by Fork.
	fileCaps string
}
    57  
// FileCaps returns the task image's security.capability extended attribute,
// as recorded when the image was loaded.
func (image *TaskImage) FileCaps() string {
	return image.fileCaps
}
    62  
    63  // release releases all resources held by the TaskImage. release is called by
    64  // the task when it execs into a new TaskImage.
    65  func (image *TaskImage) release(ctx context.Context) {
    66  	// Nil out pointers so that if the task is saved after release, it doesn't
    67  	// follow the pointers to possibly now-invalid objects.
    68  	if image.MemoryManager != nil {
    69  		image.MemoryManager.DecUsers(ctx)
    70  		image.MemoryManager = nil
    71  	}
    72  	image.fu = nil
    73  }
    74  
    75  // Fork returns a duplicate of image. The copied TaskImage always has an
    76  // independent arch.Context64. If shareAddressSpace is true, the copied
    77  // TaskImage shares an address space with the original; otherwise, the copied
    78  // TaskImage has an independent address space that is initially a duplicate
    79  // of the original's.
    80  func (image *TaskImage) Fork(ctx context.Context, k *Kernel, shareAddressSpace bool) (*TaskImage, error) {
    81  	newImage := &TaskImage{
    82  		Name: image.Name,
    83  		Arch: image.Arch.Fork(),
    84  		st:   image.st,
    85  	}
    86  	if shareAddressSpace {
    87  		newImage.MemoryManager = image.MemoryManager
    88  		if newImage.MemoryManager != nil {
    89  			if !newImage.MemoryManager.IncUsers() {
    90  				// Shouldn't be possible since image.MemoryManager should be a
    91  				// counted user.
    92  				panic(fmt.Sprintf("TaskImage.Fork called with userless TaskImage.MemoryManager"))
    93  			}
    94  		}
    95  		newImage.fu = image.fu
    96  	} else {
    97  		newMM, err := image.MemoryManager.Fork(ctx)
    98  		if err != nil {
    99  			return nil, err
   100  		}
   101  		newImage.MemoryManager = newMM
   102  		newImage.fu = k.futexes.Fork()
   103  	}
   104  	return newImage, nil
   105  }
   106  
// Arch returns t's arch.Context64, i.e. the context stored in the task's
// current image. The pointer is returned as-is; no copy is made.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) Arch() *arch.Context64 {
	return t.image.Arch
}
   114  
// MemoryManager returns t's MemoryManager, i.e. the one stored in the task's
// current image. MemoryManager does not take an additional reference on the
// returned MM.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) MemoryManager() *mm.MemoryManager {
	return t.image.MemoryManager
}
   123  
// SyscallTable returns t's syscall table, i.e. the table stored in the task's
// current image.
//
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) SyscallTable() *SyscallTable {
	return t.image.st
}
   131  
   132  // Stack returns the userspace stack.
   133  //
   134  // Preconditions: The caller must be running on the task goroutine, or t.mu
   135  // must be locked.
   136  func (t *Task) Stack() *arch.Stack {
   137  	return &arch.Stack{
   138  		Arch:   t.Arch(),
   139  		IO:     t.MemoryManager(),
   140  		Bottom: hostarch.Addr(t.Arch().Stack()),
   141  	}
   142  }
   143  
   144  // LoadTaskImage loads a specified file into a new TaskImage.
   145  //
   146  // args.MemoryManager does not need to be set by the caller.
   147  func (k *Kernel) LoadTaskImage(ctx context.Context, args loader.LoadArgs) (*TaskImage, *syserr.Error) {
   148  	// Prepare a new user address space to load into.
   149  	m := mm.NewMemoryManager(k, k.mf, k.SleepForAddressSpaceActivation)
   150  	defer m.DecUsers(ctx)
   151  	args.MemoryManager = m
   152  
   153  	info, err := loader.Load(ctx, args, k.extraAuxv, k.vdso)
   154  	if err != nil {
   155  		return nil, err
   156  	}
   157  
   158  	// Lookup our new syscall table.
   159  	st, ok := LookupSyscallTable(info.OS, info.Arch.Arch())
   160  	if !ok {
   161  		// No syscall table found. This means that the ELF binary does not match
   162  		// the architecture.
   163  		return nil, errNoSyscalls
   164  	}
   165  
   166  	if !m.IncUsers() {
   167  		panic("Failed to increment users count on new MM")
   168  	}
   169  	return &TaskImage{
   170  		Name:          info.Name,
   171  		Arch:          info.Arch,
   172  		MemoryManager: m,
   173  		fu:            k.futexes.Fork(),
   174  		st:            st,
   175  		fileCaps:      info.FileCaps,
   176  	}, nil
   177  }