gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/platform/ptrace/ptrace.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package ptrace provides a ptrace-based implementation of the platform
    16  // interface. This is useful for development and testing purposes primarily,
    17  // and runs on stock kernels without special permissions.
    18  //
    19  // In a nutshell, it works as follows:
    20  //
    21  // The creation of a new address space creates a new child process with a single
    22  // thread which is traced by a single goroutine.
    23  //
    24  // A context is just a collection of temporary variables. Calling Switch on a
    25  // context does the following:
    26  //
    27  //	Locks the runtime thread.
    28  //
    29  //	Looks up a traced subprocess thread for the current runtime thread. If
    30  //	none exists, the dedicated goroutine is asked to create a new stopped
    31  //	thread in the subprocess. This stopped subprocess thread is then traced
    32  //	by the current thread and this information is stored for subsequent
    33  //	switches.
    34  //
    35  //	The context is then bound with information about the subprocess thread
    36  //	so that the context may be appropriately interrupted via a signal.
    37  //
    38  //	The requested operation is performed in the traced subprocess thread
    39  //	(e.g. set registers, execute, return).
    40  //
    41  // Lock order:
    42  //
    43  //	 subprocess.mu
    44  //		context.mu
    45  package ptrace
    46  
    47  import (
    48  	"gvisor.dev/gvisor/pkg/abi/linux"
    49  	pkgcontext "gvisor.dev/gvisor/pkg/context"
    50  	"gvisor.dev/gvisor/pkg/fd"
    51  	"gvisor.dev/gvisor/pkg/hostarch"
    52  	"gvisor.dev/gvisor/pkg/sentry/arch"
    53  	"gvisor.dev/gvisor/pkg/sentry/platform"
    54  	"gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
    55  	"gvisor.dev/gvisor/pkg/sync"
    56  )
    57  
    58  var (
    59  	// stubStart is the link address for our stub, and determines the
    60  	// maximum user address. This is valid only after a call to stubInit.
    61  	//
    62  	// We attempt to link the stub here, and adjust downward as needed.
    63  	stubStart uintptr = stubInitAddress
    64  
    65  	// stubEnd is the first byte past the end of the stub, as with
    66  	// stubStart this is valid only after a call to stubInit.
    67  	stubEnd uintptr
    68  
    69  	// stubInitialized controls one-time stub initialization.
    70  	stubInitialized sync.Once
    71  )
    72  
// context is the ptrace platform's context type; NewContext returns it as a
// platform.Context. Besides the architecture-specific state it records the
// outcome of the most recent Switch so that repeated faults can be detected.
//
// Per the package lock order, subprocess.mu is acquired before context.mu.
type context struct {
	// archContext is the embedded architecture-specific part of the
	// context. NewContext calls its init method.
	archContext

	// signalInfo is the signal info, if and when a signal is received.
	// Switch reads it whenever the switch did not end in a syscall.
	signalInfo linux.SignalInfo

	// interrupt is the interrupt context. Interrupt delivers its
	// notification through this forwarder.
	interrupt interrupt.Forwarder

	// mu protects the following fields.
	mu sync.Mutex

	// If lastFaultSP is non-nil, the last context switch was due to a fault
	// received while executing lastFaultSP. Only context.Switch may set
	// lastFaultSP to a non-nil value. When Switch changes lastFaultSP it
	// also removes this context from the old subprocess's contexts set and
	// adds it to the new one's.
	lastFaultSP *subprocess

	// lastFaultAddr is the last faulting address; this is only meaningful if
	// lastFaultSP is non-nil.
	lastFaultAddr hostarch.Addr

	// lastFaultIP is the address of the last faulting instruction;
	// this is also only meaningful if lastFaultSP is non-nil.
	lastFaultIP hostarch.Addr
}
    98  
    99  // NewContext implements platform.Platform.NewContext.
   100  func (*PTrace) NewContext(ctx pkgcontext.Context) platform.Context {
   101  	c := new(context)
   102  	c.archContext.init(ctx)
   103  	return c
   104  }
   105  
   106  // Switch runs the provided context in the given address space.
   107  func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac *arch.Context64, cpu int32) (*linux.SignalInfo, hostarch.AccessType, error) {
   108  	as := mm.AddressSpace()
   109  	s := as.(*subprocess)
   110  restart:
   111  	isSyscall := s.switchToApp(c, ac)
   112  
   113  	var (
   114  		faultSP   *subprocess
   115  		faultAddr hostarch.Addr
   116  		faultIP   hostarch.Addr
   117  	)
   118  	if !isSyscall && linux.Signal(c.signalInfo.Signo) == linux.SIGSEGV {
   119  		faultSP = s
   120  		faultAddr = hostarch.Addr(c.signalInfo.Addr())
   121  		faultIP = hostarch.Addr(ac.IP())
   122  	}
   123  
   124  	// Update the context to reflect the outcome of this context switch.
   125  	c.mu.Lock()
   126  	lastFaultSP := c.lastFaultSP
   127  	lastFaultAddr := c.lastFaultAddr
   128  	lastFaultIP := c.lastFaultIP
   129  	// At this point, c may not yet be in s.contexts, so c.lastFaultSP won't be
   130  	// updated by s.Unmap(). This is fine; we only need to synchronize with
   131  	// calls to s.Unmap() that occur after the handling of this fault.
   132  	c.lastFaultSP = faultSP
   133  	c.lastFaultAddr = faultAddr
   134  	c.lastFaultIP = faultIP
   135  	c.mu.Unlock()
   136  
   137  	// Update subprocesses to reflect the outcome of this context switch.
   138  	if lastFaultSP != faultSP {
   139  		if lastFaultSP != nil {
   140  			lastFaultSP.mu.Lock()
   141  			delete(lastFaultSP.contexts, c)
   142  			lastFaultSP.mu.Unlock()
   143  		}
   144  		if faultSP != nil {
   145  			faultSP.mu.Lock()
   146  			faultSP.contexts[c] = struct{}{}
   147  			faultSP.mu.Unlock()
   148  		}
   149  	}
   150  
   151  	if isSyscall {
   152  		return nil, hostarch.NoAccess, nil
   153  	}
   154  
   155  	si := c.signalInfo
   156  	if faultSP == nil {
   157  		// Non-fault signal.
   158  		return &si, hostarch.NoAccess, platform.ErrContextSignal
   159  	}
   160  
   161  	// See if this can be handled as a CPUID instruction.
   162  	if linux.Signal(si.Signo) == linux.SIGSEGV && platform.TryCPUIDEmulate(ctx, mm, ac) {
   163  		goto restart
   164  	}
   165  
   166  	// Got a page fault. Ideally, we'd get real fault type here, but ptrace
   167  	// doesn't expose this information. Instead, we use a simple heuristic:
   168  	//
   169  	// It was an instruction fault iff the faulting addr == instruction
   170  	// pointer.
   171  	//
   172  	// It was a write fault if the fault is immediately repeated.
   173  	at := hostarch.Read
   174  	if faultAddr == faultIP {
   175  		at.Execute = true
   176  	}
   177  	if lastFaultSP == faultSP &&
   178  		lastFaultAddr == faultAddr &&
   179  		lastFaultIP == faultIP {
   180  		at.Write = true
   181  	}
   182  
   183  	// Handle as a signal.
   184  	return &si, at, platform.ErrContextSignal
   185  }
   186  
// Interrupt interrupts the running guest application associated with this
// context. It does so by calling NotifyInterrupt on the context's interrupt
// forwarder.
func (c *context) Interrupt() {
	c.interrupt.NotifyInterrupt()
}
   191  
   192  // Release implements platform.Context.Release().
   193  func (c *context) Release() {}
   194  
   195  // FullStateChanged implements platform.Context.FullStateChanged.
   196  func (c *context) FullStateChanged() {}
   197  
   198  // PullFullState implements platform.Context.PullFullState.
   199  func (c *context) PullFullState(as platform.AddressSpace, ac *arch.Context64) error { return nil }
   200  
// PrepareSleep implements platform.Context.PrepareSleep. For the ptrace
// context it is a no-op.
func (*context) PrepareSleep() {}
   203  
// PTrace represents a collection of ptrace subprocesses.
//
// The type has no fields of its own; it only embeds helper types from the
// platform package. NOTE(review): these presumably supply the parts of the
// platform interface that PTrace does not define itself; confirm against the
// platform package.
type PTrace struct {
	platform.MMapMinAddr
	platform.NoCPUPreemptionDetection
	platform.UseHostGlobalMemoryBarrier
	platform.DoesNotOwnPageTables
}
   211  
   212  // New returns a new ptrace-based implementation of the platform interface.
   213  func New() (*PTrace, error) {
   214  	stubInitialized.Do(func() {
   215  		// Initialize the stub.
   216  		stubInit()
   217  
   218  		// Create the master process for the global pool. This must be
   219  		// done before initializing any other processes.
   220  		master, err := newSubprocess(createStub)
   221  		if err != nil {
   222  			// Should never happen.
   223  			panic("unable to initialize ptrace master: " + err.Error())
   224  		}
   225  
   226  		// Set the master on the globalPool.
   227  		globalPool.master = master
   228  	})
   229  
   230  	return &PTrace{}, nil
   231  }
   232  
// SupportsAddressSpaceIO implements platform.Platform.SupportsAddressSpaceIO.
// The ptrace platform always reports false.
func (*PTrace) SupportsAddressSpaceIO() bool {
	return false
}
   237  
// CooperativelySchedulesAddressSpace implements
// platform.Platform.CooperativelySchedulesAddressSpace. The ptrace platform
// always reports false.
func (*PTrace) CooperativelySchedulesAddressSpace() bool {
	return false
}
   242  
// MapUnit implements platform.Platform.MapUnit. The ptrace platform always
// returns 0.
func (*PTrace) MapUnit() uint64 {
	// The host kernel manages page tables and arbitrary-sized mappings
	// have effectively the same cost.
	return 0
}
   249  
// MaxUserAddress returns the first address that may not be used by user
// applications.
//
// The value is stubStart, the address at which the stub is linked; it is
// only valid after stubInit has run, which New arranges on its first call.
func (*PTrace) MaxUserAddress() hostarch.Addr {
	return hostarch.Addr(stubStart)
}
   255  
   256  // NewAddressSpace returns a new subprocess.
   257  func (p *PTrace) NewAddressSpace(any) (platform.AddressSpace, <-chan struct{}, error) {
   258  	as, err := newSubprocess(globalPool.master.createStub)
   259  	return as, nil, err
   260  }
   261  
// constructor creates the ptrace platform. The package's init function
// registers an instance of it with the platform package under the name
// "ptrace".
type constructor struct{}
   263  
// New returns a ptrace platform by calling the package-level New. The
// *fd.FD argument is ignored.
func (*constructor) New(*fd.FD) (platform.Platform, error) {
	return New()
}
   267  
// OpenDevice always returns a nil FD and a nil error; the path argument is
// ignored.
func (*constructor) OpenDevice(_ string) (*fd.FD, error) {
	return nil, nil
}
   271  
// Requirements implements platform.Constructor.Requirements(). The returned
// value has both RequiresCapSysPtrace and RequiresCurrentPIDNS set.
func (*constructor) Requirements() platform.Requirements {
	// TODO(b/75837838): Also set a new PID namespace so that we limit
	// access to other host processes.
	return platform.Requirements{
		RequiresCapSysPtrace: true,
		RequiresCurrentPIDNS: true,
	}
}
   281  
// init registers the ptrace constructor with the platform package under the
// name "ptrace".
func init() {
	platform.Register("ptrace", &constructor{})
}