github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/shim/proc/init.go (about)

     1  // Copyright 2018 The containerd Authors.
     2  // Copyright 2018 The gVisor Authors.
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     https://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package proc
    17  
    18  import (
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"path/filepath"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/containerd/console"
    29  
    30  	"github.com/containerd/containerd/errdefs"
    31  	"github.com/containerd/containerd/log"
    32  	"github.com/containerd/containerd/mount"
    33  	"github.com/containerd/containerd/pkg/process"
    34  	"github.com/containerd/containerd/pkg/stdio"
    35  
    36  	"github.com/containerd/fifo"
    37  	runc "github.com/containerd/go-runc"
    38  	specs "github.com/opencontainers/runtime-spec/specs-go"
    39  	"golang.org/x/sys/unix"
    40  	"github.com/metacubex/gvisor/pkg/shim/runsc"
    41  	"github.com/metacubex/gvisor/pkg/shim/utils"
    42  )
    43  
// statusStopped is the status string this package reports for a container
// that has stopped or that the runtime says does not exist.
const statusStopped = "stopped"
    45  
// Init represents an initial process for a container.
type Init struct {
	// wg is handed to the console/pipe copy routines started in Create and
	// is waited on in delete before the container is removed.
	wg sync.WaitGroup
	// initState is the current state object; the exported methods delegate
	// to it while holding mu.
	initState initState

	// mu is used to ensure that `Start()` and `Exited()` calls return in
	// the right order when invoked in separate go routines.  This is the
	// case within the shim implementation as it makes use of the reaper
	// interface.
	mu sync.Mutex

	// waitBlock is closed by setExited; Wait blocks on it.
	waitBlock chan struct{}

	// WorkDir is not referenced in this file.
	WorkDir string

	// id is the container ID passed to every runtime call.
	id string
	// Bundle is the directory in which Create places the init.pid file.
	Bundle string
	// console is set by Create when a terminal is requested; nil otherwise.
	console console.Console
	// Platform copies and shuts down the console.
	Platform stdio.Platform
	// io is the null or pipe IO built in Create; it stays nil when a
	// terminal is requested.
	io runc.IO
	// runtime is the runsc client used to drive the container.
	runtime *runsc.Runsc
	// status is the exit status recorded by setExited.
	status int
	// exited is the time recorded by setExited; zero until then.
	exited time.Time
	// pid is read from the pid file at the end of Create.
	pid int
	// closers are closed in delete; Create appends the stdin fifo here.
	closers []io.Closer
	// stdin is the stdin fifo opened in Create, if any.
	stdin io.Closer
	// stdio is the stdio configuration given to New.
	stdio stdio.Stdio
	// Rootfs is unmounted in delete.
	Rootfs string
	// IoUID and IoGID are passed to runc.NewPipeIO in Create.
	IoUID int
	IoGID int
	// Sandbox selects when IO is handed to the runtime: at create time
	// when true, at start time when false.
	Sandbox bool
	// UserLog is passed to the runtime at create time, only when Sandbox
	// is set.
	UserLog string
	// Monitor is not referenced in this file.
	Monitor ProcessMonitor
}
    80  
    81  // NewRunsc returns a new runsc instance for a process.
    82  func NewRunsc(root, path, namespace, runtime string, config map[string]string, spec *specs.Spec) *runsc.Runsc {
    83  	if root == "" {
    84  		root = RunscRoot
    85  	}
    86  	return &runsc.Runsc{
    87  		Command:      runtime,
    88  		PdeathSignal: unix.SIGKILL,
    89  		Log:          filepath.Join(path, "log.json"),
    90  		LogFormat:    runc.JSON,
    91  		PanicLog:     utils.PanicLogPath(spec),
    92  		Root:         filepath.Join(root, namespace),
    93  		Config:       config,
    94  	}
    95  }
    96  
    97  // New returns a new init process.
    98  func New(id string, runtime *runsc.Runsc, stdio stdio.Stdio) *Init {
    99  	p := &Init{
   100  		id:        id,
   101  		runtime:   runtime,
   102  		stdio:     stdio,
   103  		status:    0,
   104  		waitBlock: make(chan struct{}),
   105  	}
   106  	p.initState = &createdState{p: p}
   107  	return p
   108  }
   109  
   110  // Create the process with the provided config.
   111  func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) {
   112  	var socket *runc.Socket
   113  	if r.Terminal {
   114  		if socket, err = runc.NewTempConsoleSocket(); err != nil {
   115  			return fmt.Errorf("failed to create OCI runtime console socket: %w", err)
   116  		}
   117  		defer socket.Close()
   118  	} else if hasNoIO(r) {
   119  		if p.io, err = runc.NewNullIO(); err != nil {
   120  			return fmt.Errorf("creating new NULL IO: %w", err)
   121  		}
   122  	} else {
   123  		if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil {
   124  			return fmt.Errorf("failed to create OCI runtime io pipes: %w", err)
   125  		}
   126  	}
   127  	// pidFile is the file that will contain the sandbox pid.
   128  	pidFile := filepath.Join(p.Bundle, "init.pid")
   129  	opts := &runsc.CreateOpts{
   130  		PidFile: pidFile,
   131  	}
   132  	if socket != nil {
   133  		opts.ConsoleSocket = socket
   134  	}
   135  	if p.Sandbox {
   136  		opts.IO = p.io
   137  		// UserLog is only useful for sandbox.
   138  		opts.UserLog = p.UserLog
   139  	}
   140  	if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil {
   141  		return p.runtimeError(err, "OCI runtime create failed")
   142  	}
   143  	if r.Stdin != "" {
   144  		sc, err := fifo.OpenFifo(context.Background(), r.Stdin, unix.O_WRONLY|unix.O_NONBLOCK, 0)
   145  		if err != nil {
   146  			return fmt.Errorf("failed to open stdin fifo %s: %w", r.Stdin, err)
   147  		}
   148  		p.stdin = sc
   149  		p.closers = append(p.closers, sc)
   150  	}
   151  	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
   152  	defer cancel()
   153  	if socket != nil {
   154  		console, err := socket.ReceiveMaster()
   155  		if err != nil {
   156  			return fmt.Errorf("failed to retrieve console master: %w", err)
   157  		}
   158  		console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg)
   159  		if err != nil {
   160  			return fmt.Errorf("failed to start console copy: %w", err)
   161  		}
   162  		p.console = console
   163  	} else if !hasNoIO(r) {
   164  		if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg); err != nil {
   165  			return fmt.Errorf("failed to start io pipe copy: %w", err)
   166  		}
   167  	}
   168  	pid, err := runc.ReadPidFile(pidFile)
   169  	if err != nil {
   170  		return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err)
   171  	}
   172  	p.pid = pid
   173  	return nil
   174  }
   175  
// Wait blocks until the process has been marked as exited, i.e. until
// setExited closes waitBlock.
func (p *Init) Wait() {
	<-p.waitBlock
}
   180  
// ID returns the ID of the process, as given to New.
func (p *Init) ID() string {
	return p.id
}
   185  
// Pid returns the PID of the process. The value is read from the runtime's
// pid file at the end of Create and is zero until then.
func (p *Init) Pid() int {
	return p.pid
}
   190  
   191  // ExitStatus returns the exit status of the process.
   192  func (p *Init) ExitStatus() int {
   193  	p.mu.Lock()
   194  	defer p.mu.Unlock()
   195  	return p.status
   196  }
   197  
   198  // ExitedAt returns the time when the process exited.
   199  func (p *Init) ExitedAt() time.Time {
   200  	p.mu.Lock()
   201  	defer p.mu.Unlock()
   202  	return p.exited
   203  }
   204  
   205  // Status returns the status of the process.
   206  func (p *Init) Status(ctx context.Context) (string, error) {
   207  	p.mu.Lock()
   208  	defer p.mu.Unlock()
   209  
   210  	return p.initState.State(ctx)
   211  }
   212  
   213  func (p *Init) state(ctx context.Context) (string, error) {
   214  	c, err := p.runtime.State(ctx, p.id)
   215  	if err != nil {
   216  		if strings.Contains(err.Error(), "does not exist") {
   217  			return statusStopped, nil
   218  		}
   219  		return "", p.runtimeError(err, "OCI runtime state failed")
   220  	}
   221  	return p.convertStatus(c.Status), nil
   222  }
   223  
   224  // Start starts the init process.
   225  func (p *Init) Start(ctx context.Context) error {
   226  	p.mu.Lock()
   227  	defer p.mu.Unlock()
   228  
   229  	return p.initState.Start(ctx)
   230  }
   231  
   232  func (p *Init) start(ctx context.Context) error {
   233  	var cio runc.IO
   234  	if !p.Sandbox {
   235  		cio = p.io
   236  	}
   237  	if err := p.runtime.Start(ctx, p.id, cio); err != nil {
   238  		return p.runtimeError(err, "OCI runtime start failed")
   239  	}
   240  	go func() {
   241  		status, err := p.runtime.Wait(context.Background(), p.id)
   242  		if err != nil {
   243  			log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id)
   244  			p.killAllLocked(ctx)
   245  			status = internalErrorCode
   246  		}
   247  		ExitCh <- Exit{
   248  			Timestamp: time.Now(),
   249  			ID:        p.id,
   250  			Status:    status,
   251  		}
   252  	}()
   253  	return nil
   254  }
   255  
// SetExited sets the exit status of the init process by delegating to the
// current state object while holding mu.
func (p *Init) SetExited(status int) {
	p.mu.Lock()
	defer p.mu.Unlock()

	p.initState.SetExited(status)
}
   263  
   264  func (p *Init) setExited(status int) {
   265  	if !p.exited.IsZero() {
   266  		log.L.Debugf("Status already set to %d, ignoring status: %d", p.status, status)
   267  		return
   268  	}
   269  
   270  	log.L.Debugf("Setting status: %d", status)
   271  	p.exited = time.Now()
   272  	p.status = status
   273  	p.Platform.ShutdownConsole(context.Background(), p.console)
   274  	close(p.waitBlock)
   275  }
   276  
   277  // Delete deletes the init process.
   278  func (p *Init) Delete(ctx context.Context) error {
   279  	p.mu.Lock()
   280  	defer p.mu.Unlock()
   281  
   282  	return p.initState.Delete(ctx)
   283  }
   284  
   285  func (p *Init) delete(ctx context.Context) error {
   286  	p.killAllLocked(ctx)
   287  	p.wg.Wait()
   288  
   289  	err := p.runtime.Delete(ctx, p.id, nil)
   290  	if err != nil {
   291  		// ignore errors if a runtime has already deleted the process
   292  		// but we still hold metadata and pipes
   293  		//
   294  		// this is common during a checkpoint, runc will delete the container state
   295  		// after a checkpoint and the container will no longer exist within runc
   296  		if strings.Contains(err.Error(), "does not exist") {
   297  			err = nil
   298  		} else {
   299  			err = p.runtimeError(err, "failed to delete task")
   300  		}
   301  	}
   302  	if p.io != nil {
   303  		for _, c := range p.closers {
   304  			c.Close()
   305  		}
   306  		p.io.Close()
   307  	}
   308  	if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil {
   309  		log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount")
   310  		if err == nil {
   311  			err = fmt.Errorf("failed rootfs umount: %w", err2)
   312  		}
   313  	}
   314  	return err
   315  }
   316  
   317  // Resize resizes the init processes console.
   318  func (p *Init) Resize(ws console.WinSize) error {
   319  	p.mu.Lock()
   320  	defer p.mu.Unlock()
   321  
   322  	if p.console == nil {
   323  		return nil
   324  	}
   325  	return p.console.Resize(ws)
   326  }
   327  
   328  func (p *Init) resize(ws console.WinSize) error {
   329  	if p.console == nil {
   330  		return nil
   331  	}
   332  	return p.console.Resize(ws)
   333  }
   334  
   335  // Kill kills the init process.
   336  func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
   337  	p.mu.Lock()
   338  	defer p.mu.Unlock()
   339  
   340  	return p.initState.Kill(ctx, signal, all)
   341  }
   342  
   343  func (p *Init) kill(ctx context.Context, signal uint32, all bool) error {
   344  	var (
   345  		killErr error
   346  		backoff = 100 * time.Millisecond
   347  	)
   348  	const timeout = time.Second
   349  	for start := time.Now(); time.Since(start) < timeout; {
   350  		state, err := p.initState.State(ctx)
   351  		if err != nil {
   352  			return p.runtimeError(err, "OCI runtime state failed")
   353  		}
   354  		// For runsc, signal only works when container is running state.
   355  		// If the container is not in running state, directly return
   356  		// "no such process"
   357  		if state == statusStopped {
   358  			return fmt.Errorf("no such process: %w", errdefs.ErrNotFound)
   359  		}
   360  		killErr = p.runtime.Kill(ctx, p.id, int(signal), &runsc.KillOpts{All: all})
   361  		if killErr == nil {
   362  			return nil
   363  		}
   364  		time.Sleep(backoff)
   365  		backoff *= 2
   366  	}
   367  	return p.runtimeError(killErr, "kill timeout")
   368  }
   369  
   370  // KillAll kills all processes belonging to the init process. If
   371  // `runsc kill --all` returns error, assume the container has already stopped.
   372  func (p *Init) KillAll(context context.Context) {
   373  	p.mu.Lock()
   374  	defer p.mu.Unlock()
   375  	p.killAllLocked(context)
   376  }
   377  
   378  func (p *Init) killAllLocked(context context.Context) {
   379  	if err := p.runtime.Kill(context, p.id, int(unix.SIGKILL), &runsc.KillOpts{All: true}); err != nil {
   380  		log.L.Warningf("Ignoring error killing container %q: %v", p.id, err)
   381  	}
   382  }
   383  
// Stdin returns the stdin of the process: the fifo opened in Create when a
// stdin path was configured, nil otherwise.
func (p *Init) Stdin() io.Closer {
	return p.stdin
}
   388  
// Runtime returns the OCI runtime configured for the init process, as given
// to New.
func (p *Init) Runtime() *runsc.Runsc {
	return p.runtime
}
   393  
   394  // Exec returns a new child process.
   395  func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) {
   396  	p.mu.Lock()
   397  	defer p.mu.Unlock()
   398  
   399  	return p.initState.Exec(ctx, path, r)
   400  }
   401  
   402  // exec returns a new exec'd process.
   403  func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) {
   404  	var spec specs.Process
   405  	if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
   406  		return nil, err
   407  	}
   408  	spec.Terminal = r.Terminal
   409  
   410  	e := &execProcess{
   411  		id:     r.ID,
   412  		path:   path,
   413  		parent: p,
   414  		spec:   spec,
   415  		stdio: stdio.Stdio{
   416  			Stdin:    r.Stdin,
   417  			Stdout:   r.Stdout,
   418  			Stderr:   r.Stderr,
   419  			Terminal: r.Terminal,
   420  		},
   421  		waitBlock: make(chan struct{}),
   422  	}
   423  	e.execState = &execCreatedState{p: e}
   424  	return e, nil
   425  }
   426  
   427  func (p *Init) Stats(ctx context.Context, id string) (*runc.Stats, error) {
   428  	p.mu.Lock()
   429  	defer p.mu.Unlock()
   430  
   431  	return p.initState.Stats(ctx, id)
   432  }
   433  
   434  func (p *Init) stats(ctx context.Context, id string) (*runc.Stats, error) {
   435  	return p.Runtime().Stats(ctx, id)
   436  }
   437  
// Stdio returns the stdio configuration of the process, as given to New.
func (p *Init) Stdio() stdio.Stdio {
	return p.stdio
}
   442  
   443  func (p *Init) runtimeError(rErr error, msg string) error {
   444  	if rErr == nil {
   445  		return nil
   446  	}
   447  
   448  	rMsg, err := getLastRuntimeError(p.runtime)
   449  	switch {
   450  	case err != nil:
   451  		return fmt.Errorf("%s: %w (unable to retrieve OCI runtime error: %v)", msg, rErr, err)
   452  	case rMsg == "":
   453  		return fmt.Errorf("%s: %w", msg, rErr)
   454  	default:
   455  		return fmt.Errorf("%s: %s", msg, rMsg)
   456  	}
   457  }
   458  
   459  func (p *Init) convertStatus(status string) string {
   460  	if status == "created" && !p.Sandbox && p.status == internalErrorCode {
   461  		// Treat start failure state for non-root container as stopped.
   462  		return statusStopped
   463  	}
   464  	return status
   465  }
   466  
   467  func withConditionalIO(c stdio.Stdio) runc.IOOpt {
   468  	return func(o *runc.IOOption) {
   469  		o.OpenStdin = c.Stdin != ""
   470  		o.OpenStdout = c.Stdout != ""
   471  		o.OpenStderr = c.Stderr != ""
   472  	}
   473  }