github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/shim/proc/init.go (about)

     1  // Copyright 2018 The containerd Authors.
     2  // Copyright 2018 The gVisor Authors.
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     https://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package proc
    17  
    18  import (
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"path/filepath"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/containerd/console"
    29  	"github.com/containerd/containerd/errdefs"
    30  	"github.com/containerd/containerd/log"
    31  	"github.com/containerd/containerd/mount"
    32  	"github.com/containerd/containerd/pkg/process"
    33  	"github.com/containerd/containerd/pkg/stdio"
    34  	"github.com/containerd/fifo"
    35  	runc "github.com/containerd/go-runc"
    36  	specs "github.com/opencontainers/runtime-spec/specs-go"
    37  	"golang.org/x/sys/unix"
    38  
    39  	"github.com/SagerNet/gvisor/pkg/shim/runsc"
    40  )
    41  
    42  const statusStopped = "stopped"
    43  
    44  // Init represents an initial process for a container.
    45  type Init struct {
    46  	wg        sync.WaitGroup
    47  	initState initState
    48  
    49  	// mu is used to ensure that `Start()` and `Exited()` calls return in
    50  	// the right order when invoked in separate go routines.  This is the
    51  	// case within the shim implementation as it makes use of the reaper
    52  	// interface.
    53  	mu sync.Mutex
    54  
    55  	waitBlock chan struct{}
    56  
    57  	WorkDir string
    58  
    59  	id       string
    60  	Bundle   string
    61  	console  console.Console
    62  	Platform stdio.Platform
    63  	io       runc.IO
    64  	runtime  *runsc.Runsc
    65  	status   int
    66  	exited   time.Time
    67  	pid      int
    68  	closers  []io.Closer
    69  	stdin    io.Closer
    70  	stdio    stdio.Stdio
    71  	Rootfs   string
    72  	IoUID    int
    73  	IoGID    int
    74  	Sandbox  bool
    75  	UserLog  string
    76  	Monitor  ProcessMonitor
    77  }
    78  
    79  // NewRunsc returns a new runsc instance for a process.
    80  func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc {
    81  	if root == "" {
    82  		root = RunscRoot
    83  	}
    84  	return &runsc.Runsc{
    85  		Command:      runtime,
    86  		PdeathSignal: unix.SIGKILL,
    87  		Log:          filepath.Join(path, "log.json"),
    88  		LogFormat:    runc.JSON,
    89  		Root:         filepath.Join(root, namespace),
    90  		Config:       config,
    91  	}
    92  }
    93  
    94  // New returns a new init process.
    95  func New(id string, runtime *runsc.Runsc, stdio stdio.Stdio) *Init {
    96  	p := &Init{
    97  		id:        id,
    98  		runtime:   runtime,
    99  		stdio:     stdio,
   100  		status:    0,
   101  		waitBlock: make(chan struct{}),
   102  	}
   103  	p.initState = &createdState{p: p}
   104  	return p
   105  }
   106  
   107  // Create the process with the provided config.
   108  func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) {
   109  	var socket *runc.Socket
   110  	if r.Terminal {
   111  		if socket, err = runc.NewTempConsoleSocket(); err != nil {
   112  			return fmt.Errorf("failed to create OCI runtime console socket: %w", err)
   113  		}
   114  		defer socket.Close()
   115  	} else if hasNoIO(r) {
   116  		if p.io, err = runc.NewNullIO(); err != nil {
   117  			return fmt.Errorf("creating new NULL IO: %w", err)
   118  		}
   119  	} else {
   120  		if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil {
   121  			return fmt.Errorf("failed to create OCI runtime io pipes: %w", err)
   122  		}
   123  	}
   124  	// pidFile is the file that will contain the sandbox pid.
   125  	pidFile := filepath.Join(p.Bundle, "init.pid")
   126  	opts := &runsc.CreateOpts{
   127  		PidFile: pidFile,
   128  	}
   129  	if socket != nil {
   130  		opts.ConsoleSocket = socket
   131  	}
   132  	if p.Sandbox {
   133  		opts.IO = p.io
   134  		// UserLog is only useful for sandbox.
   135  		opts.UserLog = p.UserLog
   136  	}
   137  	if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil {
   138  		return p.runtimeError(err, "OCI runtime create failed")
   139  	}
   140  	if r.Stdin != "" {
   141  		sc, err := fifo.OpenFifo(context.Background(), r.Stdin, unix.O_WRONLY|unix.O_NONBLOCK, 0)
   142  		if err != nil {
   143  			return fmt.Errorf("failed to open stdin fifo %s: %w", r.Stdin, err)
   144  		}
   145  		p.stdin = sc
   146  		p.closers = append(p.closers, sc)
   147  	}
   148  	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
   149  	defer cancel()
   150  	if socket != nil {
   151  		console, err := socket.ReceiveMaster()
   152  		if err != nil {
   153  			return fmt.Errorf("failed to retrieve console master: %w", err)
   154  		}
   155  		console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg)
   156  		if err != nil {
   157  			return fmt.Errorf("failed to start console copy: %w", err)
   158  		}
   159  		p.console = console
   160  	} else if !hasNoIO(r) {
   161  		if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg); err != nil {
   162  			return fmt.Errorf("failed to start io pipe copy: %w", err)
   163  		}
   164  	}
   165  	pid, err := runc.ReadPidFile(pidFile)
   166  	if err != nil {
   167  		return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err)
   168  	}
   169  	p.pid = pid
   170  	return nil
   171  }
   172  
   173  // Wait waits for the process to exit.
   174  func (p *Init) Wait() {
   175  	<-p.waitBlock
   176  }
   177  
   178  // ID returns the ID of the process.
   179  func (p *Init) ID() string {
   180  	return p.id
   181  }
   182  
   183  // Pid returns the PID of the process.
   184  func (p *Init) Pid() int {
   185  	return p.pid
   186  }
   187  
   188  // ExitStatus returns the exit status of the process.
   189  func (p *Init) ExitStatus() int {
   190  	p.mu.Lock()
   191  	defer p.mu.Unlock()
   192  	return p.status
   193  }
   194  
   195  // ExitedAt returns the time when the process exited.
   196  func (p *Init) ExitedAt() time.Time {
   197  	p.mu.Lock()
   198  	defer p.mu.Unlock()
   199  	return p.exited
   200  }
   201  
   202  // Status returns the status of the process.
   203  func (p *Init) Status(ctx context.Context) (string, error) {
   204  	p.mu.Lock()
   205  	defer p.mu.Unlock()
   206  
   207  	return p.initState.State(ctx)
   208  }
   209  
   210  func (p *Init) state(ctx context.Context) (string, error) {
   211  	c, err := p.runtime.State(ctx, p.id)
   212  	if err != nil {
   213  		if strings.Contains(err.Error(), "does not exist") {
   214  			return statusStopped, nil
   215  		}
   216  		return "", p.runtimeError(err, "OCI runtime state failed")
   217  	}
   218  	return p.convertStatus(c.Status), nil
   219  }
   220  
   221  // Start starts the init process.
   222  func (p *Init) Start(ctx context.Context) error {
   223  	p.mu.Lock()
   224  	defer p.mu.Unlock()
   225  
   226  	return p.initState.Start(ctx)
   227  }
   228  
   229  func (p *Init) start(ctx context.Context) error {
   230  	var cio runc.IO
   231  	if !p.Sandbox {
   232  		cio = p.io
   233  	}
   234  	if err := p.runtime.Start(ctx, p.id, cio); err != nil {
   235  		return p.runtimeError(err, "OCI runtime start failed")
   236  	}
   237  	go func() {
   238  		status, err := p.runtime.Wait(context.Background(), p.id)
   239  		if err != nil {
   240  			log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id)
   241  			p.killAllLocked(ctx)
   242  			status = internalErrorCode
   243  		}
   244  		ExitCh <- Exit{
   245  			Timestamp: time.Now(),
   246  			ID:        p.id,
   247  			Status:    status,
   248  		}
   249  	}()
   250  	return nil
   251  }
   252  
   253  // SetExited set the exit stauts of the init process.
   254  func (p *Init) SetExited(status int) {
   255  	p.mu.Lock()
   256  	defer p.mu.Unlock()
   257  
   258  	p.initState.SetExited(status)
   259  }
   260  
   261  func (p *Init) setExited(status int) {
   262  	if !p.exited.IsZero() {
   263  		log.L.Debugf("Status already set to %d, ignoring status: %d", p.status, status)
   264  		return
   265  	}
   266  
   267  	log.L.Debugf("Setting status: %d", status)
   268  	p.exited = time.Now()
   269  	p.status = status
   270  	p.Platform.ShutdownConsole(context.Background(), p.console)
   271  	close(p.waitBlock)
   272  }
   273  
   274  // Delete deletes the init process.
   275  func (p *Init) Delete(ctx context.Context) error {
   276  	p.mu.Lock()
   277  	defer p.mu.Unlock()
   278  
   279  	return p.initState.Delete(ctx)
   280  }
   281  
   282  func (p *Init) delete(ctx context.Context) error {
   283  	p.killAllLocked(ctx)
   284  	p.wg.Wait()
   285  
   286  	err := p.runtime.Delete(ctx, p.id, nil)
   287  	if err != nil {
   288  		// ignore errors if a runtime has already deleted the process
   289  		// but we still hold metadata and pipes
   290  		//
   291  		// this is common during a checkpoint, runc will delete the container state
   292  		// after a checkpoint and the container will no longer exist within runc
   293  		if strings.Contains(err.Error(), "does not exist") {
   294  			err = nil
   295  		} else {
   296  			err = p.runtimeError(err, "failed to delete task")
   297  		}
   298  	}
   299  	if p.io != nil {
   300  		for _, c := range p.closers {
   301  			c.Close()
   302  		}
   303  		p.io.Close()
   304  	}
   305  	if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil {
   306  		log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount")
   307  		if err == nil {
   308  			err = fmt.Errorf("failed rootfs umount: %w", err2)
   309  		}
   310  	}
   311  	return err
   312  }
   313  
   314  // Resize resizes the init processes console.
   315  func (p *Init) Resize(ws console.WinSize) error {
   316  	p.mu.Lock()
   317  	defer p.mu.Unlock()
   318  
   319  	if p.console == nil {
   320  		return nil
   321  	}
   322  	return p.console.Resize(ws)
   323  }
   324  
   325  func (p *Init) resize(ws console.WinSize) error {
   326  	if p.console == nil {
   327  		return nil
   328  	}
   329  	return p.console.Resize(ws)
   330  }
   331  
   332  // Kill kills the init process.
   333  func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
   334  	p.mu.Lock()
   335  	defer p.mu.Unlock()
   336  
   337  	return p.initState.Kill(ctx, signal, all)
   338  }
   339  
   340  func (p *Init) kill(ctx context.Context, signal uint32, all bool) error {
   341  	var (
   342  		killErr error
   343  		backoff = 100 * time.Millisecond
   344  	)
   345  	const timeout = time.Second
   346  	for start := time.Now(); time.Since(start) < timeout; {
   347  		state, err := p.initState.State(ctx)
   348  		if err != nil {
   349  			return p.runtimeError(err, "OCI runtime state failed")
   350  		}
   351  		// For runsc, signal only works when container is running state.
   352  		// If the container is not in running state, directly return
   353  		// "no such process"
   354  		if state == statusStopped {
   355  			return fmt.Errorf("no such process: %w", errdefs.ErrNotFound)
   356  		}
   357  		killErr = p.runtime.Kill(ctx, p.id, int(signal), &runsc.KillOpts{All: all})
   358  		if killErr == nil {
   359  			return nil
   360  		}
   361  		time.Sleep(backoff)
   362  		backoff *= 2
   363  	}
   364  	return p.runtimeError(killErr, "kill timeout")
   365  }
   366  
   367  // KillAll kills all processes belonging to the init process. If
   368  // `runsc kill --all` returns error, assume the container has already stopped.
   369  func (p *Init) KillAll(context context.Context) {
   370  	p.mu.Lock()
   371  	defer p.mu.Unlock()
   372  	p.killAllLocked(context)
   373  }
   374  
   375  func (p *Init) killAllLocked(context context.Context) {
   376  	if err := p.runtime.Kill(context, p.id, int(unix.SIGKILL), &runsc.KillOpts{All: true}); err != nil {
   377  		log.L.Warningf("Ignoring error killing container %q: %v", p.id, err)
   378  	}
   379  }
   380  
   381  // Stdin returns the stdin of the process.
   382  func (p *Init) Stdin() io.Closer {
   383  	return p.stdin
   384  }
   385  
   386  // Runtime returns the OCI runtime configured for the init process.
   387  func (p *Init) Runtime() *runsc.Runsc {
   388  	return p.runtime
   389  }
   390  
   391  // Exec returns a new child process.
   392  func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) {
   393  	p.mu.Lock()
   394  	defer p.mu.Unlock()
   395  
   396  	return p.initState.Exec(ctx, path, r)
   397  }
   398  
   399  // exec returns a new exec'd process.
   400  func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) {
   401  	var spec specs.Process
   402  	if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
   403  		return nil, err
   404  	}
   405  	spec.Terminal = r.Terminal
   406  
   407  	e := &execProcess{
   408  		id:     r.ID,
   409  		path:   path,
   410  		parent: p,
   411  		spec:   spec,
   412  		stdio: stdio.Stdio{
   413  			Stdin:    r.Stdin,
   414  			Stdout:   r.Stdout,
   415  			Stderr:   r.Stderr,
   416  			Terminal: r.Terminal,
   417  		},
   418  		waitBlock: make(chan struct{}),
   419  	}
   420  	e.execState = &execCreatedState{p: e}
   421  	return e, nil
   422  }
   423  
   424  func (p *Init) Stats(ctx context.Context, id string) (*runc.Stats, error) {
   425  	p.mu.Lock()
   426  	defer p.mu.Unlock()
   427  
   428  	return p.initState.Stats(ctx, id)
   429  }
   430  
   431  func (p *Init) stats(ctx context.Context, id string) (*runc.Stats, error) {
   432  	return p.Runtime().Stats(ctx, id)
   433  }
   434  
   435  // Stdio returns the stdio of the process.
   436  func (p *Init) Stdio() stdio.Stdio {
   437  	return p.stdio
   438  }
   439  
   440  func (p *Init) runtimeError(rErr error, msg string) error {
   441  	if rErr == nil {
   442  		return nil
   443  	}
   444  
   445  	rMsg, err := getLastRuntimeError(p.runtime)
   446  	switch {
   447  	case err != nil:
   448  		return fmt.Errorf("%s: %w (unable to retrieve OCI runtime error: %v)", msg, rErr, err)
   449  	case rMsg == "":
   450  		return fmt.Errorf("%s: %w", msg, rErr)
   451  	default:
   452  		return fmt.Errorf("%s: %s", msg, rMsg)
   453  	}
   454  }
   455  
   456  func (p *Init) convertStatus(status string) string {
   457  	if status == "created" && !p.Sandbox && p.status == internalErrorCode {
   458  		// Treat start failure state for non-root container as stopped.
   459  		return statusStopped
   460  	}
   461  	return status
   462  }
   463  
   464  func withConditionalIO(c stdio.Stdio) runc.IOOpt {
   465  	return func(o *runc.IOOption) {
   466  		o.OpenStdin = c.Stdin != ""
   467  		o.OpenStdout = c.Stdout != ""
   468  		o.OpenStderr = c.Stderr != ""
   469  	}
   470  }