github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/shim/service.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package shim implements Containerd Shim v2 interface.
    16  package shim
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"io"
    22  	"os"
    23  	"os/exec"
    24  	"path/filepath"
    25  	"strings"
    26  	"sync"
    27  	"time"
    28  
    29  	"github.com/BurntSushi/toml"
    30  	"github.com/containerd/cgroups"
    31  	cgroupsstats "github.com/containerd/cgroups/stats/v1"
    32  	"github.com/containerd/console"
    33  	"github.com/containerd/containerd/api/events"
    34  	"github.com/containerd/containerd/api/types/task"
    35  	"github.com/containerd/containerd/errdefs"
    36  	"github.com/containerd/containerd/log"
    37  	"github.com/containerd/containerd/mount"
    38  	"github.com/containerd/containerd/namespaces"
    39  	"github.com/containerd/containerd/pkg/process"
    40  	"github.com/containerd/containerd/pkg/stdio"
    41  	"github.com/containerd/containerd/runtime"
    42  	"github.com/containerd/containerd/runtime/linux/runctypes"
    43  	"github.com/containerd/containerd/runtime/v2/shim"
    44  	taskAPI "github.com/containerd/containerd/runtime/v2/task"
    45  	"github.com/containerd/containerd/sys/reaper"
    46  	"github.com/containerd/typeurl"
    47  	"github.com/gogo/protobuf/types"
    48  	specs "github.com/opencontainers/runtime-spec/specs-go"
    49  	"github.com/sirupsen/logrus"
    50  	"golang.org/x/sys/unix"
    51  	"github.com/SagerNet/gvisor/pkg/cleanup"
    52  
    53  	"github.com/SagerNet/gvisor/pkg/shim/proc"
    54  	"github.com/SagerNet/gvisor/pkg/shim/runsc"
    55  	"github.com/SagerNet/gvisor/pkg/shim/runtimeoptions"
    56  	"github.com/SagerNet/gvisor/pkg/shim/utils"
    57  	"github.com/SagerNet/gvisor/runsc/specutils"
    58  )
    59  
var (
	// empty is the shared payload-free response returned by RPCs that have
	// nothing to report beyond success or failure.
	empty   = &types.Empty{}
	// bufPool hands out pointers to 32 KiB byte slices so that scratch buffers
	// can be reused. It is not referenced in the visible part of this file.
	bufPool = sync.Pool{
		New: func() interface{} {
			buffer := make([]byte, 32<<10)
			return &buffer
		},
	}
)

// Compile-time check that *service satisfies taskAPI.TaskService.
var _ = (taskAPI.TaskService)(&service{})
    71  
const (
	// configFile is the default config file name. For containerd 1.2,
	// we assume that a config.toml should exist in the runtime root.
	// Create joins this name onto the runtime root when it receives
	// runctypes.RuncOptions.
	configFile = "config.toml"

	// shimAddressPath is the relative path to a file that contains the address
	// to the shim UDS. See service.shimAddress. It is written by StartShim and
	// read back by New.
	shimAddressPath = "address"
)
    81  
    82  // New returns a new shim service that can be used via GRPC.
    83  func New(ctx context.Context, id string, publisher shim.Publisher, cancel func()) (shim.Shim, error) {
    84  	var opts shim.Opts
    85  	if ctxOpts := ctx.Value(shim.OptsKey{}); ctxOpts != nil {
    86  		opts = ctxOpts.(shim.Opts)
    87  	}
    88  
    89  	ep, err := newOOMEpoller(publisher)
    90  	if err != nil {
    91  		return nil, err
    92  	}
    93  	go ep.run(ctx)
    94  	s := &service{
    95  		id:             id,
    96  		processes:      make(map[string]process.Process),
    97  		events:         make(chan interface{}, 128),
    98  		ec:             proc.ExitCh,
    99  		oomPoller:      ep,
   100  		cancel:         cancel,
   101  		genericOptions: opts,
   102  	}
   103  	go s.processExits(ctx)
   104  	runsc.Monitor = &runsc.LogMonitor{Next: reaper.Default}
   105  	if err := s.initPlatform(); err != nil {
   106  		cancel()
   107  		return nil, fmt.Errorf("failed to initialized platform behavior: %w", err)
   108  	}
   109  	go s.forward(ctx, publisher)
   110  
   111  	if address, err := shim.ReadAddress(shimAddressPath); err == nil {
   112  		s.shimAddress = address
   113  	}
   114  
   115  	return s, nil
   116  }
   117  
// service is the shim implementation of a remote shim over GRPC. It runs in 2
// different modes:
//   1. Service: process runs for the life time of the container and receives
//      calls described in shimapi.TaskService interface.
//   2. Tool: process is short lived and runs only to perform the requested
//      operations and then exits. It implements the direct functions in
//      shim.Shim interface.
//
// When the service is running, it saves a json file with state information so
// that commands sent to the tool can load the state and perform the operation.
type service struct {
	// mu is held by Create for its whole duration and by the helpers that
	// read or modify task and processes (getProcess, allProcesses, and the
	// map updates in Exec and Delete).
	mu sync.Mutex

	// id is the container ID.
	id string

	// bundle is a path provided by the caller on container creation. Store
	// because it's needed in commands that don't receive bundle in the request.
	bundle string

	// task is the main process that is running the container.
	task *proc.Init

	// processes maps ExecId to processes running through exec.
	processes map[string]process.Process

	// events carries events waiting to be published. checkProcesses sends
	// task exit events on it and forward drains it to the publisher.
	events chan interface{}

	// platform handles operations related to the console.
	platform stdio.Platform

	// genericOptions are options that come from the shim interface and are common
	// to all shims.
	genericOptions shim.Opts

	// opts are configuration options specific for this shim.
	opts options

	// ec gets notified whenever the container init process or an exec'd process
	// exits from inside the sandbox.
	ec chan proc.Exit

	// oomPoller monitors the sandbox's cgroup for OOM notifications.
	oomPoller *epoller

	// cancel is a function that needs to be called before the shim stops. The
	// function is provided by the caller to New().
	cancel func()

	// shimAddress is the location of the UDS used to communicate to containerd.
	shimAddress string
}
   170  
   171  func (s *service) newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) {
   172  	ns, err := namespaces.NamespaceRequired(ctx)
   173  	if err != nil {
   174  		return nil, err
   175  	}
   176  	self, err := os.Executable()
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  	cwd, err := os.Getwd()
   181  	if err != nil {
   182  		return nil, err
   183  	}
   184  	args := []string{
   185  		"-namespace", ns,
   186  		"-address", containerdAddress,
   187  		"-publish-binary", containerdBinary,
   188  	}
   189  	if s.genericOptions.Debug {
   190  		args = append(args, "-debug")
   191  	}
   192  	cmd := exec.Command(self, args...)
   193  	cmd.Dir = cwd
   194  	cmd.Env = append(os.Environ(), "GOMAXPROCS=2")
   195  	cmd.SysProcAttr = &unix.SysProcAttr{
   196  		Setpgid: true,
   197  	}
   198  	return cmd, nil
   199  }
   200  
// StartShim launches the long-running shim process for container id and
// returns the address of the socket it serves on.
//
// The socket is created here and handed to the child as an extra file. If the
// address is already in use and something answers on it, no new process is
// started: the existing address is recorded in the address file and returned.
// On success the child's pid is written to "shim.pid", the address to the
// address file, and the child's OOM score is adjusted. containerdTTRPCAddress
// is accepted but not used.
func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress, containerdTTRPCAddress string) (string, error) {
	log.L.Debugf("StartShim, id: %s, binary: %q, address: %q", id, containerdBinary, containerdAddress)

	cmd, err := s.newCommand(ctx, containerdBinary, containerdAddress)
	if err != nil {
		return "", err
	}
	address, err := shim.SocketAddress(ctx, containerdAddress, id)
	if err != nil {
		return "", err
	}
	socket, err := shim.NewSocket(address)
	if err != nil {
		// The only time where this would happen is if there is a bug and the socket
		// was not cleaned up in the cleanup method of the shim or we are using the
		// grouping functionality where the new process should be run with the same
		// shim as an existing container.
		if !shim.SocketEaddrinuse(err) {
			return "", fmt.Errorf("create new shim socket: %w", err)
		}
		// A live listener means an existing shim owns the address; reuse it.
		if shim.CanConnect(address) {
			if err := shim.WriteAddress(shimAddressPath, address); err != nil {
				return "", fmt.Errorf("write existing socket for shim: %w", err)
			}
			return address, nil
		}
		// Nothing is listening: the socket is stale, so replace it.
		if err := shim.RemoveSocket(address); err != nil {
			return "", fmt.Errorf("remove pre-existing socket: %w", err)
		}
		if socket, err = shim.NewSocket(address); err != nil {
			return "", fmt.Errorf("try create new shim socket 2x: %w", err)
		}
	}
	// From here on, any early return tears down the socket. The cleanup is
	// disarmed by cu.Release() on the success path at the bottom.
	cu := cleanup.Make(func() {
		socket.Close()
		_ = shim.RemoveSocket(address)
	})
	defer cu.Clean()

	f, err := socket.File()
	if err != nil {
		return "", err
	}

	// Hand the listening socket to the child process.
	cmd.ExtraFiles = append(cmd.ExtraFiles, f)

	log.L.Debugf("Executing: %q %s", cmd.Path, cmd.Args)
	if err := cmd.Start(); err != nil {
		f.Close()
		return "", err
	}
	// The child is running now, so failures below must also kill it.
	cu.Add(func() { cmd.Process.Kill() })

	// make sure to wait after start
	go cmd.Wait()
	if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil {
		return "", err
	}
	if err := shim.WriteAddress(shimAddressPath, address); err != nil {
		return "", err
	}
	if err := shim.SetScore(cmd.Process.Pid); err != nil {
		return "", fmt.Errorf("failed to set OOM Score on shim: %w", err)
	}
	cu.Release()
	return address, nil
}
   268  
   269  // Cleanup is called from another process (need to reload state) to stop the
   270  // container and undo all operations done in Create().
   271  func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) {
   272  	log.L.Debugf("Cleanup")
   273  
   274  	path, err := os.Getwd()
   275  	if err != nil {
   276  		return nil, err
   277  	}
   278  	ns, err := namespaces.NamespaceRequired(ctx)
   279  	if err != nil {
   280  		return nil, err
   281  	}
   282  	var st state
   283  	if err := st.load(path); err != nil {
   284  		return nil, err
   285  	}
   286  	r := proc.NewRunsc(s.opts.Root, path, ns, st.Options.BinaryName, nil)
   287  
   288  	if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{
   289  		Force: true,
   290  	}); err != nil {
   291  		log.L.Infof("failed to remove runc container: %v", err)
   292  	}
   293  	if err := mount.UnmountAll(st.Rootfs, 0); err != nil {
   294  		log.L.Infof("failed to cleanup rootfs mount: %v", err)
   295  	}
   296  	return &taskAPI.DeleteResponse{
   297  		ExitedAt:   time.Now(),
   298  		ExitStatus: 128 + uint32(unix.SIGKILL),
   299  	}, nil
   300  }
   301  
// Create creates a new initial process and container with the underlying OCI
// runtime.
//
// In order, it: records the task id and bundle, resolves shim options from the
// request (optionally decoding a TOML config file), configures logging, saves
// state to the bundle for Cleanup(), mounts the rootfs components, creates the
// init process, and registers the sandbox cgroup with the OOM poller. The
// service mutex is held for the whole call. The response carries the pid of
// the created init process.
func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Save the main task id and bundle to the shim for additional requests.
	s.id = r.ID
	s.bundle = r.Bundle

	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, fmt.Errorf("create namespace: %w", err)
	}

	// Read from root for now.
	// The option type determines where, if anywhere, a config file is looked
	// up; path stays empty when there is no file to decode.
	if r.Options != nil {
		v, err := typeurl.UnmarshalAny(r.Options)
		if err != nil {
			return nil, err
		}
		var path string
		switch o := v.(type) {
		case *runctypes.CreateOptions: // containerd 1.2.x
			s.opts.IoUID = o.IoUid
			s.opts.IoGID = o.IoGid
			s.opts.ShimCgroup = o.ShimCgroup
		case *runctypes.RuncOptions: // containerd 1.2.x
			root := proc.RunscRoot
			if o.RuntimeRoot != "" {
				root = o.RuntimeRoot
			}

			s.opts.BinaryName = o.Runtime

			path = filepath.Join(root, configFile)
			if _, err := os.Stat(path); err != nil {
				if !os.IsNotExist(err) {
					return nil, fmt.Errorf("stat config file %q: %w", path, err)
				}
				// A config file in runtime root is not required.
				path = ""
			}
		case *runtimeoptions.Options: // containerd 1.3.x+
			// Without a config path there is nothing to load and the type URL
			// is not checked.
			if o.ConfigPath == "" {
				break
			}
			if o.TypeUrl != optionsType {
				return nil, fmt.Errorf("unsupported option type %q", o.TypeUrl)
			}
			path = o.ConfigPath
		default:
			return nil, fmt.Errorf("unsupported option type %q", r.Options.TypeUrl)
		}
		if path != "" {
			if _, err = toml.DecodeFile(path, &s.opts); err != nil {
				return nil, fmt.Errorf("decode config file %q: %w", path, err)
			}
		}
	}

	if len(s.opts.LogLevel) != 0 {
		lvl, err := logrus.ParseLevel(s.opts.LogLevel)
		if err != nil {
			return nil, err
		}
		logrus.SetLevel(lvl)
	}
	// When a log path is configured, mirror the standard logger's output into
	// a per-shim log file. The debug dump and the debug signal handler below
	// are only set up inside this branch.
	if len(s.opts.LogPath) != 0 {
		logPath := runsc.FormatShimLogPath(s.opts.LogPath, s.id)
		if err := os.MkdirAll(filepath.Dir(logPath), 0777); err != nil {
			return nil, fmt.Errorf("failed to create log dir: %w", err)
		}
		logFile, err := os.Create(logPath)
		if err != nil {
			return nil, fmt.Errorf("failed to create log file: %w", err)
		}
		log.L.Debugf("Starting mirror log at %q", logPath)
		std := logrus.StandardLogger()
		// Keep writing to the previous destination as well as the file.
		std.SetOutput(io.MultiWriter(std.Out, logFile))

		log.L.Debugf("Create shim")
		log.L.Debugf("***************************")
		log.L.Debugf("Args: %s", os.Args)
		log.L.Debugf("PID: %d", os.Getpid())
		log.L.Debugf("ID: %s", s.id)
		log.L.Debugf("Options: %+v", s.opts)
		log.L.Debugf("Bundle: %s", r.Bundle)
		log.L.Debugf("Terminal: %t", r.Terminal)
		log.L.Debugf("stdin: %s", r.Stdin)
		log.L.Debugf("stdout: %s", r.Stdout)
		log.L.Debugf("stderr: %s", r.Stderr)
		log.L.Debugf("***************************")
		if log.L.Logger.IsLevelEnabled(logrus.DebugLevel) {
			setDebugSigHandler()
		}
	}

	// Save state before any action is taken to ensure Cleanup() will have all
	// the information it needs to undo the operations.
	st := state{
		Rootfs:  filepath.Join(r.Bundle, "rootfs"),
		Options: s.opts,
	}
	if err := st.save(r.Bundle); err != nil {
		return nil, err
	}

	// An already existing rootfs directory is fine.
	if err := os.Mkdir(st.Rootfs, 0711); err != nil && !os.IsExist(err) {
		return nil, err
	}

	// Convert from types.Mount to proc.Mount.
	var mounts []proc.Mount
	for _, m := range r.Rootfs {
		mounts = append(mounts, proc.Mount{
			Type:    m.Type,
			Source:  m.Source,
			Target:  m.Target,
			Options: m.Options,
		})
	}

	// Cleans up all mounts in case of failure.
	// NOTE(review): only the rootfs mounts are undone; a failure after
	// process.Create below does not tear down the created process here.
	cu := cleanup.Make(func() {
		if err := mount.UnmountAll(st.Rootfs, 0); err != nil {
			log.L.Infof("failed to cleanup rootfs mount: %v", err)
		}
	})
	defer cu.Clean()
	// Every component is mounted onto st.Rootfs; the per-mount Target copied
	// above is not used for the mount itself.
	for _, rm := range mounts {
		m := &mount.Mount{
			Type:    rm.Type,
			Source:  rm.Source,
			Options: rm.Options,
		}
		if err := m.Mount(st.Rootfs); err != nil {
			return nil, fmt.Errorf("failed to mount rootfs component %v: %w", m, err)
		}
	}

	config := &proc.CreateConfig{
		ID:       r.ID,
		Bundle:   r.Bundle,
		Runtime:  s.opts.BinaryName,
		Rootfs:   mounts,
		Terminal: r.Terminal,
		Stdin:    r.Stdin,
		Stdout:   r.Stdout,
		Stderr:   r.Stderr,
	}
	process, err := newInit(r.Bundle, filepath.Join(r.Bundle, "work"), ns, s.platform, config, &s.opts, st.Rootfs)
	if err != nil {
		return nil, utils.ErrToGRPC(err)
	}
	if err := process.Create(ctx, config); err != nil {
		return nil, utils.ErrToGRPC(err)
	}

	// Set up OOM notification on the sandbox's cgroup. This is done on
	// sandbox create since the sandbox process will be created here.
	pid := process.Pid()
	if pid > 0 {
		cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid))
		if err != nil {
			return nil, fmt.Errorf("loading cgroup for %d: %w", pid, err)
		}
		if err := s.oomPoller.add(s.id, cg); err != nil {
			return nil, fmt.Errorf("add cg to OOM monitor: %w", err)
		}
	}

	// Success
	// Disarm the unmount cleanup and publish the init process on the service.
	cu.Release()
	s.task = process
	return &taskAPI.CreateTaskResponse{
		Pid: uint32(process.Pid()),
	}, nil
}
   481  
   482  // Start starts a process.
   483  func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) {
   484  	log.L.Debugf("Start, id: %s, execID: %s", r.ID, r.ExecID)
   485  
   486  	p, err := s.getProcess(r.ExecID)
   487  	if err != nil {
   488  		return nil, err
   489  	}
   490  	if err := p.Start(ctx); err != nil {
   491  		return nil, err
   492  	}
   493  	// TODO: Set the cgroup and oom notifications on restore.
   494  	// https://github.com/google/gvisor-containerd-shim/issues/58
   495  	return &taskAPI.StartResponse{
   496  		Pid: uint32(p.Pid()),
   497  	}, nil
   498  }
   499  
   500  // Delete deletes the initial process and container.
   501  func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) {
   502  	log.L.Debugf("Delete, id: %s, execID: %s", r.ID, r.ExecID)
   503  
   504  	p, err := s.getProcess(r.ExecID)
   505  	if err != nil {
   506  		return nil, err
   507  	}
   508  	if err := p.Delete(ctx); err != nil {
   509  		return nil, err
   510  	}
   511  	if len(r.ExecID) != 0 {
   512  		s.mu.Lock()
   513  		delete(s.processes, r.ExecID)
   514  		s.mu.Unlock()
   515  	} else if s.platform != nil {
   516  		s.platform.Close()
   517  	}
   518  	return &taskAPI.DeleteResponse{
   519  		ExitStatus: uint32(p.ExitStatus()),
   520  		ExitedAt:   p.ExitedAt(),
   521  		Pid:        uint32(p.Pid()),
   522  	}, nil
   523  }
   524  
   525  // Exec spawns an additional process inside the container.
   526  func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*types.Empty, error) {
   527  	log.L.Debugf("Exec, id: %s, execID: %s", r.ID, r.ExecID)
   528  
   529  	s.mu.Lock()
   530  	p := s.processes[r.ExecID]
   531  	s.mu.Unlock()
   532  	if p != nil {
   533  		return nil, utils.ErrToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID)
   534  	}
   535  	if s.task == nil {
   536  		return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
   537  	}
   538  	process, err := s.task.Exec(ctx, s.bundle, &proc.ExecConfig{
   539  		ID:       r.ExecID,
   540  		Terminal: r.Terminal,
   541  		Stdin:    r.Stdin,
   542  		Stdout:   r.Stdout,
   543  		Stderr:   r.Stderr,
   544  		Spec:     r.Spec,
   545  	})
   546  	if err != nil {
   547  		return nil, utils.ErrToGRPC(err)
   548  	}
   549  	s.mu.Lock()
   550  	s.processes[r.ExecID] = process
   551  	s.mu.Unlock()
   552  	return empty, nil
   553  }
   554  
   555  // ResizePty resizes the terminal of a process.
   556  func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*types.Empty, error) {
   557  	log.L.Debugf("ResizePty, id: %s, execID: %s, dimension: %dx%d", r.ID, r.ExecID, r.Height, r.Width)
   558  
   559  	p, err := s.getProcess(r.ExecID)
   560  	if err != nil {
   561  		return nil, err
   562  	}
   563  	ws := console.WinSize{
   564  		Width:  uint16(r.Width),
   565  		Height: uint16(r.Height),
   566  	}
   567  	if err := p.Resize(ws); err != nil {
   568  		return nil, utils.ErrToGRPC(err)
   569  	}
   570  	return empty, nil
   571  }
   572  
   573  // State returns runtime state information for a process.
   574  func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) {
   575  	log.L.Debugf("State, id: %s, execID: %s", r.ID, r.ExecID)
   576  
   577  	p, err := s.getProcess(r.ExecID)
   578  	if err != nil {
   579  		log.L.Debugf("State failed to find process: %v", err)
   580  		return nil, err
   581  	}
   582  	st, err := p.Status(ctx)
   583  	if err != nil {
   584  		log.L.Debugf("State failed: %v", err)
   585  		return nil, err
   586  	}
   587  	status := task.StatusUnknown
   588  	switch st {
   589  	case "created":
   590  		status = task.StatusCreated
   591  	case "running":
   592  		status = task.StatusRunning
   593  	case "stopped":
   594  		status = task.StatusStopped
   595  	}
   596  	sio := p.Stdio()
   597  	res := &taskAPI.StateResponse{
   598  		ID:         p.ID(),
   599  		Bundle:     s.bundle,
   600  		Pid:        uint32(p.Pid()),
   601  		Status:     status,
   602  		Stdin:      sio.Stdin,
   603  		Stdout:     sio.Stdout,
   604  		Stderr:     sio.Stderr,
   605  		Terminal:   sio.Terminal,
   606  		ExitStatus: uint32(p.ExitStatus()),
   607  		ExitedAt:   p.ExitedAt(),
   608  	}
   609  	log.L.Debugf("State succeeded, response: %+v", res)
   610  	return res, nil
   611  }
   612  
   613  // Pause the container.
   614  func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*types.Empty, error) {
   615  	log.L.Debugf("Pause, id: %s", r.ID)
   616  	if s.task == nil {
   617  		log.L.Debugf("Pause error, id: %s: container not created", r.ID)
   618  		return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
   619  	}
   620  	err := s.task.Runtime().Pause(ctx, r.ID)
   621  	if err != nil {
   622  		return nil, err
   623  	}
   624  	return empty, nil
   625  }
   626  
   627  // Resume the container.
   628  func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*types.Empty, error) {
   629  	log.L.Debugf("Resume, id: %s", r.ID)
   630  	if s.task == nil {
   631  		log.L.Debugf("Resume error, id: %s: container not created", r.ID)
   632  		return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
   633  	}
   634  	err := s.task.Runtime().Resume(ctx, r.ID)
   635  	if err != nil {
   636  		return nil, err
   637  	}
   638  	return empty, nil
   639  }
   640  
   641  // Kill a process with the provided signal.
   642  func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*types.Empty, error) {
   643  	log.L.Debugf("Kill, id: %s, execID: %s, signal: %d, all: %t", r.ID, r.ExecID, r.Signal, r.All)
   644  
   645  	p, err := s.getProcess(r.ExecID)
   646  	if err != nil {
   647  		return nil, err
   648  	}
   649  	if err := p.Kill(ctx, r.Signal, r.All); err != nil {
   650  		log.L.Debugf("Kill failed: %v", err)
   651  		return nil, utils.ErrToGRPC(err)
   652  	}
   653  	log.L.Debugf("Kill succeeded")
   654  	return empty, nil
   655  }
   656  
   657  // Pids returns all pids inside the container.
   658  func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) {
   659  	log.L.Debugf("Pids, id: %s", r.ID)
   660  
   661  	pids, err := s.getContainerPids(ctx, r.ID)
   662  	if err != nil {
   663  		return nil, utils.ErrToGRPC(err)
   664  	}
   665  	var processes []*task.ProcessInfo
   666  	for _, pid := range pids {
   667  		pInfo := task.ProcessInfo{
   668  			Pid: pid,
   669  		}
   670  		for _, p := range s.processes {
   671  			if p.Pid() == int(pid) {
   672  				d := &runctypes.ProcessDetails{
   673  					ExecID: p.ID(),
   674  				}
   675  				a, err := typeurl.MarshalAny(d)
   676  				if err != nil {
   677  					return nil, fmt.Errorf("failed to marshal process %d info: %w", pid, err)
   678  				}
   679  				pInfo.Info = a
   680  				break
   681  			}
   682  		}
   683  		processes = append(processes, &pInfo)
   684  	}
   685  	return &taskAPI.PidsResponse{
   686  		Processes: processes,
   687  	}, nil
   688  }
   689  
   690  // CloseIO closes the I/O context of a process.
   691  func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*types.Empty, error) {
   692  	log.L.Debugf("CloseIO, id: %s, execID: %s, stdin: %t", r.ID, r.ExecID, r.Stdin)
   693  
   694  	p, err := s.getProcess(r.ExecID)
   695  	if err != nil {
   696  		return nil, err
   697  	}
   698  	if stdin := p.Stdin(); stdin != nil {
   699  		if err := stdin.Close(); err != nil {
   700  			return nil, fmt.Errorf("close stdin: %w", err)
   701  		}
   702  	}
   703  	return empty, nil
   704  }
   705  
   706  // Checkpoint checkpoints the container.
   707  func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*types.Empty, error) {
   708  	log.L.Debugf("Checkpoint, id: %s", r.ID)
   709  	return empty, utils.ErrToGRPC(errdefs.ErrNotImplemented)
   710  }
   711  
   712  // Connect returns shim information such as the shim's pid.
   713  func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) {
   714  	log.L.Debugf("Connect, id: %s", r.ID)
   715  
   716  	var pid int
   717  	if s.task != nil {
   718  		pid = s.task.Pid()
   719  	}
   720  	return &taskAPI.ConnectResponse{
   721  		ShimPid: uint32(os.Getpid()),
   722  		TaskPid: uint32(pid),
   723  	}, nil
   724  }
   725  
   726  func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*types.Empty, error) {
   727  	log.L.Debugf("Shutdown, id: %s", r.ID)
   728  	s.cancel()
   729  	if s.shimAddress != "" {
   730  		_ = shim.RemoveSocket(s.shimAddress)
   731  	}
   732  	os.Exit(0)
   733  	panic("Should not get here")
   734  }
   735  
   736  func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
   737  	log.L.Debugf("Stats, id: %s", r.ID)
   738  	if s.task == nil {
   739  		log.L.Debugf("Stats error, id: %s: container not created", r.ID)
   740  		return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
   741  	}
   742  	stats, err := s.task.Stats(ctx, s.id)
   743  	if err != nil {
   744  		log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
   745  		return nil, err
   746  	}
   747  
   748  	// gvisor currently (as of 2020-03-03) only returns the total memory
   749  	// usage and current PID value[0]. However, we copy the common fields here
   750  	// so that future updates will propagate correct information.  We're
   751  	// using the cgroups.Metrics structure so we're returning the same type
   752  	// as runc.
   753  	//
   754  	// [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81
   755  	metrics := &cgroupsstats.Metrics{
   756  		CPU: &cgroupsstats.CPUStat{
   757  			Usage: &cgroupsstats.CPUUsage{
   758  				Total:  stats.Cpu.Usage.Total,
   759  				Kernel: stats.Cpu.Usage.Kernel,
   760  				User:   stats.Cpu.Usage.User,
   761  				PerCPU: stats.Cpu.Usage.Percpu,
   762  			},
   763  			Throttling: &cgroupsstats.Throttle{
   764  				Periods:          stats.Cpu.Throttling.Periods,
   765  				ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods,
   766  				ThrottledTime:    stats.Cpu.Throttling.ThrottledTime,
   767  			},
   768  		},
   769  		Memory: &cgroupsstats.MemoryStat{
   770  			Cache: stats.Memory.Cache,
   771  			Usage: &cgroupsstats.MemoryEntry{
   772  				Limit:   stats.Memory.Usage.Limit,
   773  				Usage:   stats.Memory.Usage.Usage,
   774  				Max:     stats.Memory.Usage.Max,
   775  				Failcnt: stats.Memory.Usage.Failcnt,
   776  			},
   777  			Swap: &cgroupsstats.MemoryEntry{
   778  				Limit:   stats.Memory.Swap.Limit,
   779  				Usage:   stats.Memory.Swap.Usage,
   780  				Max:     stats.Memory.Swap.Max,
   781  				Failcnt: stats.Memory.Swap.Failcnt,
   782  			},
   783  			Kernel: &cgroupsstats.MemoryEntry{
   784  				Limit:   stats.Memory.Kernel.Limit,
   785  				Usage:   stats.Memory.Kernel.Usage,
   786  				Max:     stats.Memory.Kernel.Max,
   787  				Failcnt: stats.Memory.Kernel.Failcnt,
   788  			},
   789  			KernelTCP: &cgroupsstats.MemoryEntry{
   790  				Limit:   stats.Memory.KernelTCP.Limit,
   791  				Usage:   stats.Memory.KernelTCP.Usage,
   792  				Max:     stats.Memory.KernelTCP.Max,
   793  				Failcnt: stats.Memory.KernelTCP.Failcnt,
   794  			},
   795  		},
   796  		Pids: &cgroupsstats.PidsStat{
   797  			Current: stats.Pids.Current,
   798  			Limit:   stats.Pids.Limit,
   799  		},
   800  	}
   801  	data, err := typeurl.MarshalAny(metrics)
   802  	if err != nil {
   803  		log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
   804  		return nil, err
   805  	}
   806  	log.L.Debugf("Stats success, id: %s: %+v", r.ID, data)
   807  	return &taskAPI.StatsResponse{
   808  		Stats: data,
   809  	}, nil
   810  }
   811  
   812  // Update updates a running container.
   813  func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*types.Empty, error) {
   814  	return empty, utils.ErrToGRPC(errdefs.ErrNotImplemented)
   815  }
   816  
   817  // Wait waits for a process to exit.
   818  func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) {
   819  	log.L.Debugf("Wait, id: %s, execID: %s", r.ID, r.ExecID)
   820  
   821  	p, err := s.getProcess(r.ExecID)
   822  	if err != nil {
   823  		log.L.Debugf("Wait failed to find process: %v", err)
   824  		return nil, err
   825  	}
   826  	p.Wait()
   827  
   828  	res := &taskAPI.WaitResponse{
   829  		ExitStatus: uint32(p.ExitStatus()),
   830  		ExitedAt:   p.ExitedAt(),
   831  	}
   832  	log.L.Debugf("Wait succeeded, response: %+v", res)
   833  	return res, nil
   834  }
   835  
   836  func (s *service) processExits(ctx context.Context) {
   837  	for e := range s.ec {
   838  		s.checkProcesses(ctx, e)
   839  	}
   840  }
   841  
   842  func (s *service) checkProcesses(ctx context.Context, e proc.Exit) {
   843  	// TODO(random-liu): Add `shouldKillAll` logic if container pid
   844  	// namespace is supported.
   845  	for _, p := range s.allProcesses() {
   846  		if p.ID() == e.ID {
   847  			if ip, ok := p.(*proc.Init); ok {
   848  				// Ensure all children are killed.
   849  				log.L.Debugf("Container init process exited, killing all container processes")
   850  				ip.KillAll(ctx)
   851  			}
   852  			p.SetExited(e.Status)
   853  			s.events <- &events.TaskExit{
   854  				ContainerID: s.id,
   855  				ID:          p.ID(),
   856  				Pid:         uint32(p.Pid()),
   857  				ExitStatus:  uint32(e.Status),
   858  				ExitedAt:    p.ExitedAt(),
   859  			}
   860  			return
   861  		}
   862  	}
   863  }
   864  
   865  func (s *service) allProcesses() (o []process.Process) {
   866  	s.mu.Lock()
   867  	defer s.mu.Unlock()
   868  	for _, p := range s.processes {
   869  		o = append(o, p)
   870  	}
   871  	if s.task != nil {
   872  		o = append(o, s.task)
   873  	}
   874  	return o
   875  }
   876  
   877  func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) {
   878  	s.mu.Lock()
   879  	p := s.task
   880  	s.mu.Unlock()
   881  	if p == nil {
   882  		return nil, fmt.Errorf("container must be created: %w", errdefs.ErrFailedPrecondition)
   883  	}
   884  	ps, err := p.Runtime().Ps(ctx, id)
   885  	if err != nil {
   886  		return nil, err
   887  	}
   888  	pids := make([]uint32, 0, len(ps))
   889  	for _, pid := range ps {
   890  		pids = append(pids, uint32(pid))
   891  	}
   892  	return pids, nil
   893  }
   894  
   895  func (s *service) forward(ctx context.Context, publisher shim.Publisher) {
   896  	for e := range s.events {
   897  		err := publisher.Publish(ctx, getTopic(e), e)
   898  		if err != nil {
   899  			// Should not happen.
   900  			panic(fmt.Errorf("post event: %w", err))
   901  		}
   902  	}
   903  }
   904  
   905  func (s *service) getProcess(execID string) (process.Process, error) {
   906  	s.mu.Lock()
   907  	defer s.mu.Unlock()
   908  
   909  	if execID == "" {
   910  		if s.task == nil {
   911  			return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
   912  		}
   913  		return s.task, nil
   914  	}
   915  
   916  	p := s.processes[execID]
   917  	if p == nil {
   918  		return nil, utils.ErrToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID)
   919  	}
   920  	return p, nil
   921  }
   922  
   923  func getTopic(e interface{}) string {
   924  	switch e.(type) {
   925  	case *events.TaskCreate:
   926  		return runtime.TaskCreateEventTopic
   927  	case *events.TaskStart:
   928  		return runtime.TaskStartEventTopic
   929  	case *events.TaskOOM:
   930  		return runtime.TaskOOMEventTopic
   931  	case *events.TaskExit:
   932  		return runtime.TaskExitEventTopic
   933  	case *events.TaskDelete:
   934  		return runtime.TaskDeleteEventTopic
   935  	case *events.TaskExecAdded:
   936  		return runtime.TaskExecAddedEventTopic
   937  	case *events.TaskExecStarted:
   938  		return runtime.TaskExecStartedEventTopic
   939  	default:
   940  		log.L.Infof("no topic for type %#v", e)
   941  	}
   942  	return runtime.TaskUnknownTopic
   943  }
   944  
   945  func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.CreateConfig, options *options, rootfs string) (*proc.Init, error) {
   946  	spec, err := utils.ReadSpec(r.Bundle)
   947  	if err != nil {
   948  		return nil, fmt.Errorf("read oci spec: %w", err)
   949  	}
   950  
   951  	updated, err := utils.UpdateVolumeAnnotations(spec)
   952  	if err != nil {
   953  		return nil, fmt.Errorf("update volume annotations: %w", err)
   954  	}
   955  	updated = updateCgroup(spec) || updated
   956  
   957  	if updated {
   958  		if err := utils.WriteSpec(r.Bundle, spec); err != nil {
   959  			return nil, err
   960  		}
   961  	}
   962  
   963  	runsc.FormatRunscLogPath(r.ID, options.RunscConfig)
   964  	runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig)
   965  	p := proc.New(r.ID, runtime, stdio.Stdio{
   966  		Stdin:    r.Stdin,
   967  		Stdout:   r.Stdout,
   968  		Stderr:   r.Stderr,
   969  		Terminal: r.Terminal,
   970  	})
   971  	p.Bundle = r.Bundle
   972  	p.Platform = platform
   973  	p.Rootfs = rootfs
   974  	p.WorkDir = workDir
   975  	p.IoUID = int(options.IoUID)
   976  	p.IoGID = int(options.IoGID)
   977  	p.Sandbox = specutils.SpecContainerType(spec) == specutils.ContainerTypeSandbox
   978  	p.UserLog = utils.UserLogPath(spec)
   979  	p.Monitor = reaper.Default
   980  	return p, nil
   981  }
   982  
   983  // updateCgroup updates cgroup path for the sandbox to make the sandbox join the
   984  // pod cgroup and not the pause container cgroup. Returns true if the spec was
   985  // modified. Ex.:
   986  //   /kubepods/burstable/pod123/abc => kubepods/burstable/pod123
   987  //
   988  func updateCgroup(spec *specs.Spec) bool {
   989  	if !utils.IsSandbox(spec) {
   990  		return false
   991  	}
   992  	if spec.Linux == nil || len(spec.Linux.CgroupsPath) == 0 {
   993  		return false
   994  	}
   995  
   996  	// Search backwards for the pod cgroup path to make the sandbox use it,
   997  	// instead of the pause container's cgroup.
   998  	parts := strings.Split(spec.Linux.CgroupsPath, string(filepath.Separator))
   999  	for i := len(parts) - 1; i >= 0; i-- {
  1000  		if strings.HasPrefix(parts[i], "pod") {
  1001  			var path string
  1002  			for j := 0; j <= i; j++ {
  1003  				path = filepath.Join(path, parts[j])
  1004  			}
  1005  			// Add back the initial '/' that may have been lost above.
  1006  			if filepath.IsAbs(spec.Linux.CgroupsPath) {
  1007  				path = string(filepath.Separator) + path
  1008  			}
  1009  			if spec.Linux.CgroupsPath == path {
  1010  				return false
  1011  			}
  1012  			spec.Linux.CgroupsPath = path
  1013  			return true
  1014  		}
  1015  	}
  1016  	return false
  1017  }