github.com/devdivbcp/moby@v17.12.0-ce-rc1.0.20200726071732-2d4bfdc789ad+incompatible/libcontainerd/supervisor/remote_daemon.go (about)

     1  package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"os"
     9  	"os/exec"
    10  	"path/filepath"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/BurntSushi/toml"
    17  	"github.com/containerd/containerd"
    18  	"github.com/containerd/containerd/services/server/config"
    19  	"github.com/docker/docker/pkg/system"
    20  	"github.com/pkg/errors"
    21  	"github.com/sirupsen/logrus"
    22  )
    23  
    24  const (
    25  	maxConnectionRetryCount = 3
    26  	healthCheckTimeout      = 3 * time.Second
    27  	shutdownTimeout         = 15 * time.Second
    28  	startupTimeout          = 15 * time.Second
    29  	configFile              = "containerd.toml"
    30  	binaryName              = "containerd"
    31  	pidFile                 = "containerd.pid"
    32  )
    33  
    34  type pluginConfigs struct {
    35  	Plugins map[string]interface{} `toml:"plugins"`
    36  }
    37  
    38  type remote struct {
    39  	sync.RWMutex
    40  	config.Config
    41  
    42  	daemonPid int
    43  	logger    *logrus.Entry
    44  
    45  	daemonWaitCh  chan struct{}
    46  	daemonStartCh chan error
    47  	daemonStopCh  chan struct{}
    48  
    49  	rootDir     string
    50  	stateDir    string
    51  	pluginConfs pluginConfigs
    52  }
    53  
    54  // Daemon represents a running containerd daemon
    55  type Daemon interface {
    56  	WaitTimeout(time.Duration) error
    57  	Address() string
    58  }
    59  
    60  // DaemonOpt allows to configure parameters of container daemons
    61  type DaemonOpt func(c *remote) error
    62  
    63  // Start starts a containerd daemon and monitors it
    64  func Start(ctx context.Context, rootDir, stateDir string, opts ...DaemonOpt) (Daemon, error) {
    65  	r := &remote{
    66  		rootDir:  rootDir,
    67  		stateDir: stateDir,
    68  		Config: config.Config{
    69  			Root:  filepath.Join(rootDir, "daemon"),
    70  			State: filepath.Join(stateDir, "daemon"),
    71  		},
    72  		pluginConfs:   pluginConfigs{make(map[string]interface{})},
    73  		daemonPid:     -1,
    74  		logger:        logrus.WithField("module", "libcontainerd"),
    75  		daemonStartCh: make(chan error, 1),
    76  		daemonStopCh:  make(chan struct{}),
    77  	}
    78  
    79  	for _, opt := range opts {
    80  		if err := opt(r); err != nil {
    81  			return nil, err
    82  		}
    83  	}
    84  	r.setDefaults()
    85  
    86  	if err := system.MkdirAll(stateDir, 0700, ""); err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	go r.monitorDaemon(ctx)
    91  
    92  	select {
    93  	case <-time.After(startupTimeout):
    94  		return nil, errors.New("timeout waiting for containerd to start")
    95  	case err := <-r.daemonStartCh:
    96  		if err != nil {
    97  			return nil, err
    98  		}
    99  	}
   100  
   101  	return r, nil
   102  }
   103  func (r *remote) WaitTimeout(d time.Duration) error {
   104  	select {
   105  	case <-time.After(d):
   106  		return errors.New("timeout waiting for containerd to stop")
   107  	case <-r.daemonStopCh:
   108  	}
   109  
   110  	return nil
   111  }
   112  
   113  func (r *remote) Address() string {
   114  	return r.GRPC.Address
   115  }
   116  func (r *remote) getContainerdPid() (int, error) {
   117  	pidFile := filepath.Join(r.stateDir, pidFile)
   118  	f, err := os.OpenFile(pidFile, os.O_RDWR, 0600)
   119  	if err != nil {
   120  		if os.IsNotExist(err) {
   121  			return -1, nil
   122  		}
   123  		return -1, err
   124  	}
   125  	defer f.Close()
   126  
   127  	b := make([]byte, 8)
   128  	n, err := f.Read(b)
   129  	if err != nil && err != io.EOF {
   130  		return -1, err
   131  	}
   132  
   133  	if n > 0 {
   134  		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
   135  		if err != nil {
   136  			return -1, err
   137  		}
   138  		if system.IsProcessAlive(int(pid)) {
   139  			return int(pid), nil
   140  		}
   141  	}
   142  
   143  	return -1, nil
   144  }
   145  
   146  func (r *remote) getContainerdConfig() (string, error) {
   147  	path := filepath.Join(r.stateDir, configFile)
   148  	f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
   149  	if err != nil {
   150  		return "", errors.Wrapf(err, "failed to open containerd config file at %s", path)
   151  	}
   152  	defer f.Close()
   153  
   154  	enc := toml.NewEncoder(f)
   155  	if err = enc.Encode(r.Config); err != nil {
   156  		return "", errors.Wrapf(err, "failed to encode general config")
   157  	}
   158  	if err = enc.Encode(r.pluginConfs); err != nil {
   159  		return "", errors.Wrapf(err, "failed to encode plugin configs")
   160  	}
   161  
   162  	return path, nil
   163  }
   164  
   165  func (r *remote) startContainerd() error {
   166  	pid, err := r.getContainerdPid()
   167  	if err != nil {
   168  		return err
   169  	}
   170  
   171  	if pid != -1 {
   172  		r.daemonPid = pid
   173  		logrus.WithField("pid", pid).
   174  			Infof("libcontainerd: %s is still running", binaryName)
   175  		return nil
   176  	}
   177  
   178  	configFile, err := r.getContainerdConfig()
   179  	if err != nil {
   180  		return err
   181  	}
   182  
   183  	args := []string{"--config", configFile}
   184  
   185  	if r.Debug.Level != "" {
   186  		args = append(args, "--log-level", r.Debug.Level)
   187  	}
   188  
   189  	cmd := exec.Command(binaryName, args...)
   190  	// redirect containerd logs to docker logs
   191  	cmd.Stdout = os.Stdout
   192  	cmd.Stderr = os.Stderr
   193  	cmd.SysProcAttr = containerdSysProcAttr()
   194  	// clear the NOTIFY_SOCKET from the env when starting containerd
   195  	cmd.Env = nil
   196  	for _, e := range os.Environ() {
   197  		if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
   198  			cmd.Env = append(cmd.Env, e)
   199  		}
   200  	}
   201  	if err := cmd.Start(); err != nil {
   202  		return err
   203  	}
   204  
   205  	r.daemonWaitCh = make(chan struct{})
   206  	go func() {
   207  		// Reap our child when needed
   208  		if err := cmd.Wait(); err != nil {
   209  			r.logger.WithError(err).Errorf("containerd did not exit successfully")
   210  		}
   211  		close(r.daemonWaitCh)
   212  	}()
   213  
   214  	r.daemonPid = cmd.Process.Pid
   215  
   216  	err = ioutil.WriteFile(filepath.Join(r.stateDir, pidFile), []byte(fmt.Sprintf("%d", r.daemonPid)), 0660)
   217  	if err != nil {
   218  		system.KillProcess(r.daemonPid)
   219  		return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
   220  	}
   221  
   222  	logrus.WithField("pid", r.daemonPid).
   223  		Infof("libcontainerd: started new %s process", binaryName)
   224  
   225  	return nil
   226  }
   227  
   228  func (r *remote) monitorDaemon(ctx context.Context) {
   229  	var (
   230  		transientFailureCount = 0
   231  		client                *containerd.Client
   232  		err                   error
   233  		delay                 <-chan time.Time
   234  		started               bool
   235  	)
   236  
   237  	defer func() {
   238  		if r.daemonPid != -1 {
   239  			r.stopDaemon()
   240  		}
   241  
   242  		// cleanup some files
   243  		os.Remove(filepath.Join(r.stateDir, pidFile))
   244  
   245  		r.platformCleanup()
   246  
   247  		close(r.daemonStopCh)
   248  	}()
   249  
   250  	for {
   251  		if delay != nil {
   252  			select {
   253  			case <-ctx.Done():
   254  				r.logger.Info("stopping healthcheck following graceful shutdown")
   255  				if client != nil {
   256  					client.Close()
   257  				}
   258  				return
   259  			case <-delay:
   260  			}
   261  		}
   262  
   263  		if r.daemonPid == -1 {
   264  			if r.daemonWaitCh != nil {
   265  				select {
   266  				case <-ctx.Done():
   267  					r.logger.Info("stopping containerd startup following graceful shutdown")
   268  					return
   269  				case <-r.daemonWaitCh:
   270  				}
   271  			}
   272  
   273  			os.RemoveAll(r.GRPC.Address)
   274  			if err := r.startContainerd(); err != nil {
   275  				if !started {
   276  					r.daemonStartCh <- err
   277  					return
   278  				}
   279  				r.logger.WithError(err).Error("failed restarting containerd")
   280  				delay = time.After(50 * time.Millisecond)
   281  				continue
   282  			}
   283  
   284  			client, err = containerd.New(r.GRPC.Address, containerd.WithTimeout(60*time.Second))
   285  			if err != nil {
   286  				r.logger.WithError(err).Error("failed connecting to containerd")
   287  				delay = time.After(100 * time.Millisecond)
   288  				continue
   289  			}
   290  		}
   291  
   292  		if client != nil {
   293  			tctx, cancel := context.WithTimeout(ctx, healthCheckTimeout)
   294  			_, err := client.IsServing(tctx)
   295  			cancel()
   296  			if err == nil {
   297  				if !started {
   298  					close(r.daemonStartCh)
   299  					started = true
   300  				}
   301  
   302  				transientFailureCount = 0
   303  				delay = time.After(500 * time.Millisecond)
   304  				continue
   305  			}
   306  
   307  			r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
   308  
   309  			transientFailureCount++
   310  			if transientFailureCount < maxConnectionRetryCount || system.IsProcessAlive(r.daemonPid) {
   311  				delay = time.After(time.Duration(transientFailureCount) * 200 * time.Millisecond)
   312  				continue
   313  			}
   314  			client.Close()
   315  			client = nil
   316  		}
   317  
   318  		if system.IsProcessAlive(r.daemonPid) {
   319  			r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
   320  			r.killDaemon()
   321  		}
   322  
   323  		r.daemonPid = -1
   324  		delay = nil
   325  		transientFailureCount = 0
   326  	}
   327  }