github.com/shishir-a412ed/docker@v1.3.2-0.20180103180333-fda904911d87/libcontainerd/remote_daemon.go (about)

     1  // +build !windows
     2  
     3  package libcontainerd
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"os"
    11  	"os/exec"
    12  	"path/filepath"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"syscall"
    17  	"time"
    18  
    19  	"github.com/BurntSushi/toml"
    20  	"github.com/containerd/containerd"
    21  	"github.com/containerd/containerd/server"
    22  	"github.com/docker/docker/pkg/system"
    23  	"github.com/pkg/errors"
    24  	"github.com/sirupsen/logrus"
    25  )
    26  
const (
	// maxConnectionRetryCount is the number of consecutive failed health
	// checks after which the connection monitor kills and restarts the daemon.
	maxConnectionRetryCount = 3
	// healthCheckTimeout bounds each individual health-check request.
	healthCheckTimeout = 3 * time.Second
	// shutdownTimeout is not referenced in this file's visible code;
	// presumably the grace period used when stopping the daemon — TODO confirm.
	shutdownTimeout = 15 * time.Second
	// configFile is the name of the generated containerd configuration file,
	// created inside the state directory.
	configFile = "containerd.toml"
	// binaryName is the executable launched when the daemon has to be started.
	binaryName = "docker-containerd"
	// pidFile is the name of the file, inside the state directory, that
	// records the pid of the daemon process.
	pidFile = "docker-containerd.pid"
)
    35  
// pluginConfigs holds per-plugin settings that are TOML-encoded under the
// "plugins" table of the generated containerd configuration file.
type pluginConfigs struct {
	// Plugins maps a plugin name to its configuration value.
	Plugins map[string]interface{} `toml:"plugins"`
}
    39  
// remote tracks a containerd daemon (optionally started and supervised by this
// package) together with the clients that were created against it.
type remote struct {
	// The embedded lock is taken when the clients slice is modified.
	sync.RWMutex
	// The embedded containerd server configuration is encoded into the
	// generated config file and provides the GRPC address used to connect.
	server.Config

	// daemonPid is the pid of the supervised daemon, or -1 when no daemon
	// process is known.
	daemonPid int
	logger    *logrus.Entry

	// daemonWaitCh is closed once a daemon process spawned by this package has
	// been reaped. It is left nil when an already-running daemon was adopted.
	daemonWaitCh chan struct{}
	clients      []*client
	// shutdownContext is cancelled (via shutdownCancel) by Cleanup; it stops
	// the connection monitor and is handed to each client's event stream.
	shutdownContext context.Context
	shutdownCancel  context.CancelFunc
	// shutdown is not touched by the code visible in this file;
	// presumably set while the daemon is being stopped — TODO confirm.
	shutdown bool

	// Options
	startDaemon bool   // when true, New starts (or adopts) the daemon itself
	rootDir     string // base directory; the daemon root is rootDir/daemon
	stateDir    string // holds the pid file, the config file and the daemon state
	snapshotter string // not used in the visible code; presumably set by an option
	pluginConfs pluginConfigs
}
    60  
    61  // New creates a fresh instance of libcontainerd remote.
    62  func New(rootDir, stateDir string, options ...RemoteOption) (rem Remote, err error) {
    63  	defer func() {
    64  		if err != nil {
    65  			err = errors.Wrap(err, "Failed to connect to containerd")
    66  		}
    67  	}()
    68  
    69  	r := &remote{
    70  		rootDir:  rootDir,
    71  		stateDir: stateDir,
    72  		Config: server.Config{
    73  			Root:  filepath.Join(rootDir, "daemon"),
    74  			State: filepath.Join(stateDir, "daemon"),
    75  		},
    76  		pluginConfs: pluginConfigs{make(map[string]interface{})},
    77  		daemonPid:   -1,
    78  		logger:      logrus.WithField("module", "libcontainerd"),
    79  	}
    80  	r.shutdownContext, r.shutdownCancel = context.WithCancel(context.Background())
    81  
    82  	rem = r
    83  	for _, option := range options {
    84  		if err = option.Apply(r); err != nil {
    85  			return
    86  		}
    87  	}
    88  	r.setDefaults()
    89  
    90  	if err = system.MkdirAll(stateDir, 0700, ""); err != nil {
    91  		return
    92  	}
    93  
    94  	if r.startDaemon {
    95  		os.Remove(r.GRPC.Address)
    96  		if err = r.startContainerd(); err != nil {
    97  			return
    98  		}
    99  		defer func() {
   100  			if err != nil {
   101  				r.Cleanup()
   102  			}
   103  		}()
   104  	}
   105  
   106  	// This connection is just used to monitor the connection
   107  	client, err := containerd.New(r.GRPC.Address)
   108  	if err != nil {
   109  		return
   110  	}
   111  	if _, err := client.Version(context.Background()); err != nil {
   112  		system.KillProcess(r.daemonPid)
   113  		return nil, errors.Wrapf(err, "unable to get containerd version")
   114  	}
   115  
   116  	go r.monitorConnection(client)
   117  
   118  	return r, nil
   119  }
   120  
   121  func (r *remote) NewClient(ns string, b Backend) (Client, error) {
   122  	c := &client{
   123  		stateDir:   r.stateDir,
   124  		logger:     r.logger.WithField("namespace", ns),
   125  		namespace:  ns,
   126  		backend:    b,
   127  		containers: make(map[string]*container),
   128  	}
   129  
   130  	rclient, err := containerd.New(r.GRPC.Address, containerd.WithDefaultNamespace(ns))
   131  	if err != nil {
   132  		return nil, err
   133  	}
   134  	c.remote = rclient
   135  
   136  	go c.processEventStream(r.shutdownContext)
   137  
   138  	r.Lock()
   139  	r.clients = append(r.clients, c)
   140  	r.Unlock()
   141  	return c, nil
   142  }
   143  
   144  func (r *remote) Cleanup() {
   145  	if r.daemonPid != -1 {
   146  		r.shutdownCancel()
   147  		r.stopDaemon()
   148  	}
   149  
   150  	// cleanup some files
   151  	os.Remove(filepath.Join(r.stateDir, pidFile))
   152  
   153  	r.platformCleanup()
   154  }
   155  
   156  func (r *remote) getContainerdPid() (int, error) {
   157  	pidFile := filepath.Join(r.stateDir, pidFile)
   158  	f, err := os.OpenFile(pidFile, os.O_RDWR, 0600)
   159  	if err != nil {
   160  		if os.IsNotExist(err) {
   161  			return -1, nil
   162  		}
   163  		return -1, err
   164  	}
   165  	defer f.Close()
   166  
   167  	b := make([]byte, 8)
   168  	n, err := f.Read(b)
   169  	if err != nil && err != io.EOF {
   170  		return -1, err
   171  	}
   172  
   173  	if n > 0 {
   174  		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
   175  		if err != nil {
   176  			return -1, err
   177  		}
   178  		if system.IsProcessAlive(int(pid)) {
   179  			return int(pid), nil
   180  		}
   181  	}
   182  
   183  	return -1, nil
   184  }
   185  
   186  func (r *remote) getContainerdConfig() (string, error) {
   187  	path := filepath.Join(r.stateDir, configFile)
   188  	f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
   189  	if err != nil {
   190  		return "", errors.Wrapf(err, "failed to open containerd config file at %s", path)
   191  	}
   192  	defer f.Close()
   193  
   194  	enc := toml.NewEncoder(f)
   195  	if err = enc.Encode(r.Config); err != nil {
   196  		return "", errors.Wrapf(err, "failed to encode general config")
   197  	}
   198  	if err = enc.Encode(r.pluginConfs); err != nil {
   199  		return "", errors.Wrapf(err, "failed to encode plugin configs")
   200  	}
   201  
   202  	return path, nil
   203  }
   204  
   205  func (r *remote) startContainerd() error {
   206  	pid, err := r.getContainerdPid()
   207  	if err != nil {
   208  		return err
   209  	}
   210  
   211  	if pid != -1 {
   212  		r.daemonPid = pid
   213  		logrus.WithField("pid", pid).
   214  			Infof("libcontainerd: %s is still running", binaryName)
   215  		return nil
   216  	}
   217  
   218  	configFile, err := r.getContainerdConfig()
   219  	if err != nil {
   220  		return err
   221  	}
   222  
   223  	args := []string{"--config", configFile}
   224  	cmd := exec.Command(binaryName, args...)
   225  	// redirect containerd logs to docker logs
   226  	cmd.Stdout = os.Stdout
   227  	cmd.Stderr = os.Stderr
   228  	cmd.SysProcAttr = containerdSysProcAttr()
   229  	// clear the NOTIFY_SOCKET from the env when starting containerd
   230  	cmd.Env = nil
   231  	for _, e := range os.Environ() {
   232  		if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
   233  			cmd.Env = append(cmd.Env, e)
   234  		}
   235  	}
   236  	if err := cmd.Start(); err != nil {
   237  		return err
   238  	}
   239  
   240  	r.daemonWaitCh = make(chan struct{})
   241  	go func() {
   242  		// Reap our child when needed
   243  		if err := cmd.Wait(); err != nil {
   244  			r.logger.WithError(err).Errorf("containerd did not exit successfully")
   245  		}
   246  		close(r.daemonWaitCh)
   247  	}()
   248  
   249  	r.daemonPid = cmd.Process.Pid
   250  
   251  	err = ioutil.WriteFile(filepath.Join(r.stateDir, pidFile), []byte(fmt.Sprintf("%d", r.daemonPid)), 0660)
   252  	if err != nil {
   253  		system.KillProcess(r.daemonPid)
   254  		return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
   255  	}
   256  
   257  	logrus.WithField("pid", r.daemonPid).
   258  		Infof("libcontainerd: started new %s process", binaryName)
   259  
   260  	return nil
   261  }
   262  
   263  func (r *remote) monitorConnection(client *containerd.Client) {
   264  	var transientFailureCount = 0
   265  
   266  	ticker := time.NewTicker(500 * time.Millisecond)
   267  	defer ticker.Stop()
   268  
   269  	for {
   270  		<-ticker.C
   271  		ctx, cancel := context.WithTimeout(r.shutdownContext, healthCheckTimeout)
   272  		_, err := client.IsServing(ctx)
   273  		cancel()
   274  		if err == nil {
   275  			transientFailureCount = 0
   276  			continue
   277  		}
   278  
   279  		select {
   280  		case <-r.shutdownContext.Done():
   281  			r.logger.Info("stopping healthcheck following graceful shutdown")
   282  			client.Close()
   283  			return
   284  		default:
   285  		}
   286  
   287  		r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
   288  
   289  		if r.daemonPid != -1 {
   290  			transientFailureCount++
   291  			if transientFailureCount >= maxConnectionRetryCount || !system.IsProcessAlive(r.daemonPid) {
   292  				transientFailureCount = 0
   293  				if system.IsProcessAlive(r.daemonPid) {
   294  					r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
   295  					// Try to get a stack trace
   296  					syscall.Kill(r.daemonPid, syscall.SIGUSR1)
   297  					<-time.After(100 * time.Millisecond)
   298  					system.KillProcess(r.daemonPid)
   299  				}
   300  				<-r.daemonWaitCh
   301  				var err error
   302  				client.Close()
   303  				os.Remove(r.GRPC.Address)
   304  				if err = r.startContainerd(); err != nil {
   305  					r.logger.WithError(err).Error("failed restarting containerd")
   306  				} else {
   307  					newClient, err := containerd.New(r.GRPC.Address)
   308  					if err != nil {
   309  						r.logger.WithError(err).Error("failed connect to containerd")
   310  					} else {
   311  						client = newClient
   312  					}
   313  				}
   314  			}
   315  		}
   316  	}
   317  }