github.com/jwhonce/docker@v0.6.7-0.20190327063223-da823cf3a5a3/libcontainerd/supervisor/remote_daemon.go (about) 1 package supervisor // import "github.com/docker/docker/libcontainerd/supervisor" 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "strconv" 12 "strings" 13 "sync" 14 "time" 15 16 "github.com/BurntSushi/toml" 17 "github.com/containerd/containerd" 18 "github.com/containerd/containerd/services/server/config" 19 "github.com/docker/docker/pkg/system" 20 "github.com/pkg/errors" 21 "github.com/sirupsen/logrus" 22 ) 23 24 const ( 25 maxConnectionRetryCount = 3 26 healthCheckTimeout = 3 * time.Second 27 shutdownTimeout = 15 * time.Second 28 startupTimeout = 15 * time.Second 29 configFile = "containerd.toml" 30 binaryName = "containerd" 31 pidFile = "containerd.pid" 32 ) 33 34 type pluginConfigs struct { 35 Plugins map[string]interface{} `toml:"plugins"` 36 } 37 38 type remote struct { 39 sync.RWMutex 40 config.Config 41 42 daemonPid int 43 logger *logrus.Entry 44 45 daemonWaitCh chan struct{} 46 daemonStartCh chan error 47 daemonStopCh chan struct{} 48 49 rootDir string 50 stateDir string 51 pluginConfs pluginConfigs 52 } 53 54 // Daemon represents a running containerd daemon 55 type Daemon interface { 56 WaitTimeout(time.Duration) error 57 Address() string 58 } 59 60 // DaemonOpt allows to configure parameters of container daemons 61 type DaemonOpt func(c *remote) error 62 63 // Start starts a containerd daemon and monitors it 64 func Start(ctx context.Context, rootDir, stateDir string, opts ...DaemonOpt) (Daemon, error) { 65 r := &remote{ 66 rootDir: rootDir, 67 stateDir: stateDir, 68 Config: config.Config{ 69 Root: filepath.Join(rootDir, "daemon"), 70 State: filepath.Join(stateDir, "daemon"), 71 }, 72 pluginConfs: pluginConfigs{make(map[string]interface{})}, 73 daemonPid: -1, 74 logger: logrus.WithField("module", "libcontainerd"), 75 daemonStartCh: make(chan error, 1), 76 daemonStopCh: make(chan struct{}), 77 } 78 79 for _, opt := range opts { 80 if err := opt(r); err != nil { 81 return nil, err 82 } 83 } 84 r.setDefaults() 85 86 if err := system.MkdirAll(stateDir, 0700, ""); err != nil { 87 return nil, err 88 } 89 90 go r.monitorDaemon(ctx) 91 92 select { 93 case <-time.After(startupTimeout): 94 return nil, errors.New("timeout waiting for containerd to start") 95 case err := <-r.daemonStartCh: 96 if err != nil { 97 return nil, err 98 } 99 } 100 101 return r, nil 102 } 103 func (r *remote) WaitTimeout(d time.Duration) error { 104 select { 105 case <-time.After(d): 106 return errors.New("timeout waiting for containerd to stop") 107 case <-r.daemonStopCh: 108 } 109 110 return nil 111 } 112 113 func (r *remote) Address() string { 114 return r.GRPC.Address 115 } 116 func (r *remote) getContainerdPid() (int, error) { 117 pidFile := filepath.Join(r.stateDir, pidFile) 118 f, err := os.OpenFile(pidFile, os.O_RDWR, 0600) 119 if err != nil { 120 if os.IsNotExist(err) { 121 return -1, nil 122 } 123 return -1, err 124 } 125 defer f.Close() 126 127 b := make([]byte, 8) 128 n, err := f.Read(b) 129 if err != nil && err != io.EOF { 130 return -1, err 131 } 132 133 if n > 0 { 134 pid, err := strconv.ParseUint(string(b[:n]), 10, 64) 135 if err != nil { 136 return -1, err 137 } 138 if system.IsProcessAlive(int(pid)) { 139 return int(pid), nil 140 } 141 } 142 143 return -1, nil 144 } 145 146 func (r *remote) getContainerdConfig() (string, error) { 147 path := filepath.Join(r.stateDir, configFile) 148 f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) 149 if err != nil { 150 return "", errors.Wrapf(err, "failed to open containerd config file at %s", path) 151 } 152 defer f.Close() 153 154 enc := toml.NewEncoder(f) 155 if err = enc.Encode(r.Config); err != nil { 156 return "", errors.Wrapf(err, "failed to encode general config") 157 } 158 if err = enc.Encode(r.pluginConfs); err != nil { 159 return "", errors.Wrapf(err, "failed to encode plugin configs") 160 } 161 162 return path, nil 163 } 164 165 func (r *remote) startContainerd() error { 166 pid, err := r.getContainerdPid() 167 if err != nil { 168 return err 169 } 170 171 if pid != -1 { 172 r.daemonPid = pid 173 logrus.WithField("pid", pid). 174 Infof("libcontainerd: %s is still running", binaryName) 175 return nil 176 } 177 178 configFile, err := r.getContainerdConfig() 179 if err != nil { 180 return err 181 } 182 183 args := []string{"--config", configFile} 184 185 if r.Debug.Level != "" { 186 args = append(args, "--log-level", r.Debug.Level) 187 } 188 189 cmd := exec.Command(binaryName, args...) 190 // redirect containerd logs to docker logs 191 cmd.Stdout = os.Stdout 192 cmd.Stderr = os.Stderr 193 cmd.SysProcAttr = containerdSysProcAttr() 194 // clear the NOTIFY_SOCKET from the env when starting containerd 195 cmd.Env = nil 196 for _, e := range os.Environ() { 197 if !strings.HasPrefix(e, "NOTIFY_SOCKET") { 198 cmd.Env = append(cmd.Env, e) 199 } 200 } 201 if err := cmd.Start(); err != nil { 202 return err 203 } 204 205 r.daemonWaitCh = make(chan struct{}) 206 go func() { 207 // Reap our child when needed 208 if err := cmd.Wait(); err != nil { 209 r.logger.WithError(err).Errorf("containerd did not exit successfully") 210 } 211 close(r.daemonWaitCh) 212 }() 213 214 r.daemonPid = cmd.Process.Pid 215 216 err = ioutil.WriteFile(filepath.Join(r.stateDir, pidFile), []byte(fmt.Sprintf("%d", r.daemonPid)), 0660) 217 if err != nil { 218 system.KillProcess(r.daemonPid) 219 return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk") 220 } 221 222 logrus.WithField("pid", r.daemonPid). 223 Infof("libcontainerd: started new %s process", binaryName) 224 225 return nil 226 } 227 228 func (r *remote) monitorDaemon(ctx context.Context) { 229 var ( 230 transientFailureCount = 0 231 client *containerd.Client 232 err error 233 delay <-chan time.Time 234 started bool 235 ) 236 237 defer func() { 238 if r.daemonPid != -1 { 239 r.stopDaemon() 240 } 241 242 // cleanup some files 243 os.Remove(filepath.Join(r.stateDir, pidFile)) 244 245 r.platformCleanup() 246 247 close(r.daemonStopCh) 248 }() 249 250 for { 251 if delay != nil { 252 select { 253 case <-ctx.Done(): 254 r.logger.Info("stopping healthcheck following graceful shutdown") 255 if client != nil { 256 client.Close() 257 } 258 return 259 case <-delay: 260 } 261 } 262 263 if r.daemonPid == -1 { 264 if r.daemonWaitCh != nil { 265 select { 266 case <-ctx.Done(): 267 r.logger.Info("stopping containerd startup following graceful shutdown") 268 return 269 case <-r.daemonWaitCh: 270 } 271 } 272 273 os.RemoveAll(r.GRPC.Address) 274 if err := r.startContainerd(); err != nil { 275 if !started { 276 r.daemonStartCh <- err 277 return 278 } 279 r.logger.WithError(err).Error("failed restarting containerd") 280 delay = time.After(50 * time.Millisecond) 281 continue 282 } 283 284 client, err = containerd.New(r.GRPC.Address, containerd.WithTimeout(60*time.Second)) 285 if err != nil { 286 r.logger.WithError(err).Error("failed connecting to containerd") 287 delay = time.After(100 * time.Millisecond) 288 continue 289 } 290 } 291 292 if client != nil { 293 tctx, cancel := context.WithTimeout(ctx, healthCheckTimeout) 294 _, err := client.IsServing(tctx) 295 cancel() 296 if err == nil { 297 if !started { 298 close(r.daemonStartCh) 299 started = true 300 } 301 302 transientFailureCount = 0 303 delay = time.After(500 * time.Millisecond) 304 continue 305 } 306 307 r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding") 308 309 transientFailureCount++ 310 if transientFailureCount < maxConnectionRetryCount || system.IsProcessAlive(r.daemonPid) { 311 delay = time.After(time.Duration(transientFailureCount) * 200 * time.Millisecond) 312 continue 313 } 314 client.Close() 315 client = nil 316 } 317 318 if system.IsProcessAlive(r.daemonPid) { 319 r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd") 320 r.killDaemon() 321 } 322 323 r.daemonPid = -1 324 delay = nil 325 transientFailureCount = 0 326 } 327 }