github.com/moby/docker@v26.1.3+incompatible/daemon/daemon.go

// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
//go:build go1.19

// Package daemon exposes the functions that occur on the host server
// that the Docker daemon is running.
//
// In implementing the various functions of the daemon, there is often
// a method-specific struct for configuring the runtime behavior.
package daemon // import "github.com/docker/docker/daemon"

import (
	"context"
	"fmt"
	"net"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"github.com/containerd/containerd"
	"github.com/containerd/containerd/defaults"
	"github.com/containerd/containerd/pkg/dialer"
	"github.com/containerd/containerd/pkg/userns"
	"github.com/containerd/containerd/remotes/docker"
	"github.com/containerd/log"
	"github.com/distribution/reference"
	dist "github.com/docker/distribution"
	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/backend"
	containertypes "github.com/docker/docker/api/types/container"
	imagetypes "github.com/docker/docker/api/types/image"
	networktypes "github.com/docker/docker/api/types/network"
	registrytypes "github.com/docker/docker/api/types/registry"
	"github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/api/types/volume"
	"github.com/docker/docker/builder"
	"github.com/docker/docker/container"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/docker/daemon/config"
	ctrd "github.com/docker/docker/daemon/containerd"
	"github.com/docker/docker/daemon/events"
	_ "github.com/docker/docker/daemon/graphdriver/register" // register graph drivers
	"github.com/docker/docker/daemon/images"
	dlogger "github.com/docker/docker/daemon/logger"
	"github.com/docker/docker/daemon/logger/local"
	"github.com/docker/docker/daemon/network"
	"github.com/docker/docker/daemon/snapshotter"
	"github.com/docker/docker/daemon/stats"
	"github.com/docker/docker/distribution"
	dmetadata "github.com/docker/docker/distribution/metadata"
	"github.com/docker/docker/dockerversion"
	"github.com/docker/docker/errdefs"
	"github.com/docker/docker/image"
	"github.com/docker/docker/internal/compatcontext"
	"github.com/docker/docker/layer"
	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
	"github.com/docker/docker/libnetwork"
	"github.com/docker/docker/libnetwork/cluster"
	nwconfig "github.com/docker/docker/libnetwork/config"
	"github.com/docker/docker/pkg/authorization"
	"github.com/docker/docker/pkg/fileutils"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/plugingetter"
	"github.com/docker/docker/pkg/sysinfo"
	"github.com/docker/docker/pkg/system"
	"github.com/docker/docker/plugin"
	pluginexec "github.com/docker/docker/plugin/executor/containerd"
	refstore "github.com/docker/docker/reference"
	"github.com/docker/docker/registry"
	"github.com/docker/docker/runconfig"
	volumesservice "github.com/docker/docker/volume/service"
	"github.com/moby/buildkit/util/resolver"
	resolverconfig "github.com/moby/buildkit/util/resolver/config"
	"github.com/moby/locker"
	"github.com/pkg/errors"
	"go.etcd.io/bbolt"
	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
	"golang.org/x/sync/semaphore"
	"google.golang.org/grpc"
"google.golang.org/grpc/backoff" 84 "google.golang.org/grpc/credentials/insecure" 85 "resenje.org/singleflight" 86 ) 87 88 type configStore struct { 89 config.Config 90 91 Runtimes runtimes 92 } 93 94 // Daemon holds information about the Docker daemon. 95 type Daemon struct { 96 id string 97 repository string 98 containers container.Store 99 containersReplica *container.ViewDB 100 execCommands *container.ExecStore 101 imageService ImageService 102 configStore atomic.Pointer[configStore] 103 configReload sync.Mutex 104 statsCollector *stats.Collector 105 defaultLogConfig containertypes.LogConfig 106 registryService *registry.Service 107 EventsService *events.Events 108 netController *libnetwork.Controller 109 volumes *volumesservice.VolumesService 110 root string 111 sysInfoOnce sync.Once 112 sysInfo *sysinfo.SysInfo 113 shutdown bool 114 idMapping idtools.IdentityMapping 115 PluginStore *plugin.Store // TODO: remove 116 pluginManager *plugin.Manager 117 linkIndex *linkIndex 118 containerdClient *containerd.Client 119 containerd libcontainerdtypes.Client 120 defaultIsolation containertypes.Isolation // Default isolation mode on Windows 121 clusterProvider cluster.Provider 122 cluster Cluster 123 genericResources []swarm.GenericResource 124 metricsPluginListener net.Listener 125 ReferenceStore refstore.Store 126 127 machineMemory uint64 128 129 seccompProfile []byte 130 seccompProfilePath string 131 132 usageContainers singleflight.Group[struct{}, []*types.Container] 133 usageImages singleflight.Group[struct{}, []*imagetypes.Summary] 134 usageVolumes singleflight.Group[struct{}, []*volume.Volume] 135 usageLayer singleflight.Group[struct{}, int64] 136 137 pruneRunning int32 138 hosts map[string]bool // hosts stores the addresses the daemon is listening on 139 startupDone chan struct{} 140 141 attachmentStore network.AttachmentStore 142 attachableNetworkLock *locker.Locker 143 144 // This is used for Windows which doesn't currently support running on containerd 145 // It stores metadata for the content store (used for manifest caching) 146 // This needs to be closed on daemon exit 147 mdDB *bbolt.DB 148 149 usesSnapshotter bool 150 } 151 152 // ID returns the daemon id 153 func (daemon *Daemon) ID() string { 154 return daemon.id 155 } 156 157 // StoreHosts stores the addresses the daemon is listening on 158 func (daemon *Daemon) StoreHosts(hosts []string) { 159 if daemon.hosts == nil { 160 daemon.hosts = make(map[string]bool) 161 } 162 for _, h := range hosts { 163 daemon.hosts[h] = true 164 } 165 } 166 167 // config returns an immutable snapshot of the current daemon configuration. 168 // Multiple calls to this function will return the same pointer until the 169 // configuration is reloaded so callers must take care not to modify the 170 // returned value. 171 // 172 // To ensure that the configuration used remains consistent throughout the 173 // lifetime of an operation, the configuration pointer should be passed down the 174 // call stack, like one would a [context.Context] value. Only the entrypoints 175 // for operations, the outermost functions, should call this function. 176 func (daemon *Daemon) config() *configStore { 177 cfg := daemon.configStore.Load() 178 if cfg == nil { 179 return &configStore{} 180 } 181 return cfg 182 } 183 184 // Config returns daemon's config. 
func (daemon *Daemon) Config() config.Config {
	return daemon.config().Config
}

// HasExperimental returns whether the experimental features of the daemon are enabled or not
func (daemon *Daemon) HasExperimental() bool {
	return daemon.config().Experimental
}

// Features returns the features map from configStore
func (daemon *Daemon) Features() map[string]bool {
	return daemon.config().Features
}

// UsesSnapshotter returns true if the feature flag to use the containerd snapshotter is enabled
func (daemon *Daemon) UsesSnapshotter() bool {
	return daemon.usesSnapshotter
}

// RegistryHosts returns the registry hosts configuration for the host component
// of a distribution image reference.
func (daemon *Daemon) RegistryHosts(host string) ([]docker.RegistryHost, error) {
	m := map[string]resolverconfig.RegistryConfig{
		"docker.io": {Mirrors: daemon.registryService.ServiceConfig().Mirrors},
	}
	conf := daemon.registryService.ServiceConfig().IndexConfigs
	for k, v := range conf {
		c := m[k]
		if !v.Secure {
			t := true
			c.PlainHTTP = &t
			c.Insecure = &t
		}
		m[k] = c
	}
	if c, ok := m[host]; !ok && daemon.registryService.IsInsecureRegistry(host) {
		t := true
		c.PlainHTTP = &t
		c.Insecure = &t
		m[host] = c
	}

	for k, v := range m {
		v.TLSConfigDir = []string{registry.HostCertsDir(k)}
		m[k] = v
	}

	certsDir := registry.CertsDir()
	if fis, err := os.ReadDir(certsDir); err == nil {
		for _, fi := range fis {
			if _, ok := m[fi.Name()]; !ok {
				m[fi.Name()] = resolverconfig.RegistryConfig{
					TLSConfigDir: []string{filepath.Join(certsDir, fi.Name())},
				}
			}
		}
	}

	return resolver.NewRegistryConfig(m)(host)
}

// layerAccessor may be implemented by ImageService
type layerAccessor interface {
	GetLayerByID(cid string) (layer.RWLayer, error)
}

func (daemon *Daemon) restore(cfg *configStore) error {
	var mapLock sync.Mutex
	containers := make(map[string]*container.Container)

	log.G(context.TODO()).Info("Loading containers: start.")

	dir, err := os.ReadDir(daemon.repository)
	if err != nil {
		return err
	}

	// parallelLimit is the maximum number of parallel startup jobs that we
	// allow (this is the limit used for all startup semaphores). The multiplier
	// (128) was chosen after some fairly significant benchmarking -- don't change
	// it unless you've tested it significantly (this value is adjusted if
	// RLIMIT_NOFILE is small to avoid EMFILE).
	parallelLimit := adjustParallelLimit(len(dir), 128*runtime.NumCPU())

	// Re-used for all parallel startup jobs.
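	// The WaitGroup and weighted semaphore below are shared by every phase of
	// this function (load, register, restore, links, restart, remove, and
	// mount preparation) to bound how many containers are processed
	// concurrently.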
	var group sync.WaitGroup
	sem := semaphore.NewWeighted(int64(parallelLimit))

	for _, v := range dir {
		group.Add(1)
		go func(id string) {
			defer group.Done()
			_ = sem.Acquire(context.Background(), 1)
			defer sem.Release(1)

			logger := log.G(context.TODO()).WithField("container", id)

			c, err := daemon.load(id)
			if err != nil {
				logger.WithError(err).Error("failed to load container")
				return
			}
			if c.Driver != daemon.imageService.StorageDriver() {
				// Ignore the container if it wasn't created with the current storage-driver
				logger.Debugf("not restoring container because it was created with another storage driver (%s)", c.Driver)
				return
			}
			if accessor, ok := daemon.imageService.(layerAccessor); ok {
				rwlayer, err := accessor.GetLayerByID(c.ID)
				if err != nil {
					logger.WithError(err).Error("failed to load container mount")
					return
				}
				c.RWLayer = rwlayer
			}
			logger.WithFields(log.Fields{
				"running": c.IsRunning(),
				"paused":  c.IsPaused(),
			}).Debug("loaded container")

			mapLock.Lock()
			containers[c.ID] = c
			mapLock.Unlock()
		}(v.Name())
	}
	group.Wait()

	removeContainers := make(map[string]*container.Container)
	restartContainers := make(map[*container.Container]chan struct{})
	activeSandboxes := make(map[string]interface{})

	for _, c := range containers {
		group.Add(1)
		go func(c *container.Container) {
			defer group.Done()
			_ = sem.Acquire(context.Background(), 1)
			defer sem.Release(1)

			logger := log.G(context.TODO()).WithField("container", c.ID)

			if err := daemon.registerName(c); err != nil {
				logger.WithError(err).Errorf("failed to register container name: %s", c.Name)
				mapLock.Lock()
				delete(containers, c.ID)
				mapLock.Unlock()
				return
			}
			if err := daemon.Register(c); err != nil {
				logger.WithError(err).Error("failed to register container")
				mapLock.Lock()
				delete(containers, c.ID)
				mapLock.Unlock()
				return
			}
		}(c)
	}
	group.Wait()

	for _, c := range containers {
		group.Add(1)
		go func(c *container.Container) {
			defer group.Done()
			_ = sem.Acquire(context.Background(), 1)
			defer sem.Release(1)

			baseLogger := log.G(context.TODO()).WithField("container", c.ID)

			if c.HostConfig != nil {
				// Migrate containers that don't have the default ("no") restart-policy set.
				// The RestartPolicy.Name field may be empty for containers that were
				// created with versions before v25.0.0.
				//
				// We also need to set the MaximumRetryCount to 0, to prevent
				// validation from failing (MaximumRetryCount is not allowed if
				// no restart-policy ("none") is set).
				if c.HostConfig.RestartPolicy.Name == "" {
					baseLogger.Debug("migrated restart-policy")
					c.HostConfig.RestartPolicy.Name = containertypes.RestartPolicyDisabled
					c.HostConfig.RestartPolicy.MaximumRetryCount = 0
				}

				// Migrate containers that use the deprecated (and now non-functional)
				// logentries driver. Update them to use the "local" logging driver
				// instead.
				//
				// TODO(thaJeztah): remove logentries check and migration code in release v26.0.0.
				if c.HostConfig.LogConfig.Type == "logentries" {
					baseLogger.Warn("migrated deprecated logentries logging driver")
					c.HostConfig.LogConfig = containertypes.LogConfig{
						Type: local.Name,
					}
				}

				// Normalize the "default" network mode into the network mode
				// it aliases ("bridge" on Linux and "nat" on Windows). This is
				// also done by the container router, for new containers. But
				// we need to do it here too to handle containers that were
				// created prior to v26.0.
				//
				// TODO(aker): remove this migration code once the next LTM version of MCR is released.
				if c.HostConfig.NetworkMode.IsDefault() {
					c.HostConfig.NetworkMode = runconfig.DefaultDaemonNetworkMode()
					if nw, ok := c.NetworkSettings.Networks[networktypes.NetworkDefault]; ok {
						c.NetworkSettings.Networks[c.HostConfig.NetworkMode.NetworkName()] = nw
						delete(c.NetworkSettings.Networks, networktypes.NetworkDefault)
					}
				}
			}

			if err := daemon.checkpointAndSave(c); err != nil {
				baseLogger.WithError(err).Error("failed to save migrated container config to disk")
			}

			daemon.setStateCounter(c)

			logger := func(c *container.Container) *log.Entry {
				return baseLogger.WithFields(log.Fields{
					"running":    c.IsRunning(),
					"paused":     c.IsPaused(),
					"restarting": c.IsRestarting(),
				})
			}

			logger(c).Debug("restoring container")

			var es *containerd.ExitStatus

			if err := c.RestoreTask(context.Background(), daemon.containerd); err != nil && !errdefs.IsNotFound(err) {
				logger(c).WithError(err).Error("failed to restore container with containerd")
				return
			}

			alive := false
			status := containerd.Unknown
			if tsk, ok := c.Task(); ok {
				s, err := tsk.Status(context.Background())
				if err != nil {
					logger(c).WithError(err).Error("failed to get task status")
				} else {
					status = s.Status
					alive = status != containerd.Stopped
					if !alive {
						logger(c).Debug("cleaning up dead container process")
						es, err = tsk.Delete(context.Background())
						if err != nil && !errdefs.IsNotFound(err) {
							logger(c).WithError(err).Error("failed to delete task from containerd")
							return
						}
					} else if !cfg.LiveRestoreEnabled {
						logger(c).Debug("shutting down container considered alive by containerd")
						if err := daemon.shutdownContainer(c); err != nil && !errdefs.IsNotFound(err) {
							baseLogger.WithError(err).Error("error shutting down container")
							return
						}
						status = containerd.Stopped
						alive = false
						c.ResetRestartManager(false)
					}
				}
			}
			// If the containerd task for the container was not found, docker's view of the
			// container state will be updated accordingly via SetStopped further down.

			if c.IsRunning() || c.IsPaused() {
				logger(c).Debug("syncing container on disk state with real state")

				c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking

				switch {
				case c.IsPaused() && alive:
					logger(c).WithField("state", status).Info("restored container paused")
					switch status {
					case containerd.Paused, containerd.Pausing:
						// nothing to do
					case containerd.Unknown, containerd.Stopped, "":
						baseLogger.WithField("status", status).Error("unexpected status for paused container during restore")
					default:
						// running
						c.Lock()
						c.Paused = false
						daemon.setStateCounter(c)
						daemon.initHealthMonitor(c)
						if err := c.CheckpointTo(daemon.containersReplica); err != nil {
							baseLogger.WithError(err).Error("failed to update paused container state")
						}
						c.Unlock()
					}
				case !c.IsPaused() && alive:
					logger(c).Debug("restoring healthcheck")
					c.Lock()
					daemon.initHealthMonitor(c)
					c.Unlock()
				}

				if !alive {
					logger(c).Debug("setting stopped state")
					c.Lock()
					var ces container.ExitStatus
					if es != nil {
						ces.ExitCode = int(es.ExitCode())
						ces.ExitedAt = es.ExitTime()
					} else {
						ces.ExitCode = 255
					}
					c.SetStopped(&ces)
					daemon.Cleanup(context.TODO(), c)
					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
						baseLogger.WithError(err).Error("failed to update stopped container state")
					}
					c.Unlock()
					logger(c).Debug("set stopped state")
				}

				// we call Mount and then Unmount to get BaseFs of the container
				if err := daemon.Mount(c); err != nil {
					// The mount is unlikely to fail. However, if the mount fails,
					// the container should still be allowed to restore here. Some
					// functionality (like docker exec -u user) might be missing, but
					// the container can still be stopped/restarted/removed.
					// See #29365 for related information.
					// The error is only logged here.
					logger(c).WithError(err).Warn("failed to mount container to get BaseFs path")
				} else {
					if err := daemon.Unmount(c); err != nil {
						logger(c).WithError(err).Warn("failed to umount container to get BaseFs path")
					}
				}

				c.ResetRestartManager(false)
				if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() {
					options, err := daemon.buildSandboxOptions(&cfg.Config, c)
					if err != nil {
						logger(c).WithError(err).Warn("failed to build sandbox option to restore container")
					}
					mapLock.Lock()
					activeSandboxes[c.NetworkSettings.SandboxID] = options
					mapLock.Unlock()
				}
			}

			// get list of containers we need to restart

			// Do not autostart containers that have endpoints in a swarm-scope
			// network yet, since the cluster is not yet initialized. We will
			// start them after the cluster is initialized.
			if cfg.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint && c.HasBeenStartedBefore {
				mapLock.Lock()
				restartContainers[c] = make(chan struct{})
				mapLock.Unlock()
			} else if c.HostConfig != nil && c.HostConfig.AutoRemove {
				// Remove the container if live-restore is disabled or if the container has already exited.
				if !cfg.LiveRestoreEnabled || !alive {
					mapLock.Lock()
					removeContainers[c.ID] = c
					mapLock.Unlock()
				}
			}

			c.Lock()
			if c.RemovalInProgress {
				// We probably crashed in the middle of a removal, reset
				// the flag.
				//
				// We DO NOT remove the container here, as we do not know
				// whether the user had requested that the associated volumes,
				// network links, or both also be removed. So we put the
				// container in the "dead" state and leave further processing
				// up to them.
				c.RemovalInProgress = false
				c.Dead = true
				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
					baseLogger.WithError(err).Error("failed to update RemovalInProgress container state")
				} else {
					baseLogger.Debugf("reset RemovalInProgress state for container")
				}
			}
			c.Unlock()
			logger(c).Debug("done restoring container")
		}(c)
	}
	group.Wait()

	// Initialize the network controller and configure network settings.
	//
	// Note that we cannot initialize the network controller earlier, as it
	// needs to know if there are active sandboxes (running containers).
	if err = daemon.initNetworkController(&cfg.Config, activeSandboxes); err != nil {
		return fmt.Errorf("Error initializing network controller: %v", err)
	}

	// Now that all the containers are registered, register the links
	for _, c := range containers {
		group.Add(1)
		go func(c *container.Container) {
			_ = sem.Acquire(context.Background(), 1)

			if err := daemon.registerLinks(c, c.HostConfig); err != nil {
				log.G(context.TODO()).WithField("container", c.ID).WithError(err).Error("failed to register link for container")
			}

			sem.Release(1)
			group.Done()
		}(c)
	}
	group.Wait()

	for c, notifyChan := range restartContainers {
		group.Add(1)
		go func(c *container.Container, chNotify chan struct{}) {
			_ = sem.Acquire(context.Background(), 1)

			logger := log.G(context.TODO()).WithField("container", c.ID)

			logger.Debug("starting container")

			// ignore errors here as this is a best effort to wait for children to be
			// running before we try to start the container
			children := daemon.children(c)
			timeout := time.NewTimer(5 * time.Second)
			defer timeout.Stop()

			for _, child := range children {
				if notifier, exists := restartContainers[child]; exists {
					select {
					case <-notifier:
					case <-timeout.C:
					}
				}
			}

			if err := daemon.prepareMountPoints(c); err != nil {
				logger.WithError(err).Error("failed to prepare mount points for container")
			}
			if err := daemon.containerStart(context.Background(), cfg, c, "", "", true); err != nil {
				logger.WithError(err).Error("failed to start container")
			}
			close(chNotify)

			sem.Release(1)
			group.Done()
		}(c, notifyChan)
	}
	group.Wait()

	for id := range removeContainers {
		group.Add(1)
		go func(cid string) {
			_ = sem.Acquire(context.Background(), 1)

			if err := daemon.containerRm(&cfg.Config, cid, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
				log.G(context.TODO()).WithField("container", cid).WithError(err).Error("failed to remove container")
			}

			sem.Release(1)
			group.Done()
		}(id)
	}
	group.Wait()

	// any containers that were started above would already have had this done,
	// however we need to now prepare the mountpoints for the rest of the containers as well.
	// This shouldn't cause any issue running on the containers that already had this run.
	// This must be run after any containers with a restart policy so that containerized plugins
	// can have a chance to be running before we try to initialize them.
	for _, c := range containers {
		// if the container has a restart policy, do not prepare the
		// mountpoints, since that has already been done when restarting.
		// This is to speed up the daemon start when a restarting container
		// has a volume and the volume driver is not available.
		if _, ok := restartContainers[c]; ok {
			continue
		} else if _, ok := removeContainers[c.ID]; ok {
			// container is automatically removed, skip it.
			continue
		}

		group.Add(1)
		go func(c *container.Container) {
			_ = sem.Acquire(context.Background(), 1)

			if err := daemon.prepareMountPoints(c); err != nil {
				log.G(context.TODO()).WithField("container", c.ID).WithError(err).Error("failed to prepare mountpoints for container")
			}

			sem.Release(1)
			group.Done()
		}(c)
	}
	group.Wait()

	log.G(context.TODO()).Info("Loading containers: done.")

	return nil
}

// RestartSwarmContainers restarts any autostart container which has a
// swarm endpoint.
func (daemon *Daemon) RestartSwarmContainers() {
	daemon.restartSwarmContainers(context.Background(), daemon.config())
}

func (daemon *Daemon) restartSwarmContainers(ctx context.Context, cfg *configStore) {
	// parallelLimit is the maximum number of parallel startup jobs that we
	// allow (this is the limit used for all startup semaphores). The multiplier
	// (128) was chosen after some fairly significant benchmarking -- don't change
	// it unless you've tested it significantly (this value is adjusted if
	// RLIMIT_NOFILE is small to avoid EMFILE).
	parallelLimit := adjustParallelLimit(len(daemon.List()), 128*runtime.NumCPU())

	var group sync.WaitGroup
	sem := semaphore.NewWeighted(int64(parallelLimit))

	for _, c := range daemon.List() {
		if !c.IsRunning() && !c.IsPaused() {
			// Autostart all the containers that have a swarm endpoint
			// now that the cluster is initialized.
			if cfg.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint && c.HasBeenStartedBefore {
				group.Add(1)
				go func(c *container.Container) {
					if err := sem.Acquire(ctx, 1); err != nil {
						// ctx is done.
						group.Done()
						return
					}

					if err := daemon.containerStart(ctx, cfg, c, "", "", true); err != nil {
						log.G(ctx).WithField("container", c.ID).WithError(err).Error("failed to start swarm container")
					}

					sem.Release(1)
					group.Done()
				}(c)
			}
		}
	}
	group.Wait()
}

func (daemon *Daemon) children(c *container.Container) map[string]*container.Container {
	return daemon.linkIndex.children(c)
}

// parents returns the parent containers of the given container, keyed by
// their link names.
func (daemon *Daemon) parents(c *container.Container) map[string]*container.Container {
	return daemon.linkIndex.parents(c)
}

func (daemon *Daemon) registerLink(parent, child *container.Container, alias string) error {
	fullName := path.Join(parent.Name, alias)
	if err := daemon.containersReplica.ReserveName(fullName, child.ID); err != nil {
		if errors.Is(err, container.ErrNameReserved) {
			log.G(context.TODO()).Warnf("error registering link for %s, to %s, as alias %s, ignoring: %v", parent.ID, child.ID, alias, err)
			return nil
		}
		return err
	}
	daemon.linkIndex.link(parent, child, fullName)
	return nil
}

// DaemonJoinsCluster informs the daemon that it has joined the cluster and provides
// the handler to query the cluster component
func (daemon *Daemon) DaemonJoinsCluster(clusterProvider cluster.Provider) {
	daemon.setClusterProvider(clusterProvider)
}

// DaemonLeavesCluster informs the daemon that it has left the cluster
func (daemon *Daemon) DaemonLeavesCluster() {
	// Daemon is in charge of removing the attachable networks with
	// connected containers when the node leaves the swarm
	daemon.clearAttachableNetworks()
	// We no longer need the cluster provider, stop it now so that
	// the network agent will stop listening to cluster events.
	daemon.setClusterProvider(nil)
	// Wait for the networking cluster agent to stop
	daemon.netController.AgentStopWait()
	// Daemon is in charge of removing the ingress network when the
	// node leaves the swarm. Wait for job to be done or timeout.
	// This is called also on graceful daemon shutdown. We need to
	// wait, because the ingress release has to happen before the
	// network controller is stopped.

	if done, err := daemon.ReleaseIngress(); err == nil {
		timeout := time.NewTimer(5 * time.Second)
		defer timeout.Stop()

		select {
		case <-done:
		case <-timeout.C:
			log.G(context.TODO()).Warn("timeout while waiting for ingress network removal")
		}
	} else {
		log.G(context.TODO()).Warnf("failed to initiate ingress network removal: %v", err)
	}

	daemon.attachmentStore.ClearAttachments()
}

// setClusterProvider sets a component for querying the current cluster state.
func (daemon *Daemon) setClusterProvider(clusterProvider cluster.Provider) {
	daemon.clusterProvider = clusterProvider
	daemon.netController.SetClusterProvider(clusterProvider)
	daemon.attachableNetworkLock = locker.New()
}

// IsSwarmCompatible verifies if the current daemon
// configuration is compatible with the swarm mode
func (daemon *Daemon) IsSwarmCompatible() error {
	return daemon.config().IsSwarmCompatible()
}

// NewDaemon sets up everything for the daemon to be able to service
// requests from the webserver.
func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.Store, authzMiddleware *authorization.Middleware) (daemon *Daemon, err error) {
	// Verify platform-specific requirements.
	// TODO(thaJeztah): this should be called before we try to create the daemon; perhaps together with the config validation.
	if err := checkSystem(); err != nil {
		return nil, err
	}

	registryService, err := registry.NewService(config.ServiceOptions)
	if err != nil {
		return nil, err
	}

	// Ensure that we have a correct root key limit for launching containers.
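	// (Container starts consume entries in the kernel keyring that count
	// against the root user's key quota; as the warning below notes, a quota
	// that cannot be raised may limit how many containers can run.)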
	if err := modifyRootKeyLimit(); err != nil {
		log.G(ctx).Warnf("unable to modify root key limit, number of containers could be limited by this quota: %v", err)
	}

	// Ensure we have compatible and valid configuration options
	if err := verifyDaemonSettings(config); err != nil {
		return nil, err
	}

	// Do we have a disabled network?
	config.DisableBridge = isBridgeNetworkDisabled(config)

	// Set up the resolv.conf
	setupResolvConf(config)

	idMapping, err := setupRemappedRoot(config)
	if err != nil {
		return nil, err
	}
	rootIDs := idMapping.RootPair()
	if err := setMayDetachMounts(); err != nil {
		log.G(ctx).WithError(err).Warn("Could not set may_detach_mounts kernel parameter")
	}

	// set up the tmpDir to use a canonical path
	tmp, err := prepareTempDir(config.Root)
	if err != nil {
		return nil, fmt.Errorf("Unable to get the TempDir under %s: %s", config.Root, err)
	}
	realTmp, err := fileutils.ReadSymlinkedDirectory(tmp)
	if err != nil {
		return nil, fmt.Errorf("Unable to get the full path to the TempDir (%s): %s", tmp, err)
	}
	if isWindows {
		if err := system.MkdirAll(realTmp, 0); err != nil {
			return nil, fmt.Errorf("Unable to create the TempDir (%s): %s", realTmp, err)
		}
		os.Setenv("TEMP", realTmp)
		os.Setenv("TMP", realTmp)
	} else {
		os.Setenv("TMPDIR", realTmp)
	}

	if err := initRuntimesDir(config); err != nil {
		return nil, err
	}
	rts, err := setupRuntimes(config)
	if err != nil {
		return nil, err
	}

	d := &Daemon{
		PluginStore: pluginStore,
		startupDone: make(chan struct{}),
	}
	cfgStore := &configStore{
		Config:   *config,
		Runtimes: rts,
	}
	d.configStore.Store(cfgStore)

	// TEST_INTEGRATION_USE_SNAPSHOTTER is used for integration tests only.
	if os.Getenv("TEST_INTEGRATION_USE_SNAPSHOTTER") != "" {
		d.usesSnapshotter = true
	} else {
		d.usesSnapshotter = config.Features["containerd-snapshotter"]
	}

	// Ensure the daemon is properly shut down if there is a failure during
	// initialization.
	defer func() {
		if err != nil {
			// Use a fresh context here. Passed context could be cancelled.
			if err := d.Shutdown(context.Background()); err != nil {
				log.G(ctx).Error(err)
			}
		}
	}()

	if err := d.setGenericResources(&cfgStore.Config); err != nil {
		return nil, err
	}
	// set up SIGUSR1 handler on Unix-like systems, or a Win32 global event
	// on Windows to dump Go routine stacks
	stackDumpDir := cfgStore.Root
	if execRoot := cfgStore.GetExecRoot(); execRoot != "" {
		stackDumpDir = execRoot
	}
	d.setupDumpStackTrap(stackDumpDir)

	if err := d.setupSeccompProfile(&cfgStore.Config); err != nil {
		return nil, err
	}

	// Set the default isolation mode (only applicable on Windows)
	if err := d.setDefaultIsolation(&cfgStore.Config); err != nil {
		return nil, fmt.Errorf("error setting default isolation mode: %v", err)
	}

	if err := configureMaxThreads(&cfgStore.Config); err != nil {
		log.G(ctx).Warnf("Failed to configure golang's threads limit: %v", err)
	}

	// ensureDefaultAppArmorProfile does nothing if apparmor is disabled
	if err := ensureDefaultAppArmorProfile(); err != nil {
		log.G(ctx).Errorf(err.Error())
	}

	daemonRepo := filepath.Join(cfgStore.Root, "containers")
	if err := idtools.MkdirAllAndChown(daemonRepo, 0o710, idtools.Identity{
		UID: idtools.CurrentIdentity().UID,
		GID: rootIDs.GID,
	}); err != nil {
		return nil, err
	}

	if isWindows {
		// Note that permissions (0o700) are ignored on Windows; passing them to
		// show intent only. We could consider using idtools.MkdirAndChown here
		// to apply an ACL.
		if err = os.Mkdir(filepath.Join(cfgStore.Root, "credentialspecs"), 0o700); err != nil && !errors.Is(err, os.ErrExist) {
			return nil, err
		}
	}

	d.registryService = registryService
	dlogger.RegisterPluginGetter(d.PluginStore)

	metricsSockPath, err := d.listenMetricsSock(&cfgStore.Config)
	if err != nil {
		return nil, err
	}
	registerMetricsPluginCallback(d.PluginStore, metricsSockPath)

	backoffConfig := backoff.DefaultConfig
	backoffConfig.MaxDelay = 3 * time.Second
	connParams := grpc.ConnectParams{
		Backoff: backoffConfig,
	}
	gopts := []grpc.DialOption{
		// WithBlock makes sure that the following containerd request
		// is reliable.
		//
		// NOTE: In one edge case, under high load pressure, the kernel may
		// OOM-kill dockerd, containerd, and the containerd-shims. When both
		// dockerd and containerd restart, containerd needs time to recover
		// all the existing containers. Before containerd is serving, dockerd
		// would fail with a gRPC error, and because the restore logic ignores
		// any non-NotFound errors, it would report a running state for
		// containers that had already stopped; dockerd would then have to be
		// restarted to get back to a consistent state.
		//
		// Adding WithBlock prevents that edge case. In the common case
		// containerd is serving shortly anyway, so it does no harm to use
		// WithBlock for the containerd connection.
		grpc.WithBlock(),

		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithConnectParams(connParams),
		grpc.WithContextDialer(dialer.ContextDialer),

		// TODO(stevvooe): We may need to allow configuration of this on the client.
		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)),
		grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)),
		grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor()),   //nolint:staticcheck // TODO(thaJeztah): ignore SA1019 for deprecated options: see https://github.com/moby/moby/issues/47437
		grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor()), //nolint:staticcheck // TODO(thaJeztah): ignore SA1019 for deprecated options: see https://github.com/moby/moby/issues/47437
	}

	if cfgStore.ContainerdAddr != "" {
		d.containerdClient, err = containerd.New(
			cfgStore.ContainerdAddr,
			containerd.WithDefaultNamespace(cfgStore.ContainerdNamespace),
			containerd.WithDialOpts(gopts),
			containerd.WithTimeout(60*time.Second),
		)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to dial %q", cfgStore.ContainerdAddr)
		}
	}

	createPluginExec := func(m *plugin.Manager) (plugin.Executor, error) {
		var pluginCli *containerd.Client

		if cfgStore.ContainerdAddr != "" {
			pluginCli, err = containerd.New(
				cfgStore.ContainerdAddr,
				containerd.WithDefaultNamespace(cfgStore.ContainerdPluginNamespace),
				containerd.WithDialOpts(gopts),
				containerd.WithTimeout(60*time.Second),
			)
			if err != nil {
				return nil, errors.Wrapf(err, "failed to dial %q", cfgStore.ContainerdAddr)
			}
		}

		var (
			shim     string
			shimOpts interface{}
		)
		if runtime.GOOS != "windows" {
			shim, shimOpts, err = rts.Get("")
			if err != nil {
				return nil, err
			}
		}
		return pluginexec.New(ctx, getPluginExecRoot(&cfgStore.Config), pluginCli, cfgStore.ContainerdPluginNamespace, m, shim, shimOpts)
	}

	// Plugin system initialization should happen before restore. Do not change order.
	d.pluginManager, err = plugin.NewManager(plugin.ManagerConfig{
		Root:               filepath.Join(cfgStore.Root, "plugins"),
		ExecRoot:           getPluginExecRoot(&cfgStore.Config),
		Store:              d.PluginStore,
		CreateExecutor:     createPluginExec,
		RegistryService:    registryService,
		LiveRestoreEnabled: cfgStore.LiveRestoreEnabled,
		LogPluginEvent:     d.LogPluginEvent, // todo: make private
		AuthzMiddleware:    authzMiddleware,
	})
	if err != nil {
		return nil, errors.Wrap(err, "couldn't create plugin manager")
	}

	d.defaultLogConfig, err = defaultLogConfig(&cfgStore.Config)
	if err != nil {
		return nil, errors.Wrap(err, "failed to set log opts")
	}
	log.G(ctx).Debugf("Using default logging driver %s", d.defaultLogConfig.Type)

	d.volumes, err = volumesservice.NewVolumeService(cfgStore.Root, d.PluginStore, rootIDs, d)
	if err != nil {
		return nil, err
	}

	// Check if the devices cgroup is mounted; it is a hard requirement for
	// container security on Linux.
	//
	// Important: we call getSysInfo() directly here, without storing the results,
	// as networking has not yet been set up, so we only have partial system info
	// at this point.
	//
	// TODO(thaJeztah) add a utility to only collect the CgroupDevicesEnabled information
	if runtime.GOOS == "linux" && !userns.RunningInUserNS() && !getSysInfo(&cfgStore.Config).CgroupDevicesEnabled {
		return nil, errors.New("Devices cgroup isn't mounted")
	}

	d.id, err = LoadOrCreateID(cfgStore.Root)
	if err != nil {
		return nil, err
	}
	d.repository = daemonRepo
	d.containers = container.NewMemoryStore()
	if d.containersReplica, err = container.NewViewDB(); err != nil {
		return nil, err
	}
	d.execCommands = container.NewExecStore()
	d.statsCollector = d.newStatsCollector(1 * time.Second)

	d.EventsService = events.New()
	d.root = cfgStore.Root
	d.idMapping = idMapping

	d.linkIndex = newLinkIndex()

	// On Windows we don't support the environment variable, or a user-supplied graphdriver.
	// Unix platforms, however, run a single graphdriver for all containers, and it can
	// be set through an environment variable, a daemon start parameter, or chosen through
	// initialization of the layerstore through driver priority order, for example.
	driverName := os.Getenv("DOCKER_DRIVER")
	if isWindows && d.UsesSnapshotter() {
		// Containerd WCOW snapshotter
		driverName = "windows"
	} else if isWindows {
		// Docker WCOW graphdriver
		driverName = "windowsfilter"
	} else if driverName != "" {
		log.G(ctx).Infof("Setting the storage driver from the $DOCKER_DRIVER environment variable (%s)", driverName)
	} else {
		driverName = cfgStore.GraphDriver
	}

	if d.UsesSnapshotter() {
		if os.Getenv("TEST_INTEGRATION_USE_SNAPSHOTTER") != "" {
			log.G(ctx).Warn("Enabling containerd snapshotter through the $TEST_INTEGRATION_USE_SNAPSHOTTER environment variable. This should only be used for testing.")
		}
		log.G(ctx).Info("Starting daemon with containerd snapshotter integration enabled")

		// FIXME(thaJeztah): implement automatic snapshotter-selection similar to graph-driver selection; see https://github.com/moby/moby/issues/44076
		if driverName == "" {
			driverName = containerd.DefaultSnapshotter
		}

		// Configure and validate the kernel's security support. Note this is a Linux/FreeBSD
		// operation only, so it is safe to pass *just* the runtime OS graphdriver.
		if err := configureKernelSecuritySupport(&cfgStore.Config, driverName); err != nil {
			return nil, err
		}
		d.imageService = ctrd.NewService(ctrd.ImageServiceConfig{
			Client:          d.containerdClient,
			Containers:      d.containers,
			Snapshotter:     driverName,
			RegistryHosts:   d.RegistryHosts,
			Registry:        d.registryService,
			EventsService:   d.EventsService,
			IDMapping:       idMapping,
			RefCountMounter: snapshotter.NewMounter(config.Root, driverName, idMapping),
		})
	} else {
		layerStore, err := layer.NewStoreFromOptions(layer.StoreOptions{
			Root:                      cfgStore.Root,
			MetadataStorePathTemplate: filepath.Join(cfgStore.Root, "image", "%s", "layerdb"),
			GraphDriver:               driverName,
			GraphDriverOptions:        cfgStore.GraphOptions,
			IDMapping:                 idMapping,
			PluginGetter:              d.PluginStore,
			ExperimentalEnabled:       cfgStore.Experimental,
		})
		if err != nil {
			return nil, err
		}

		// Configure and validate the kernel's security support. Note this is a Linux/FreeBSD
		// operation only, so it is safe to pass *just* the runtime OS graphdriver.
		if err := configureKernelSecuritySupport(&cfgStore.Config, layerStore.DriverName()); err != nil {
			return nil, err
		}

		imageRoot := filepath.Join(cfgStore.Root, "image", layerStore.DriverName())
		ifs, err := image.NewFSStoreBackend(filepath.Join(imageRoot, "imagedb"))
		if err != nil {
			return nil, err
		}

		// We have a single tag/reference store for the daemon globally. However, it's
		// stored under the graphdriver. On host platforms which only support a single
		// container OS, but multiple selectable graphdrivers, this means that, depending
		// on which graphdriver is chosen, the global reference store lives under there.
		// For platforms which support multiple container operating systems, this is
		// slightly more problematic, as it is unclear where the global ref store should
		// be located. Fortunately, for Windows, which is currently the only daemon
		// supporting multiple container operating systems, the list of graphdrivers
		// available isn't user configurable. For backwards compatibility, we just put
		// it under the windowsfilter directory regardless.
		refStoreLocation := filepath.Join(imageRoot, `repositories.json`)
		rs, err := refstore.NewReferenceStore(refStoreLocation)
		if err != nil {
			return nil, fmt.Errorf("Couldn't create reference store repository: %s", err)
		}
		d.ReferenceStore = rs

		imageStore, err := image.NewImageStore(ifs, layerStore)
		if err != nil {
			return nil, err
		}

		distributionMetadataStore, err := dmetadata.NewFSMetadataStore(filepath.Join(imageRoot, "distribution"))
		if err != nil {
			return nil, err
		}

		imgSvcConfig := images.ImageServiceConfig{
			ContainerStore:            d.containers,
			DistributionMetadataStore: distributionMetadataStore,
			EventsService:             d.EventsService,
			ImageStore:                imageStore,
			LayerStore:                layerStore,
			MaxConcurrentDownloads:    config.MaxConcurrentDownloads,
			MaxConcurrentUploads:      config.MaxConcurrentUploads,
			MaxDownloadAttempts:       config.MaxDownloadAttempts,
			ReferenceStore:            rs,
			RegistryService:           registryService,
			ContentNamespace:          config.ContainerdNamespace,
		}

		// containerd is not currently supported on Windows, so d.containerdClient
		// will sometimes be nil. In that case we'll create a local content store,
		// but otherwise we'll use containerd.
		if d.containerdClient != nil {
			imgSvcConfig.Leases = d.containerdClient.LeasesService()
			imgSvcConfig.ContentStore = d.containerdClient.ContentStore()
		} else {
			imgSvcConfig.ContentStore, imgSvcConfig.Leases, err = d.configureLocalContentStore(config.ContainerdNamespace)
			if err != nil {
				return nil, err
			}
		}

		// TODO: imageStore, distributionMetadataStore, and ReferenceStore are only
		// used above to run migration. They could be initialized in ImageService
		// if migration is called from daemon/images. layerStore might move as well.
		d.imageService = images.NewImageService(imgSvcConfig)

		log.G(ctx).Debugf("Max Concurrent Downloads: %d", imgSvcConfig.MaxConcurrentDownloads)
		log.G(ctx).Debugf("Max Concurrent Uploads: %d", imgSvcConfig.MaxConcurrentUploads)
		log.G(ctx).Debugf("Max Download Attempts: %d", imgSvcConfig.MaxDownloadAttempts)
	}

	go d.execCommandGC()

	if err := d.initLibcontainerd(ctx, &cfgStore.Config); err != nil {
		return nil, err
	}

	if err := d.restore(cfgStore); err != nil {
		return nil, err
	}
	close(d.startupDone)

	info, err := d.SystemInfo(ctx)
	if err != nil {
		return nil, err
	}
	for _, w := range info.Warnings {
		log.G(ctx).Warn(w)
	}

	engineInfo.WithValues(
		dockerversion.Version,
		dockerversion.GitCommit,
		info.Architecture,
		info.Driver,
		info.KernelVersion,
		info.OperatingSystem,
		info.OSType,
		info.OSVersion,
		info.ID,
	).Set(1)
	engineCpus.Set(float64(info.NCPU))
	engineMemory.Set(float64(info.MemTotal))

	log.G(ctx).WithFields(log.Fields{
		"version":                dockerversion.Version,
		"commit":                 dockerversion.GitCommit,
		"storage-driver":         d.ImageService().StorageDriver(),
		"containerd-snapshotter": d.UsesSnapshotter(),
	}).Info("Docker daemon")

	return d, nil
}

// DistributionServices returns services controlling daemon storage
func (daemon *Daemon) DistributionServices() images.DistributionServices {
	return daemon.imageService.DistributionServices()
}

func (daemon *Daemon) waitForStartupDone() {
	<-daemon.startupDone
}

func (daemon *Daemon) shutdownContainer(c *container.Container) error {
	ctx := compatcontext.WithoutCancel(context.TODO())

	// If the container fails to exit within stopTimeout seconds of SIGTERM, it is force-killed.
	if err := daemon.containerStop(ctx, c, containertypes.StopOptions{}); err != nil {
		return fmt.Errorf("Failed to stop container %s with error: %v", c.ID, err)
	}

	// Wait without timeout for the container to exit.
	// Ignore the result.
	<-c.Wait(ctx, container.WaitConditionNotRunning)
	return nil
}

// ShutdownTimeout returns the timeout (in seconds) before containers are forcibly
// killed during shutdown. The default timeout can be configured both on the daemon
// and per container, and the longest timeout will be used. A grace-period of
// 5 seconds is added to the configured timeout.
//
// A negative (-1) timeout means "indefinitely", which means that containers
// are not forcibly killed, and the daemon shuts down after all containers exit.
func (daemon *Daemon) ShutdownTimeout() int {
	return daemon.shutdownTimeout(&daemon.config().Config)
}

func (daemon *Daemon) shutdownTimeout(cfg *config.Config) int {
	shutdownTimeout := cfg.ShutdownTimeout
	if shutdownTimeout < 0 {
		return -1
	}
	if daemon.containers == nil {
		return shutdownTimeout
	}

	graceTimeout := 5
	for _, c := range daemon.containers.List() {
		stopTimeout := c.StopTimeout()
		if stopTimeout < 0 {
			return -1
		}
		if stopTimeout+graceTimeout > shutdownTimeout {
			shutdownTimeout = stopTimeout + graceTimeout
		}
	}
	return shutdownTimeout
}

// Shutdown stops the daemon.
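// If live-restore is enabled and containers are still running, mounts and
// networking are left in place so that the containers can be restored on the
// next daemon start (see the live-restore check at the top of the function).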
func (daemon *Daemon) Shutdown(ctx context.Context) error {
	daemon.shutdown = true
	// Keep mounts and networking running on daemon shutdown if
	// we are to keep containers running and restore them.

	cfg := &daemon.config().Config
	if cfg.LiveRestoreEnabled && daemon.containers != nil {
		// check if there are any running containers; if none, we should do some cleanup
		if ls, err := daemon.Containers(ctx, &containertypes.ListOptions{}); len(ls) != 0 || err != nil {
			// metrics plugins still need some cleanup
			daemon.cleanupMetricsPlugins()
			return err
		}
	}

	if daemon.containers != nil {
		log.G(ctx).Debugf("daemon configured with a %d seconds minimum shutdown timeout", cfg.ShutdownTimeout)
		log.G(ctx).Debugf("start clean shutdown of all containers with a %d seconds timeout...", daemon.shutdownTimeout(cfg))
		daemon.containers.ApplyAll(func(c *container.Container) {
			if !c.IsRunning() {
				return
			}
			logger := log.G(ctx).WithField("container", c.ID)
			logger.Debug("shutting down container")
			if err := daemon.shutdownContainer(c); err != nil {
				logger.WithError(err).Error("failed to shut down container")
				return
			}
			if mountid, err := daemon.imageService.GetLayerMountID(c.ID); err == nil {
				daemon.cleanupMountsByID(mountid)
			}
			logger.Debugf("shut down container")
		})
	}

	if daemon.volumes != nil {
		if err := daemon.volumes.Shutdown(); err != nil {
			log.G(ctx).Errorf("Error shutting down volume store: %v", err)
		}
	}

	if daemon.imageService != nil {
		if err := daemon.imageService.Cleanup(); err != nil {
			log.G(ctx).Error(err)
		}
	}

	// If we are part of a cluster, clean up cluster resources
	if daemon.clusterProvider != nil {
		log.G(ctx).Debugf("start clean shutdown of cluster resources...")
		daemon.DaemonLeavesCluster()
	}

	daemon.cleanupMetricsPlugins()

	// Shutdown plugins after containers and layerstore. Don't change the order.
	daemon.pluginShutdown()

	// trigger libnetwork Stop only if it's initialized
	if daemon.netController != nil {
		daemon.netController.Stop()
	}

	if daemon.containerdClient != nil {
		daemon.containerdClient.Close()
	}

	if daemon.mdDB != nil {
		daemon.mdDB.Close()
	}

	return daemon.cleanupMounts(cfg)
}

// Mount sets container.BaseFS
func (daemon *Daemon) Mount(container *container.Container) error {
	return daemon.imageService.Mount(context.Background(), container)
}

// Unmount unsets the container base filesystem
func (daemon *Daemon) Unmount(container *container.Container) error {
	return daemon.imageService.Unmount(context.Background(), container)
}

// Subnets returns the IPv4 and IPv6 subnets of networks that are managed by Docker.
func (daemon *Daemon) Subnets() ([]net.IPNet, []net.IPNet) {
	var v4Subnets []net.IPNet
	var v6Subnets []net.IPNet

	for _, managedNetwork := range daemon.netController.Networks(context.TODO()) {
		v4infos, v6infos := managedNetwork.IpamInfo()
		for _, info := range v4infos {
			if info.IPAMData.Pool != nil {
				v4Subnets = append(v4Subnets, *info.IPAMData.Pool)
			}
		}
		for _, info := range v6infos {
			if info.IPAMData.Pool != nil {
				v6Subnets = append(v6Subnets, *info.IPAMData.Pool)
			}
		}
	}

	return v4Subnets, v6Subnets
}

// prepareTempDir prepares and returns the default directory to use
// for temporary files.
// If it doesn't exist, it is created. If it exists, its content is removed.
func prepareTempDir(rootDir string) (string, error) {
	var tmpDir string
	if tmpDir = os.Getenv("DOCKER_TMPDIR"); tmpDir == "" {
		tmpDir = filepath.Join(rootDir, "tmp")
		newName := tmpDir + "-old"
		if err := os.Rename(tmpDir, newName); err == nil {
			go func() {
				if err := os.RemoveAll(newName); err != nil {
					log.G(context.TODO()).Warnf("failed to delete old tmp directory: %s", newName)
				}
			}()
		} else if !os.IsNotExist(err) {
			log.G(context.TODO()).Warnf("failed to rename %s for background deletion: %s. Deleting synchronously", tmpDir, err)
			if err := os.RemoveAll(tmpDir); err != nil {
				log.G(context.TODO()).Warnf("failed to delete old tmp directory: %s", tmpDir)
			}
		}
	}
	return tmpDir, idtools.MkdirAllAndChown(tmpDir, 0o700, idtools.CurrentIdentity())
}

func (daemon *Daemon) setGenericResources(conf *config.Config) error {
	genericResources, err := config.ParseGenericResources(conf.NodeGenericResources)
	if err != nil {
		return err
	}

	daemon.genericResources = genericResources

	return nil
}

// IsShuttingDown tells whether the daemon is shutting down or not
func (daemon *Daemon) IsShuttingDown() bool {
	return daemon.shutdown
}

func isBridgeNetworkDisabled(conf *config.Config) bool {
	return conf.BridgeConfig.Iface == config.DisableNetworkBridge
}

func (daemon *Daemon) networkOptions(conf *config.Config, pg plugingetter.PluginGetter, activeSandboxes map[string]interface{}) ([]nwconfig.Option, error) {
	dd := runconfig.DefaultDaemonNetworkMode()

	options := []nwconfig.Option{
		nwconfig.OptionDataDir(conf.Root),
		nwconfig.OptionExecRoot(conf.GetExecRoot()),
		nwconfig.OptionDefaultDriver(string(dd)),
		nwconfig.OptionDefaultNetwork(dd.NetworkName()),
		nwconfig.OptionLabels(conf.Labels),
		nwconfig.OptionNetworkControlPlaneMTU(conf.NetworkControlPlaneMTU),
		driverOptions(conf),
	}

	if len(conf.NetworkConfig.DefaultAddressPools.Value()) > 0 {
		options = append(options, nwconfig.OptionDefaultAddressPoolConfig(conf.NetworkConfig.DefaultAddressPools.Value()))
	}
	if conf.LiveRestoreEnabled && len(activeSandboxes) != 0 {
		options = append(options, nwconfig.OptionActiveSandboxes(activeSandboxes))
	}
	if pg != nil {
		options = append(options, nwconfig.OptionPluginGetter(pg))
	}

	return options, nil
}

// GetCluster returns the cluster
func (daemon *Daemon) GetCluster() Cluster {
	return daemon.cluster
}

// SetCluster sets the cluster
func (daemon *Daemon) SetCluster(cluster Cluster) {
	daemon.cluster = cluster
}

func (daemon *Daemon) pluginShutdown() {
	manager := daemon.pluginManager
	// Check for a valid manager object. In error conditions, daemon init can fail
	// and shutdown may be called before the plugin manager is initialized.
	if manager != nil {
		manager.Shutdown()
	}
}

// PluginManager returns current pluginManager associated with the daemon
func (daemon *Daemon) PluginManager() *plugin.Manager { // set up before daemon to avoid this method
	return daemon.pluginManager
}

// PluginGetter returns current pluginStore associated with the daemon
func (daemon *Daemon) PluginGetter() *plugin.Store {
	return daemon.PluginStore
}

// CreateDaemonRoot creates the root for the daemon
func CreateDaemonRoot(config *config.Config) error {
	// get the canonical path to the Docker root directory
	var realRoot string
	if _, err := os.Stat(config.Root); err != nil && os.IsNotExist(err) {
		realRoot = config.Root
	} else {
		realRoot, err = fileutils.ReadSymlinkedDirectory(config.Root)
		if err != nil {
			return fmt.Errorf("Unable to get the full path to root (%s): %s", config.Root, err)
		}
	}

	idMapping, err := setupRemappedRoot(config)
	if err != nil {
		return err
	}
	return setupDaemonRoot(config, realRoot, idMapping.RootPair())
}

// RemapContainerdNamespaces returns the right containerd namespaces to use:
//   - if they are not already set in the config file
//   - and the daemon is running with user namespace remapping enabled
//
// Then it will return new namespace names, otherwise it will return the
// existing namespaces.
func RemapContainerdNamespaces(config *config.Config) (ns string, pluginNs string, err error) {
	idMapping, err := setupRemappedRoot(config)
	if err != nil {
		return "", "", err
	}
	if idMapping.Empty() {
		return config.ContainerdNamespace, config.ContainerdPluginNamespace, nil
	}
	root := idMapping.RootPair()

	ns = config.ContainerdNamespace
	if _, ok := config.ValuesSet["containerd-namespace"]; !ok {
		ns = fmt.Sprintf("%s-%d.%d", config.ContainerdNamespace, root.UID, root.GID)
	}

	pluginNs = config.ContainerdPluginNamespace
	if _, ok := config.ValuesSet["containerd-plugin-namespace"]; !ok {
		pluginNs = fmt.Sprintf("%s-%d.%d", config.ContainerdPluginNamespace, root.UID, root.GID)
	}

	return
}

// checkpointAndSave grabs a container lock to safely call container.CheckpointTo
func (daemon *Daemon) checkpointAndSave(container *container.Container) error {
	container.Lock()
	defer container.Unlock()
	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
		return fmt.Errorf("Error saving container state: %v", err)
	}
	return nil
}

// because the CLI sends a -1 when it wants to unset the swappiness value
// we need to clear it on the server side
func fixMemorySwappiness(resources *containertypes.Resources) {
	if resources.MemorySwappiness != nil && *resources.MemorySwappiness == -1 {
		resources.MemorySwappiness = nil
	}
}

// GetAttachmentStore returns current attachment store associated with the daemon
func (daemon *Daemon) GetAttachmentStore() *network.AttachmentStore {
	return &daemon.attachmentStore
}

// IdentityMapping returns the uid/gid mapping or a SID (in the case of Windows) for the builder
func (daemon *Daemon) IdentityMapping() idtools.IdentityMapping {
	return daemon.idMapping
}

// ImageService returns the Daemon's ImageService
func (daemon *Daemon) ImageService() ImageService {
	return daemon.imageService
}

// ImageBackend returns an image-backend for Swarm and the distribution router.
func (daemon *Daemon) ImageBackend() executorpkg.ImageBackend {
	return &imageBackend{
		ImageService:    daemon.imageService,
		registryService: daemon.registryService,
	}
}

// RegistryService returns the Daemon's RegistryService
func (daemon *Daemon) RegistryService() *registry.Service {
	return daemon.registryService
}

// BuilderBackend returns the backend used by builder
func (daemon *Daemon) BuilderBackend() builder.Backend {
	return struct {
		*Daemon
		ImageService
	}{daemon, daemon.imageService}
}

// RawSysInfo returns *sysinfo.SysInfo.
func (daemon *Daemon) RawSysInfo() *sysinfo.SysInfo {
	daemon.sysInfoOnce.Do(func() {
		// We check if sysInfo is not set here, to allow some tests to
		// override the actual sysInfo.
		if daemon.sysInfo == nil {
			daemon.sysInfo = getSysInfo(&daemon.config().Config)
		}
	})

	return daemon.sysInfo
}

// imageBackend is used to satisfy the [executorpkg.ImageBackend] and
// [github.com/docker/docker/api/server/router/distribution.Backend]
// interfaces.
type imageBackend struct {
	ImageService
	registryService *registry.Service
}

// GetRepositories returns a list of repositories configured for the given
// reference. Multiple repositories can be returned if the reference is for
// the default (Docker Hub) registry and a mirror is configured, but it omits
// registries that were not reachable (pinging the /v2/ endpoint failed).
//
// It returns an error if it was unable to reach any of the registries for
// the given reference, or if the provided reference is invalid.
func (i *imageBackend) GetRepositories(ctx context.Context, ref reference.Named, authConfig *registrytypes.AuthConfig) ([]dist.Repository, error) {
	return distribution.GetRepositories(ctx, ref, &distribution.ImagePullConfig{
		Config: distribution.Config{
			AuthConfig:      authConfig,
			RegistryService: i.registryService,
		},
	})
}