github.com/rish1988/moby@v25.0.2+incompatible/daemon/daemon.go

     1  // FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
     2  //go:build go1.19
     3  
     4  // Package daemon exposes the functions that occur on the host server
     5  // that the Docker daemon is running.
     6  //
     7  // In implementing the various functions of the daemon, there is often
     8  // a method-specific struct for configuring the runtime behavior.
     9  package daemon // import "github.com/docker/docker/daemon"
    10  
    11  import (
    12  	"context"
    13  	"fmt"
    14  	"net"
    15  	"os"
    16  	"path"
    17  	"path/filepath"
    18  	"runtime"
    19  	"sync"
    20  	"sync/atomic"
    21  	"time"
    22  
    23  	"github.com/containerd/containerd"
    24  	"github.com/containerd/containerd/defaults"
    25  	"github.com/containerd/containerd/pkg/dialer"
    26  	"github.com/containerd/containerd/pkg/userns"
    27  	"github.com/containerd/containerd/remotes/docker"
    28  	"github.com/containerd/log"
    29  	"github.com/distribution/reference"
    30  	dist "github.com/docker/distribution"
    31  	"github.com/docker/docker/api/types"
    32  	"github.com/docker/docker/api/types/backend"
    33  	containertypes "github.com/docker/docker/api/types/container"
    34  	imagetypes "github.com/docker/docker/api/types/image"
    35  	registrytypes "github.com/docker/docker/api/types/registry"
    36  	"github.com/docker/docker/api/types/swarm"
    37  	"github.com/docker/docker/api/types/volume"
    38  	"github.com/docker/docker/builder"
    39  	"github.com/docker/docker/container"
    40  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    41  	"github.com/docker/docker/daemon/config"
    42  	ctrd "github.com/docker/docker/daemon/containerd"
    43  	"github.com/docker/docker/daemon/events"
    44  	_ "github.com/docker/docker/daemon/graphdriver/register" // register graph drivers
    45  	"github.com/docker/docker/daemon/images"
    46  	dlogger "github.com/docker/docker/daemon/logger"
    47  	"github.com/docker/docker/daemon/logger/local"
    48  	"github.com/docker/docker/daemon/network"
    49  	"github.com/docker/docker/daemon/snapshotter"
    50  	"github.com/docker/docker/daemon/stats"
    51  	"github.com/docker/docker/distribution"
    52  	dmetadata "github.com/docker/docker/distribution/metadata"
    53  	"github.com/docker/docker/dockerversion"
    54  	"github.com/docker/docker/errdefs"
    55  	"github.com/docker/docker/image"
    56  	"github.com/docker/docker/internal/compatcontext"
    57  	"github.com/docker/docker/layer"
    58  	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
    59  	"github.com/docker/docker/libnetwork"
    60  	"github.com/docker/docker/libnetwork/cluster"
    61  	nwconfig "github.com/docker/docker/libnetwork/config"
    62  	"github.com/docker/docker/pkg/authorization"
    63  	"github.com/docker/docker/pkg/fileutils"
    64  	"github.com/docker/docker/pkg/idtools"
    65  	"github.com/docker/docker/pkg/plugingetter"
    66  	"github.com/docker/docker/pkg/sysinfo"
    67  	"github.com/docker/docker/pkg/system"
    68  	"github.com/docker/docker/plugin"
    69  	pluginexec "github.com/docker/docker/plugin/executor/containerd"
    70  	refstore "github.com/docker/docker/reference"
    71  	"github.com/docker/docker/registry"
    72  	"github.com/docker/docker/runconfig"
    73  	volumesservice "github.com/docker/docker/volume/service"
    74  	"github.com/moby/buildkit/util/resolver"
    75  	resolverconfig "github.com/moby/buildkit/util/resolver/config"
    76  	"github.com/moby/locker"
    77  	"github.com/pkg/errors"
    78  	"go.etcd.io/bbolt"
    79  	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
    80  	"golang.org/x/sync/semaphore"
    81  	"google.golang.org/grpc"
    82  	"google.golang.org/grpc/backoff"
    83  	"google.golang.org/grpc/credentials/insecure"
    84  	"resenje.org/singleflight"
    85  )
    86  
    87  type configStore struct {
    88  	config.Config
    89  
    90  	Runtimes runtimes
    91  }
    92  
    93  // Daemon holds information about the Docker daemon.
    94  type Daemon struct {
    95  	id                    string
    96  	repository            string
    97  	containers            container.Store
    98  	containersReplica     *container.ViewDB
    99  	execCommands          *container.ExecStore
   100  	imageService          ImageService
   101  	configStore           atomic.Pointer[configStore]
   102  	configReload          sync.Mutex
   103  	statsCollector        *stats.Collector
   104  	defaultLogConfig      containertypes.LogConfig
   105  	registryService       *registry.Service
   106  	EventsService         *events.Events
   107  	netController         *libnetwork.Controller
   108  	volumes               *volumesservice.VolumesService
   109  	root                  string
   110  	sysInfoOnce           sync.Once
   111  	sysInfo               *sysinfo.SysInfo
   112  	shutdown              bool
   113  	idMapping             idtools.IdentityMapping
   114  	PluginStore           *plugin.Store // TODO: remove
   115  	pluginManager         *plugin.Manager
   116  	linkIndex             *linkIndex
   117  	containerdClient      *containerd.Client
   118  	containerd            libcontainerdtypes.Client
   119  	defaultIsolation      containertypes.Isolation // Default isolation mode on Windows
   120  	clusterProvider       cluster.Provider
   121  	cluster               Cluster
   122  	genericResources      []swarm.GenericResource
   123  	metricsPluginListener net.Listener
   124  	ReferenceStore        refstore.Store
   125  
   126  	machineMemory uint64
   127  
   128  	seccompProfile     []byte
   129  	seccompProfilePath string
   130  
   131  	usageContainers singleflight.Group[struct{}, []*types.Container]
   132  	usageImages     singleflight.Group[struct{}, []*imagetypes.Summary]
   133  	usageVolumes    singleflight.Group[struct{}, []*volume.Volume]
   134  	usageLayer      singleflight.Group[struct{}, int64]
   135  
   136  	pruneRunning int32
   137  	hosts        map[string]bool // hosts stores the addresses the daemon is listening on
   138  	startupDone  chan struct{}
   139  
   140  	attachmentStore       network.AttachmentStore
   141  	attachableNetworkLock *locker.Locker
   142  
   143  	// This is used on Windows, which doesn't currently support running on
   144  	// containerd. It stores metadata for the content store (used for manifest
   145  	// caching), and needs to be closed on daemon exit.
   146  	mdDB *bbolt.DB
   147  
   148  	usesSnapshotter bool
   149  }
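
        // The usage* singleflight fields above deduplicate concurrent disk-usage
        // queries: concurrent callers for the same key share a single in-flight
        // computation. A minimal sketch, assuming the Group.Do signature from
        // resenje.org/singleflight (containerDiskUsage is a hypothetical helper):
        //
        //	res, shared, err := daemon.usageContainers.Do(ctx, struct{}{},
        //		func(ctx context.Context) ([]*types.Container, error) {
        //			return daemon.containerDiskUsage(ctx)
        //		})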
   150  
   151  // ID returns the daemon id
   152  func (daemon *Daemon) ID() string {
   153  	return daemon.id
   154  }
   155  
   156  // StoreHosts stores the addresses the daemon is listening on
   157  func (daemon *Daemon) StoreHosts(hosts []string) {
   158  	if daemon.hosts == nil {
   159  		daemon.hosts = make(map[string]bool)
   160  	}
   161  	for _, h := range hosts {
   162  		daemon.hosts[h] = true
   163  	}
   164  }
   165  
   166  // config returns an immutable snapshot of the current daemon configuration.
   167  // Multiple calls to this function will return the same pointer until the
   168  // configuration is reloaded, so callers must take care not to modify the
   169  // returned value.
   170  //
   171  // To ensure that the configuration used remains consistent throughout the
   172  // lifetime of an operation, the configuration pointer should be passed down the
   173  // call stack, like one would a [context.Context] value. Only the entrypoints
   174  // for operations, the outermost functions, should call this function.
   175  func (daemon *Daemon) config() *configStore {
   176  	cfg := daemon.configStore.Load()
   177  	if cfg == nil {
   178  		return &configStore{}
   179  	}
   180  	return cfg
   181  }
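
        // A minimal sketch of the intended usage (illustrative only; someOperation,
        // stepOne, and stepTwo are hypothetical names): an entrypoint loads the
        // snapshot once and threads it through the call stack, so every step of the
        // operation observes the same configuration even if a reload happens.
        //
        //	func (daemon *Daemon) someOperation() error {
        //		cfg := daemon.config() // load once, at the entrypoint
        //		if err := daemon.stepOne(cfg); err != nil {
        //			return err
        //		}
        //		return daemon.stepTwo(cfg) // same snapshot as stepOne
        //	}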
   182  
   183  // Config returns daemon's config.
   184  func (daemon *Daemon) Config() config.Config {
   185  	return daemon.config().Config
   186  }
   187  
   188  // HasExperimental returns whether the experimental features of the daemon are enabled or not
   189  func (daemon *Daemon) HasExperimental() bool {
   190  	return daemon.config().Experimental
   191  }
   192  
   193  // Features returns the features map from configStore
   194  func (daemon *Daemon) Features() map[string]bool {
   195  	return daemon.config().Features
   196  }
   197  
   198  // UsesSnapshotter returns true if the feature flag to use the containerd snapshotter is enabled.
   199  func (daemon *Daemon) UsesSnapshotter() bool {
   200  	return daemon.usesSnapshotter
   201  }
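
        // As a hedged example, the flag mirrors the "containerd-snapshotter" entry
        // in the daemon's feature map, which is typically enabled via daemon.json:
        //
        //	{
        //	  "features": {
        //	    "containerd-snapshotter": true
        //	  }
        //	}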
   202  
   203  // RegistryHosts returns the registry hosts configuration for the host component
   204  // of a distribution image reference.
   205  func (daemon *Daemon) RegistryHosts(host string) ([]docker.RegistryHost, error) {
   206  	m := map[string]resolverconfig.RegistryConfig{
   207  		"docker.io": {Mirrors: daemon.registryService.ServiceConfig().Mirrors},
   208  	}
   209  	conf := daemon.registryService.ServiceConfig().IndexConfigs
   210  	for k, v := range conf {
   211  		c := m[k]
   212  		if !v.Secure {
   213  			t := true
   214  			c.PlainHTTP = &t
   215  			c.Insecure = &t
   216  		}
   217  		m[k] = c
   218  	}
   219  	if c, ok := m[host]; !ok && daemon.registryService.IsInsecureRegistry(host) {
   220  		t := true
   221  		c.PlainHTTP = &t
   222  		c.Insecure = &t
   223  		m[host] = c
   224  	}
   225  
   226  	for k, v := range m {
   227  		v.TLSConfigDir = []string{registry.HostCertsDir(k)}
   228  		m[k] = v
   229  	}
   230  
   231  	certsDir := registry.CertsDir()
   232  	if fis, err := os.ReadDir(certsDir); err == nil {
   233  		for _, fi := range fis {
   234  			if _, ok := m[fi.Name()]; !ok {
   235  				m[fi.Name()] = resolverconfig.RegistryConfig{
   236  					TLSConfigDir: []string{filepath.Join(certsDir, fi.Name())},
   237  				}
   238  			}
   239  		}
   240  	}
   241  
   242  	return resolver.NewRegistryConfig(m)(host)
   243  }
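
        // Illustrative usage (the registry address is a made-up example): resolve
        // which hosts to contact for the "registry.example.com:5000" component of
        // an image reference, in mirror/fallback order.
        //
        //	hosts, err := daemon.RegistryHosts("registry.example.com:5000")
        //	if err != nil {
        //		return err
        //	}
        //	for _, h := range hosts {
        //		fmt.Printf("host=%s scheme=%s\n", h.Host, h.Scheme)
        //	}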
   244  
   245  // layerAccessor may be implemented by ImageService
   246  type layerAccessor interface {
   247  	GetLayerByID(cid string) (layer.RWLayer, error)
   248  }
   249  
   250  func (daemon *Daemon) restore(cfg *configStore) error {
   251  	var mapLock sync.Mutex
   252  	containers := make(map[string]*container.Container)
   253  
   254  	log.G(context.TODO()).Info("Loading containers: start.")
   255  
   256  	dir, err := os.ReadDir(daemon.repository)
   257  	if err != nil {
   258  		return err
   259  	}
   260  
   261  	// parallelLimit is the maximum number of parallel startup jobs that we
   262  	// allow (this is the limit used for all startup semaphores). The multiplier
   263  	// (128) was chosen after some fairly significant benchmarking -- don't change
   264  	// it unless you've tested it significantly (this value is adjusted if
   265  	// RLIMIT_NOFILE is small to avoid EMFILE).
   266  	parallelLimit := adjustParallelLimit(len(dir), 128*runtime.NumCPU())
   267  
   268  	// Re-used for all parallel startup jobs.
   269  	var group sync.WaitGroup
   270  	sem := semaphore.NewWeighted(int64(parallelLimit))
   271  
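        	// Each batch below follows the same bounded-parallelism pattern: every
        	// job registers with the WaitGroup, blocks on a semaphore slot, and
        	// releases both when done. A generic sketch (jobs, job, and j.run() are
        	// placeholder names, not part of this file):
        	//
        	//	for _, j := range jobs {
        	//		group.Add(1)
        	//		go func(j job) {
        	//			defer group.Done()
        	//			_ = sem.Acquire(context.Background(), 1)
        	//			defer sem.Release(1)
        	//			j.run()
        	//		}(j)
        	//	}
        	//	group.Wait()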
   272  	for _, v := range dir {
   273  		group.Add(1)
   274  		go func(id string) {
   275  			defer group.Done()
   276  			_ = sem.Acquire(context.Background(), 1)
   277  			defer sem.Release(1)
   278  
   279  			logger := log.G(context.TODO()).WithField("container", id)
   280  
   281  			c, err := daemon.load(id)
   282  			if err != nil {
   283  				logger.WithError(err).Error("failed to load container")
   284  				return
   285  			}
   286  			if c.Driver != daemon.imageService.StorageDriver() {
   287  				// Ignore the container if it wasn't created with the current storage-driver
   288  				logger.Debugf("not restoring container because it was created with another storage driver (%s)", c.Driver)
   289  				return
   290  			}
   291  			if accessor, ok := daemon.imageService.(layerAccessor); ok {
   292  				rwlayer, err := accessor.GetLayerByID(c.ID)
   293  				if err != nil {
   294  					logger.WithError(err).Error("failed to load container mount")
   295  					return
   296  				}
   297  				c.RWLayer = rwlayer
   298  			}
   299  			logger.WithFields(log.Fields{
   300  				"running": c.IsRunning(),
   301  				"paused":  c.IsPaused(),
   302  			}).Debug("loaded container")
   303  
   304  			mapLock.Lock()
   305  			containers[c.ID] = c
   306  			mapLock.Unlock()
   307  		}(v.Name())
   308  	}
   309  	group.Wait()
   310  
   311  	removeContainers := make(map[string]*container.Container)
   312  	restartContainers := make(map[*container.Container]chan struct{})
   313  	activeSandboxes := make(map[string]interface{})
   314  
   315  	for _, c := range containers {
   316  		group.Add(1)
   317  		go func(c *container.Container) {
   318  			defer group.Done()
   319  			_ = sem.Acquire(context.Background(), 1)
   320  			defer sem.Release(1)
   321  
   322  			logger := log.G(context.TODO()).WithField("container", c.ID)
   323  
   324  			if err := daemon.registerName(c); err != nil {
   325  				logger.WithError(err).Errorf("failed to register container name: %s", c.Name)
   326  				mapLock.Lock()
   327  				delete(containers, c.ID)
   328  				mapLock.Unlock()
   329  				return
   330  			}
   331  			if err := daemon.Register(c); err != nil {
   332  				logger.WithError(err).Error("failed to register container")
   333  				mapLock.Lock()
   334  				delete(containers, c.ID)
   335  				mapLock.Unlock()
   336  				return
   337  			}
   338  		}(c)
   339  	}
   340  	group.Wait()
   341  
   342  	for _, c := range containers {
   343  		group.Add(1)
   344  		go func(c *container.Container) {
   345  			defer group.Done()
   346  			_ = sem.Acquire(context.Background(), 1)
   347  			defer sem.Release(1)
   348  
   349  			baseLogger := log.G(context.TODO()).WithField("container", c.ID)
   350  
   351  			if c.HostConfig != nil {
   352  				// Migrate containers that don't have the default ("no") restart-policy set.
   353  				// The RestartPolicy.Name field may be empty for containers that were
   354  				// created with versions before v25.0.0.
   355  				//
   356  				// We also need to set the MaximumRetryCount to 0, to prevent
   357  				// validation from failing (MaximumRetryCount is not allowed when
   358  				// the restart-policy is disabled ("no")).
   359  				if c.HostConfig.RestartPolicy.Name == "" {
   360  					baseLogger.Debug("migrated restart-policy")
   361  					c.HostConfig.RestartPolicy.Name = containertypes.RestartPolicyDisabled
   362  					c.HostConfig.RestartPolicy.MaximumRetryCount = 0
   363  				}
   364  
   365  				// Migrate containers that use the deprecated (and now non-functional)
   366  				// logentries driver. Update them to use the "local" logging driver
   367  				// instead.
   368  				//
   369  				// TODO(thaJeztah): remove logentries check and migration code in release v26.0.0.
   370  				if c.HostConfig.LogConfig.Type == "logentries" {
   371  					baseLogger.Warn("migrated deprecated logentries logging driver")
   372  					c.HostConfig.LogConfig = containertypes.LogConfig{
   373  						Type: local.Name,
   374  					}
   375  				}
   376  			}
   377  
   378  			if err := daemon.checkpointAndSave(c); err != nil {
   379  				baseLogger.WithError(err).Error("failed to save migrated container config to disk")
   380  			}
   381  
   382  			daemon.setStateCounter(c)
   383  
   384  			logger := func(c *container.Container) *log.Entry {
   385  				return baseLogger.WithFields(log.Fields{
   386  					"running":    c.IsRunning(),
   387  					"paused":     c.IsPaused(),
   388  					"restarting": c.IsRestarting(),
   389  				})
   390  			}
   391  
   392  			logger(c).Debug("restoring container")
   393  
   394  			var es *containerd.ExitStatus
   395  
   396  			if err := c.RestoreTask(context.Background(), daemon.containerd); err != nil && !errdefs.IsNotFound(err) {
   397  				logger(c).WithError(err).Error("failed to restore container with containerd")
   398  				return
   399  			}
   400  
   401  			alive := false
   402  			status := containerd.Unknown
   403  			if tsk, ok := c.Task(); ok {
   404  				s, err := tsk.Status(context.Background())
   405  				if err != nil {
   406  					logger(c).WithError(err).Error("failed to get task status")
   407  				} else {
   408  					status = s.Status
   409  					alive = status != containerd.Stopped
   410  					if !alive {
   411  						logger(c).Debug("cleaning up dead container process")
   412  						es, err = tsk.Delete(context.Background())
   413  						if err != nil && !errdefs.IsNotFound(err) {
   414  							logger(c).WithError(err).Error("failed to delete task from containerd")
   415  							return
   416  						}
   417  					} else if !cfg.LiveRestoreEnabled {
   418  						logger(c).Debug("shutting down container considered alive by containerd")
   419  						if err := daemon.shutdownContainer(c); err != nil && !errdefs.IsNotFound(err) {
   420  							baseLogger.WithError(err).Error("error shutting down container")
   421  							return
   422  						}
   423  						status = containerd.Stopped
   424  						alive = false
   425  						c.ResetRestartManager(false)
   426  					}
   427  				}
   428  			}
   429  			// If the containerd task for the container was not found, docker's view of the
   430  			// container state will be updated accordingly via SetStopped further down.
   431  
   432  			if c.IsRunning() || c.IsPaused() {
   433  				logger(c).Debug("syncing container on disk state with real state")
   434  
   435  				c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking
   436  
   437  				switch {
   438  				case c.IsPaused() && alive:
   439  					logger(c).WithField("state", status).Info("restored container paused")
   440  					switch status {
   441  					case containerd.Paused, containerd.Pausing:
   442  						// nothing to do
   443  					case containerd.Unknown, containerd.Stopped, "":
   444  						baseLogger.WithField("status", status).Error("unexpected status for paused container during restore")
   445  					default:
   446  						// running
   447  						c.Lock()
   448  						c.Paused = false
   449  						daemon.setStateCounter(c)
   450  						daemon.initHealthMonitor(c)
   451  						if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   452  							baseLogger.WithError(err).Error("failed to update paused container state")
   453  						}
   454  						c.Unlock()
   455  					}
   456  				case !c.IsPaused() && alive:
   457  					logger(c).Debug("restoring healthcheck")
   458  					c.Lock()
   459  					daemon.initHealthMonitor(c)
   460  					c.Unlock()
   461  				}
   462  
   463  				if !alive {
   464  					logger(c).Debug("setting stopped state")
   465  					c.Lock()
   466  					var ces container.ExitStatus
   467  					if es != nil {
   468  						ces.ExitCode = int(es.ExitCode())
   469  						ces.ExitedAt = es.ExitTime()
   470  					} else {
   471  						ces.ExitCode = 255
   472  					}
   473  					c.SetStopped(&ces)
   474  					daemon.Cleanup(context.TODO(), c)
   475  					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   476  						baseLogger.WithError(err).Error("failed to update stopped container state")
   477  					}
   478  					c.Unlock()
   479  					logger(c).Debug("set stopped state")
   480  				}
   481  
   482  				// we call Mount and then Unmount to get BaseFs of the container
   483  				if err := daemon.Mount(c); err != nil {
   484  				// The mount is unlikely to fail. However, if it does fail, the
   485  				// container should still be allowed to restore here. Some functionality
   486  				// (like docker exec -u user) might be missing, but the container can
   487  				// still be stopped/restarted/removed.
   488  				// See #29365 for related information.
   489  				// The error is only logged here.
   490  					logger(c).WithError(err).Warn("failed to mount container to get BaseFs path")
   491  				} else {
   492  					if err := daemon.Unmount(c); err != nil {
   493  					logger(c).WithError(err).Warn("failed to unmount container to get BaseFs path")
   494  					}
   495  				}
   496  
   497  				c.ResetRestartManager(false)
   498  				if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() {
   499  					options, err := daemon.buildSandboxOptions(&cfg.Config, c)
   500  					if err != nil {
   501  						logger(c).WithError(err).Warn("failed to build sandbox option to restore container")
   502  					}
   503  					mapLock.Lock()
   504  					activeSandboxes[c.NetworkSettings.SandboxID] = options
   505  					mapLock.Unlock()
   506  				}
   507  			}
   508  
   509  			// Get the list of containers we need to restart.
   510  
   511  			// Do not autostart containers which
   512  			// have endpoints in a swarm-scope
   513  			// network, since the cluster is not
   514  			// initialized yet. We will start
   515  			// them after the cluster has been
   516  			// initialized.
   517  			if cfg.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint && c.HasBeenStartedBefore {
   518  				mapLock.Lock()
   519  				restartContainers[c] = make(chan struct{})
   520  				mapLock.Unlock()
   521  			} else if c.HostConfig != nil && c.HostConfig.AutoRemove {
   522  				// Remove the container if live-restore is disabled or if the container has already exited.
   523  				if !cfg.LiveRestoreEnabled || !alive {
   524  					mapLock.Lock()
   525  					removeContainers[c.ID] = c
   526  					mapLock.Unlock()
   527  				}
   528  			}
   529  
   530  			c.Lock()
   531  			if c.RemovalInProgress {
   532  				// We probably crashed in the middle of a removal, reset
   533  				// the flag.
   534  				//
   535  				// We DO NOT remove the container here as we do not
   536  				// know if the user had requested for either the
   537  				// associated volumes, network links or both to also
   538  				// be removed. So we put the container in the "dead"
   539  				// state and leave further processing up to them.
   540  				c.RemovalInProgress = false
   541  				c.Dead = true
   542  				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   543  					baseLogger.WithError(err).Error("failed to update RemovalInProgress container state")
   544  				} else {
   545  					baseLogger.Debugf("reset RemovalInProgress state for container")
   546  				}
   547  			}
   548  			c.Unlock()
   549  			logger(c).Debug("done restoring container")
   550  		}(c)
   551  	}
   552  	group.Wait()
   553  
   554  	// Initialize the network controller and configure network settings.
   555  	//
   556  	// Note that we cannot initialize the network controller earlier, as it
   557  	// needs to know if there's active sandboxes (running containers).
   558  	if err = daemon.initNetworkController(&cfg.Config, activeSandboxes); err != nil {
   559  		return fmt.Errorf("Error initializing network controller: %v", err)
   560  	}
   561  
   562  	// Now that all the containers are registered, register the links
   563  	for _, c := range containers {
   564  		group.Add(1)
   565  		go func(c *container.Container) {
   566  			_ = sem.Acquire(context.Background(), 1)
   567  
   568  			if err := daemon.registerLinks(c, c.HostConfig); err != nil {
   569  				log.G(context.TODO()).WithField("container", c.ID).WithError(err).Error("failed to register link for container")
   570  			}
   571  
   572  			sem.Release(1)
   573  			group.Done()
   574  		}(c)
   575  	}
   576  	group.Wait()
   577  
   578  	for c, notifyChan := range restartContainers {
   579  		group.Add(1)
   580  		go func(c *container.Container, chNotify chan struct{}) {
   581  			_ = sem.Acquire(context.Background(), 1)
   582  
   583  			logger := log.G(context.TODO()).WithField("container", c.ID)
   584  
   585  			logger.Debug("starting container")
   586  
   587  			// Ignore errors here, as this is a best effort to wait for children
   588  			// to be running before we try to start the container.
   589  			children := daemon.children(c)
   590  			timeout := time.NewTimer(5 * time.Second)
   591  			defer timeout.Stop()
   592  
   593  			for _, child := range children {
   594  				if notifier, exists := restartContainers[child]; exists {
   595  					select {
   596  					case <-notifier:
   597  					case <-timeout.C:
   598  					}
   599  				}
   600  			}
   601  
   602  			if err := daemon.prepareMountPoints(c); err != nil {
   603  				logger.WithError(err).Error("failed to prepare mount points for container")
   604  			}
   605  			if err := daemon.containerStart(context.Background(), cfg, c, "", "", true); err != nil {
   606  				logger.WithError(err).Error("failed to start container")
   607  			}
   608  			close(chNotify)
   609  
   610  			sem.Release(1)
   611  			group.Done()
   612  		}(c, notifyChan)
   613  	}
   614  	group.Wait()
   615  
   616  	for id := range removeContainers {
   617  		group.Add(1)
   618  		go func(cid string) {
   619  			_ = sem.Acquire(context.Background(), 1)
   620  
   621  			if err := daemon.containerRm(&cfg.Config, cid, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
   622  				log.G(context.TODO()).WithField("container", cid).WithError(err).Error("failed to remove container")
   623  			}
   624  
   625  			sem.Release(1)
   626  			group.Done()
   627  		}(id)
   628  	}
   629  	group.Wait()
   630  
   631  	// Any containers that were started above will already have had this done;
   632  	// we now need to prepare the mount points for the rest of the containers as
   633  	// well. This is harmless for containers that already had it run.
   634  	// This must run after any containers with a restart policy, so that
   635  	// containerized plugins have a chance to be running before we try to initialize them.
   636  	for _, c := range containers {
   637  		// If the container has a restart policy, do not prepare the
   638  		// mount points, since that was already done when restarting.
   639  		// This speeds up daemon start when a restarting container
   640  		// has a volume and the volume driver is not available.
   641  		if _, ok := restartContainers[c]; ok {
   642  			continue
   643  		} else if _, ok := removeContainers[c.ID]; ok {
   644  			// container is automatically removed, skip it.
   645  			continue
   646  		}
   647  
   648  		group.Add(1)
   649  		go func(c *container.Container) {
   650  			_ = sem.Acquire(context.Background(), 1)
   651  
   652  			if err := daemon.prepareMountPoints(c); err != nil {
   653  				log.G(context.TODO()).WithField("container", c.ID).WithError(err).Error("failed to prepare mountpoints for container")
   654  			}
   655  
   656  			sem.Release(1)
   657  			group.Done()
   658  		}(c)
   659  	}
   660  	group.Wait()
   661  
   662  	log.G(context.TODO()).Info("Loading containers: done.")
   663  
   664  	return nil
   665  }
   666  
   667  // RestartSwarmContainers restarts any autostart container which has a
   668  // swarm endpoint.
   669  func (daemon *Daemon) RestartSwarmContainers() {
   670  	daemon.restartSwarmContainers(context.Background(), daemon.config())
   671  }
   672  
   673  func (daemon *Daemon) restartSwarmContainers(ctx context.Context, cfg *configStore) {
   674  	// parallelLimit is the maximum number of parallel startup jobs that we
   675  	// allow (this is the limit used for all startup semaphores). The multiplier
   676  	// (128) was chosen after some fairly significant benchmarking -- don't change
   677  	// it unless you've tested it significantly (this value is adjusted if
   678  	// RLIMIT_NOFILE is small to avoid EMFILE).
   679  	parallelLimit := adjustParallelLimit(len(daemon.List()), 128*runtime.NumCPU())
   680  
   681  	var group sync.WaitGroup
   682  	sem := semaphore.NewWeighted(int64(parallelLimit))
   683  
   684  	for _, c := range daemon.List() {
   685  		if !c.IsRunning() && !c.IsPaused() {
   686  			// Autostart all the containers which have a
   687  			// swarm endpoint, now that the cluster is
   688  			// initialized.
   689  			if cfg.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint && c.HasBeenStartedBefore {
   690  				group.Add(1)
   691  				go func(c *container.Container) {
   692  					if err := sem.Acquire(ctx, 1); err != nil {
   693  						// ctx is done.
   694  						group.Done()
   695  						return
   696  					}
   697  
   698  					if err := daemon.containerStart(ctx, cfg, c, "", "", true); err != nil {
   699  						log.G(ctx).WithField("container", c.ID).WithError(err).Error("failed to start swarm container")
   700  					}
   701  
   702  					sem.Release(1)
   703  					group.Done()
   704  				}(c)
   705  			}
   706  		}
   707  	}
   708  	group.Wait()
   709  }
   710  
   711  func (daemon *Daemon) children(c *container.Container) map[string]*container.Container {
   712  	return daemon.linkIndex.children(c)
   713  }
   714  
   715  // parents returns the parent containers of the
   716  // given container.
   717  func (daemon *Daemon) parents(c *container.Container) map[string]*container.Container {
   718  	return daemon.linkIndex.parents(c)
   719  }
   720  
   721  func (daemon *Daemon) registerLink(parent, child *container.Container, alias string) error {
   722  	fullName := path.Join(parent.Name, alias)
   723  	if err := daemon.containersReplica.ReserveName(fullName, child.ID); err != nil {
   724  		if errors.Is(err, container.ErrNameReserved) {
   725  			log.G(context.TODO()).Warnf("error registering link for %s, to %s, as alias %s, ignoring: %v", parent.ID, child.ID, alias, err)
   726  			return nil
   727  		}
   728  		return err
   729  	}
   730  	daemon.linkIndex.link(parent, child, fullName)
   731  	return nil
   732  }
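
        // For example (hypothetical values): registering a link from a parent named
        // "/web" to a child container under alias "db" reserves the name "/web/db"
        // for the child's ID, so that name resolves to the linked container:
        //
        //	_ = daemon.registerLink(web, db, "db") // reserves path.Join(web.Name, "db")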
   733  
   734  // DaemonJoinsCluster informs the daemon that it has joined the cluster and
   735  // provides the handler to query the cluster component.
   736  func (daemon *Daemon) DaemonJoinsCluster(clusterProvider cluster.Provider) {
   737  	daemon.setClusterProvider(clusterProvider)
   738  }
   739  
   740  // DaemonLeavesCluster informs the daemon that it has left the cluster.
   741  func (daemon *Daemon) DaemonLeavesCluster() {
   742  	// Daemon is in charge of removing the attachable networks with
   743  	// connected containers when the node leaves the swarm
   744  	daemon.clearAttachableNetworks()
   745  	// We no longer need the cluster provider, stop it now so that
   746  	// the network agent will stop listening to cluster events.
   747  	daemon.setClusterProvider(nil)
   748  	// Wait for the networking cluster agent to stop
   749  	daemon.netController.AgentStopWait()
   750  	// Daemon is in charge of removing the ingress network when the
   751  	// node leaves the swarm. Wait for the job to be done or to time out.
   752  	// This is also called on graceful daemon shutdown. We need to
   753  	// wait, because the ingress release has to happen before the
   754  	// network controller is stopped.
   755  
   756  	if done, err := daemon.ReleaseIngress(); err == nil {
   757  		timeout := time.NewTimer(5 * time.Second)
   758  		defer timeout.Stop()
   759  
   760  		select {
   761  		case <-done:
   762  		case <-timeout.C:
   763  			log.G(context.TODO()).Warn("timeout while waiting for ingress network removal")
   764  		}
   765  	} else {
   766  		log.G(context.TODO()).Warnf("failed to initiate ingress network removal: %v", err)
   767  	}
   768  
   769  	daemon.attachmentStore.ClearAttachments()
   770  }
   771  
   772  // setClusterProvider sets a component for querying the current cluster state.
   773  func (daemon *Daemon) setClusterProvider(clusterProvider cluster.Provider) {
   774  	daemon.clusterProvider = clusterProvider
   775  	daemon.netController.SetClusterProvider(clusterProvider)
   776  	daemon.attachableNetworkLock = locker.New()
   777  }
   778  
   779  // IsSwarmCompatible verifies whether the current daemon
   780  // configuration is compatible with swarm mode.
   781  func (daemon *Daemon) IsSwarmCompatible() error {
   782  	return daemon.config().IsSwarmCompatible()
   783  }
   784  
   785  // NewDaemon sets up everything for the daemon to be able to service
   786  // requests from the webserver.
   787  func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.Store, authzMiddleware *authorization.Middleware) (daemon *Daemon, err error) {
   788  	// Verify platform-specific requirements.
   789  	// TODO(thaJeztah): this should be called before we try to create the daemon; perhaps together with the config validation.
   790  	if err := checkSystem(); err != nil {
   791  		return nil, err
   792  	}
   793  
   794  	registryService, err := registry.NewService(config.ServiceOptions)
   795  	if err != nil {
   796  		return nil, err
   797  	}
   798  
   799  	// Ensure that we have a correct root key limit for launching containers.
   800  	if err := modifyRootKeyLimit(); err != nil {
   801  		log.G(ctx).Warnf("unable to modify root key limit, number of containers could be limited by this quota: %v", err)
   802  	}
   803  
   804  	// Ensure we have compatible and valid configuration options
   805  	if err := verifyDaemonSettings(config); err != nil {
   806  		return nil, err
   807  	}
   808  
   809  	// Do we have a disabled network?
   810  	config.DisableBridge = isBridgeNetworkDisabled(config)
   811  
   812  	// Setup the resolv.conf
   813  	setupResolvConf(config)
   814  
   815  	idMapping, err := setupRemappedRoot(config)
   816  	if err != nil {
   817  		return nil, err
   818  	}
   819  	rootIDs := idMapping.RootPair()
   820  	if err := setMayDetachMounts(); err != nil {
   821  		log.G(ctx).WithError(err).Warn("Could not set may_detach_mounts kernel parameter")
   822  	}
   823  
   824  	// set up the tmpDir to use a canonical path
   825  	tmp, err := prepareTempDir(config.Root)
   826  	if err != nil {
   827  		return nil, fmt.Errorf("Unable to get the TempDir under %s: %s", config.Root, err)
   828  	}
   829  	realTmp, err := fileutils.ReadSymlinkedDirectory(tmp)
   830  	if err != nil {
   831  		return nil, fmt.Errorf("Unable to get the full path to the TempDir (%s): %s", tmp, err)
   832  	}
   833  	if isWindows {
   834  		if err := system.MkdirAll(realTmp, 0); err != nil {
   835  			return nil, fmt.Errorf("Unable to create the TempDir (%s): %s", realTmp, err)
   836  		}
   837  		os.Setenv("TEMP", realTmp)
   838  		os.Setenv("TMP", realTmp)
   839  	} else {
   840  		os.Setenv("TMPDIR", realTmp)
   841  	}
   842  
   843  	if err := initRuntimesDir(config); err != nil {
   844  		return nil, err
   845  	}
   846  	rts, err := setupRuntimes(config)
   847  	if err != nil {
   848  		return nil, err
   849  	}
   850  
   851  	d := &Daemon{
   852  		PluginStore: pluginStore,
   853  		startupDone: make(chan struct{}),
   854  	}
   855  	cfgStore := &configStore{
   856  		Config:   *config,
   857  		Runtimes: rts,
   858  	}
   859  	d.configStore.Store(cfgStore)
   860  
   861  	// TEST_INTEGRATION_USE_SNAPSHOTTER is used for integration tests only.
   862  	if os.Getenv("TEST_INTEGRATION_USE_SNAPSHOTTER") != "" {
   863  		d.usesSnapshotter = true
   864  	} else {
   865  		d.usesSnapshotter = config.Features["containerd-snapshotter"]
   866  	}
   867  
   868  	// Ensure the daemon is properly shut down if there is a failure during
   869  	// initialization.
   870  	defer func() {
   871  		if err != nil {
   872  			// Use a fresh context here. Passed context could be cancelled.
   873  			if err := d.Shutdown(context.Background()); err != nil {
   874  				log.G(ctx).Error(err)
   875  			}
   876  		}
   877  	}()
   878  
   879  	if err := d.setGenericResources(&cfgStore.Config); err != nil {
   880  		return nil, err
   881  	}
   882  	// set up the SIGUSR1 handler on Unix-like systems, or a Win32 global event
   883  	// on Windows, to dump goroutine stacks
   884  	stackDumpDir := cfgStore.Root
   885  	if execRoot := cfgStore.GetExecRoot(); execRoot != "" {
   886  		stackDumpDir = execRoot
   887  	}
   888  	d.setupDumpStackTrap(stackDumpDir)
   889  
   890  	if err := d.setupSeccompProfile(&cfgStore.Config); err != nil {
   891  		return nil, err
   892  	}
   893  
   894  	// Set the default isolation mode (only applicable on Windows)
   895  	if err := d.setDefaultIsolation(&cfgStore.Config); err != nil {
   896  		return nil, fmt.Errorf("error setting default isolation mode: %v", err)
   897  	}
   898  
   899  	if err := configureMaxThreads(&cfgStore.Config); err != nil {
   900  		log.G(ctx).Warnf("Failed to configure golang's threads limit: %v", err)
   901  	}
   902  
   903  	// ensureDefaultAppArmorProfile does nothing if apparmor is disabled
   904  	if err := ensureDefaultAppArmorProfile(); err != nil {
   905  		log.G(ctx).Errorf(err.Error())
   906  	}
   907  
   908  	daemonRepo := filepath.Join(cfgStore.Root, "containers")
   909  	if err := idtools.MkdirAllAndChown(daemonRepo, 0o710, idtools.Identity{
   910  		UID: idtools.CurrentIdentity().UID,
   911  		GID: rootIDs.GID,
   912  	}); err != nil {
   913  		return nil, err
   914  	}
   915  
   916  	if isWindows {
   917  		// Note that permissions (0o700) are ignored on Windows; passing them to
   918  		// show intent only. We could consider using idtools.MkdirAndChown here
   919  		// to apply an ACL.
   920  		if err = os.Mkdir(filepath.Join(cfgStore.Root, "credentialspecs"), 0o700); err != nil && !errors.Is(err, os.ErrExist) {
   921  			return nil, err
   922  		}
   923  	}
   924  
   925  	d.registryService = registryService
   926  	dlogger.RegisterPluginGetter(d.PluginStore)
   927  
   928  	metricsSockPath, err := d.listenMetricsSock(&cfgStore.Config)
   929  	if err != nil {
   930  		return nil, err
   931  	}
   932  	registerMetricsPluginCallback(d.PluginStore, metricsSockPath)
   933  
   934  	backoffConfig := backoff.DefaultConfig
   935  	backoffConfig.MaxDelay = 3 * time.Second
   936  	connParams := grpc.ConnectParams{
   937  		Backoff: backoffConfig,
   938  	}
   939  	gopts := []grpc.DialOption{
   940  		// WithBlock makes sure that the following containerd request
   941  		// is reliable.
   942  		//
   943  		// NOTE: In one edge case, under high load, the kernel OOM-kills
   944  		// dockerd, containerd, and the containerd-shims. Both dockerd
   945  		// and containerd then restart, but containerd takes time to
   946  		// recover all the existing containers. Until containerd is
   947  		// serving, dockerd requests fail with a gRPC error. Worse, the
   948  		// restore action ignores any non-NotFound errors and reports a
   949  		// running state for containers that have already stopped, which
   950  		// is unexpected behavior, and dockerd must be restarted to
   951  		// recover.
   952  		//
   953  		// Adding WithBlock prevents that edge case; in the common case,
   954  		// containerd will be serving again shortly, so it does no harm
   955  		// to use WithBlock for the containerd connection.
   956  		grpc.WithBlock(),
   957  
   958  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   959  		grpc.WithConnectParams(connParams),
   960  		grpc.WithContextDialer(dialer.ContextDialer),
   961  
   962  		// TODO(stevvooe): We may need to allow configuration of this on the client.
   963  		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)),
   964  		grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)),
   965  		grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor()),
   966  		grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor()),
   967  	}
   968  
   969  	if cfgStore.ContainerdAddr != "" {
   970  		d.containerdClient, err = containerd.New(
   971  			cfgStore.ContainerdAddr,
   972  			containerd.WithDefaultNamespace(cfgStore.ContainerdNamespace),
   973  			containerd.WithDialOpts(gopts),
   974  			containerd.WithTimeout(60*time.Second),
   975  		)
   976  		if err != nil {
   977  			return nil, errors.Wrapf(err, "failed to dial %q", cfgStore.ContainerdAddr)
   978  		}
   979  	}
   980  
   981  	createPluginExec := func(m *plugin.Manager) (plugin.Executor, error) {
   982  		var pluginCli *containerd.Client
   983  
   984  		if cfgStore.ContainerdAddr != "" {
   985  			pluginCli, err = containerd.New(
   986  				cfgStore.ContainerdAddr,
   987  				containerd.WithDefaultNamespace(cfgStore.ContainerdPluginNamespace),
   988  				containerd.WithDialOpts(gopts),
   989  				containerd.WithTimeout(60*time.Second),
   990  			)
   991  			if err != nil {
   992  				return nil, errors.Wrapf(err, "failed to dial %q", cfgStore.ContainerdAddr)
   993  			}
   994  		}
   995  
   996  		var (
   997  			shim     string
   998  			shimOpts interface{}
   999  		)
  1000  		if runtime.GOOS != "windows" {
  1001  			shim, shimOpts, err = rts.Get("")
  1002  			if err != nil {
  1003  				return nil, err
  1004  			}
  1005  		}
  1006  		return pluginexec.New(ctx, getPluginExecRoot(&cfgStore.Config), pluginCli, cfgStore.ContainerdPluginNamespace, m, shim, shimOpts)
  1007  	}
  1008  
  1009  	// Plugin system initialization should happen before restore. Do not change order.
  1010  	d.pluginManager, err = plugin.NewManager(plugin.ManagerConfig{
  1011  		Root:               filepath.Join(cfgStore.Root, "plugins"),
  1012  		ExecRoot:           getPluginExecRoot(&cfgStore.Config),
  1013  		Store:              d.PluginStore,
  1014  		CreateExecutor:     createPluginExec,
  1015  		RegistryService:    registryService,
  1016  		LiveRestoreEnabled: cfgStore.LiveRestoreEnabled,
  1017  		LogPluginEvent:     d.LogPluginEvent, // todo: make private
  1018  		AuthzMiddleware:    authzMiddleware,
  1019  	})
  1020  	if err != nil {
  1021  		return nil, errors.Wrap(err, "couldn't create plugin manager")
  1022  	}
  1023  
  1024  	d.defaultLogConfig, err = defaultLogConfig(&cfgStore.Config)
  1025  	if err != nil {
  1026  		return nil, errors.Wrap(err, "failed to set log opts")
  1027  	}
  1028  	log.G(ctx).Debugf("Using default logging driver %s", d.defaultLogConfig.Type)
  1029  
  1030  	d.volumes, err = volumesservice.NewVolumeService(cfgStore.Root, d.PluginStore, rootIDs, d)
  1031  	if err != nil {
  1032  		return nil, err
  1033  	}
  1034  
  1035  	// Check if the devices cgroup is mounted; on Linux, it is a hard
  1036  	// requirement for container security.
  1037  	//
  1038  	// Important: we call getSysInfo() directly here, without storing the results,
  1039  	// as networking has not yet been set up, so we only have partial system info
  1040  	// at this point.
  1041  	//
  1042  	// TODO(thaJeztah) add a utility to only collect the CgroupDevicesEnabled information
  1043  	if runtime.GOOS == "linux" && !userns.RunningInUserNS() && !getSysInfo(&cfgStore.Config).CgroupDevicesEnabled {
  1044  		return nil, errors.New("Devices cgroup isn't mounted")
  1045  	}
  1046  
  1047  	d.id, err = LoadOrCreateID(cfgStore.Root)
  1048  	if err != nil {
  1049  		return nil, err
  1050  	}
  1051  	d.repository = daemonRepo
  1052  	d.containers = container.NewMemoryStore()
  1053  	if d.containersReplica, err = container.NewViewDB(); err != nil {
  1054  		return nil, err
  1055  	}
  1056  	d.execCommands = container.NewExecStore()
  1057  	d.statsCollector = d.newStatsCollector(1 * time.Second)
  1058  
  1059  	d.EventsService = events.New()
  1060  	d.root = cfgStore.Root
  1061  	d.idMapping = idMapping
  1062  
  1063  	d.linkIndex = newLinkIndex()
  1064  
  1065  	// On Windows we don't support the environment variable or a user-supplied
  1066  	// graphdriver. Unix platforms, however, run a single graphdriver for all
  1067  	// containers; it can be set through an environment variable, a daemon start
  1068  	// parameter, or chosen through layerstore initialization in driver priority order.
  1069  	driverName := os.Getenv("DOCKER_DRIVER")
  1070  	if isWindows && d.UsesSnapshotter() {
  1071  		// Containerd WCOW snapshotter
  1072  		driverName = "windows"
  1073  	} else if isWindows {
  1074  		// Docker WCOW graphdriver
  1075  		driverName = "windowsfilter"
  1076  	} else if driverName != "" {
  1077  		log.G(ctx).Infof("Setting the storage driver from the $DOCKER_DRIVER environment variable (%s)", driverName)
  1078  	} else {
  1079  		driverName = cfgStore.GraphDriver
  1080  	}
  1081  
  1082  	if d.UsesSnapshotter() {
  1083  		if os.Getenv("TEST_INTEGRATION_USE_SNAPSHOTTER") != "" {
  1084  			log.G(ctx).Warn("Enabling containerd snapshotter through the $TEST_INTEGRATION_USE_SNAPSHOTTER environment variable. This should only be used for testing.")
  1085  		}
  1086  		log.G(ctx).Info("Starting daemon with containerd snapshotter integration enabled")
  1087  
  1088  		// FIXME(thaJeztah): implement automatic snapshotter-selection similar to graph-driver selection; see https://github.com/moby/moby/issues/44076
  1089  		if driverName == "" {
  1090  			driverName = containerd.DefaultSnapshotter
  1091  		}
  1092  
  1093  		// Configure and validate the kernel's security support. Note this is a Linux/FreeBSD
  1094  		// operation only, so it is safe to pass *just* the runtime OS graphdriver.
  1095  		if err := configureKernelSecuritySupport(&cfgStore.Config, driverName); err != nil {
  1096  			return nil, err
  1097  		}
  1098  		d.imageService = ctrd.NewService(ctrd.ImageServiceConfig{
  1099  			Client:          d.containerdClient,
  1100  			Containers:      d.containers,
  1101  			Snapshotter:     driverName,
  1102  			RegistryHosts:   d.RegistryHosts,
  1103  			Registry:        d.registryService,
  1104  			EventsService:   d.EventsService,
  1105  			IDMapping:       idMapping,
  1106  			RefCountMounter: snapshotter.NewMounter(config.Root, driverName, idMapping),
  1107  		})
  1108  	} else {
  1109  		layerStore, err := layer.NewStoreFromOptions(layer.StoreOptions{
  1110  			Root:                      cfgStore.Root,
  1111  			MetadataStorePathTemplate: filepath.Join(cfgStore.Root, "image", "%s", "layerdb"),
  1112  			GraphDriver:               driverName,
  1113  			GraphDriverOptions:        cfgStore.GraphOptions,
  1114  			IDMapping:                 idMapping,
  1115  			PluginGetter:              d.PluginStore,
  1116  			ExperimentalEnabled:       cfgStore.Experimental,
  1117  		})
  1118  		if err != nil {
  1119  			return nil, err
  1120  		}
  1121  
  1122  		// Configure and validate the kernel's security support. Note this is a Linux/FreeBSD
  1123  		// operation only, so it is safe to pass *just* the runtime OS graphdriver.
  1124  		if err := configureKernelSecuritySupport(&cfgStore.Config, layerStore.DriverName()); err != nil {
  1125  			return nil, err
  1126  		}
  1127  
  1128  		imageRoot := filepath.Join(cfgStore.Root, "image", layerStore.DriverName())
  1129  		ifs, err := image.NewFSStoreBackend(filepath.Join(imageRoot, "imagedb"))
  1130  		if err != nil {
  1131  			return nil, err
  1132  		}
  1133  
  1134  		// We have a single tag/reference store for the daemon globally. However,
  1135  		// it's stored under the graphdriver. On host platforms which only support
  1136  		// a single container OS, but multiple selectable graphdrivers, this means
  1137  		// the global reference store lives under whichever graphdriver is chosen.
  1138  		// For platforms which support multiple container operating systems, it is
  1139  		// less obvious where the global reference store should be located.
  1140  		// Fortunately, for Windows, which is currently the only daemon supporting
  1141  		// multiple container operating systems, the list of available graphdrivers
  1142  		// isn't user-configurable. For backwards compatibility, we just put it
  1143  		// under the windowsfilter directory regardless.
  1144  		refStoreLocation := filepath.Join(imageRoot, `repositories.json`)
  1145  		rs, err := refstore.NewReferenceStore(refStoreLocation)
  1146  		if err != nil {
  1147  			return nil, fmt.Errorf("Couldn't create reference store repository: %s", err)
  1148  		}
  1149  		d.ReferenceStore = rs
  1150  
  1151  		imageStore, err := image.NewImageStore(ifs, layerStore)
  1152  		if err != nil {
  1153  			return nil, err
  1154  		}
  1155  
  1156  		distributionMetadataStore, err := dmetadata.NewFSMetadataStore(filepath.Join(imageRoot, "distribution"))
  1157  		if err != nil {
  1158  			return nil, err
  1159  		}
  1160  
  1161  		imgSvcConfig := images.ImageServiceConfig{
  1162  			ContainerStore:            d.containers,
  1163  			DistributionMetadataStore: distributionMetadataStore,
  1164  			EventsService:             d.EventsService,
  1165  			ImageStore:                imageStore,
  1166  			LayerStore:                layerStore,
  1167  			MaxConcurrentDownloads:    config.MaxConcurrentDownloads,
  1168  			MaxConcurrentUploads:      config.MaxConcurrentUploads,
  1169  			MaxDownloadAttempts:       config.MaxDownloadAttempts,
  1170  			ReferenceStore:            rs,
  1171  			RegistryService:           registryService,
  1172  			ContentNamespace:          config.ContainerdNamespace,
  1173  		}
  1174  
  1175  		// containerd is not currently supported on Windows, so sometimes
  1176  		// d.containerdClient will be nil. In that case we'll create a local
  1177  		// content store; otherwise we'll use containerd's.
  1178  		if d.containerdClient != nil {
  1179  			imgSvcConfig.Leases = d.containerdClient.LeasesService()
  1180  			imgSvcConfig.ContentStore = d.containerdClient.ContentStore()
  1181  		} else {
  1182  			imgSvcConfig.ContentStore, imgSvcConfig.Leases, err = d.configureLocalContentStore(config.ContainerdNamespace)
  1183  			if err != nil {
  1184  				return nil, err
  1185  			}
  1186  		}
  1187  
  1188  		// TODO: imageStore, distributionMetadataStore, and ReferenceStore are only
  1189  		// used above to run migration. They could be initialized in ImageService
  1190  		// if migration is called from daemon/images. layerStore might move as well.
  1191  		d.imageService = images.NewImageService(imgSvcConfig)
  1192  
  1193  		log.G(ctx).Debugf("Max Concurrent Downloads: %d", imgSvcConfig.MaxConcurrentDownloads)
  1194  		log.G(ctx).Debugf("Max Concurrent Uploads: %d", imgSvcConfig.MaxConcurrentUploads)
  1195  		log.G(ctx).Debugf("Max Download Attempts: %d", imgSvcConfig.MaxDownloadAttempts)
  1196  	}
  1197  
  1198  	go d.execCommandGC()
  1199  
  1200  	if err := d.initLibcontainerd(ctx, &cfgStore.Config); err != nil {
  1201  		return nil, err
  1202  	}
  1203  
  1204  	if err := d.restore(cfgStore); err != nil {
  1205  		return nil, err
  1206  	}
  1207  	close(d.startupDone)
  1208  
  1209  	info, err := d.SystemInfo(ctx)
  1210  	if err != nil {
  1211  		return nil, err
  1212  	}
  1213  	for _, w := range info.Warnings {
  1214  		log.G(ctx).Warn(w)
  1215  	}
  1216  
  1217  	engineInfo.WithValues(
  1218  		dockerversion.Version,
  1219  		dockerversion.GitCommit,
  1220  		info.Architecture,
  1221  		info.Driver,
  1222  		info.KernelVersion,
  1223  		info.OperatingSystem,
  1224  		info.OSType,
  1225  		info.OSVersion,
  1226  		info.ID,
  1227  	).Set(1)
  1228  	engineCpus.Set(float64(info.NCPU))
  1229  	engineMemory.Set(float64(info.MemTotal))
  1230  
  1231  	log.G(ctx).WithFields(log.Fields{
  1232  		"version":                dockerversion.Version,
  1233  		"commit":                 dockerversion.GitCommit,
  1234  		"storage-driver":         d.ImageService().StorageDriver(),
  1235  		"containerd-snapshotter": d.UsesSnapshotter(),
  1236  	}).Info("Docker daemon")
  1237  
  1238  	return d, nil
  1239  }
  1240  
  1241  // DistributionServices returns services controlling daemon storage
  1242  func (daemon *Daemon) DistributionServices() images.DistributionServices {
  1243  	return daemon.imageService.DistributionServices()
  1244  }
  1245  
  1246  func (daemon *Daemon) waitForStartupDone() {
  1247  	<-daemon.startupDone
  1248  }
  1249  
  1250  func (daemon *Daemon) shutdownContainer(c *container.Container) error {
  1251  	ctx := compatcontext.WithoutCancel(context.TODO())
  1252  
  1253  	// If the container failed to exit within stopTimeout seconds of SIGTERM, force-kill it.
  1254  	if err := daemon.containerStop(ctx, c, containertypes.StopOptions{}); err != nil {
  1255  		return fmt.Errorf("Failed to stop container %s with error: %v", c.ID, err)
  1256  	}
  1257  
  1258  	// Wait without timeout for the container to exit.
  1259  	// Ignore the result.
  1260  	<-c.Wait(ctx, container.WaitConditionNotRunning)
  1261  	return nil
  1262  }
  1263  
  1264  // ShutdownTimeout returns the timeout (in seconds) before containers are forcibly
  1265  // killed during shutdown. The default timeout can be configured both on the daemon
  1266  // and per container, and the longest timeout will be used. A grace-period of
  1267  // 5 seconds is added to the configured timeout.
  1268  //
  1269  // A negative (-1) timeout means "indefinitely", which means that containers
  1270  // are not forcibly killed, and the daemon shuts down after all containers exit.
  1271  func (daemon *Daemon) ShutdownTimeout() int {
  1272  	return daemon.shutdownTimeout(&daemon.config().Config)
  1273  }
  1274  
  1275  func (daemon *Daemon) shutdownTimeout(cfg *config.Config) int {
  1276  	shutdownTimeout := cfg.ShutdownTimeout
  1277  	if shutdownTimeout < 0 {
  1278  		return -1
  1279  	}
  1280  	if daemon.containers == nil {
  1281  		return shutdownTimeout
  1282  	}
  1283  
  1284  	graceTimeout := 5
  1285  	for _, c := range daemon.containers.List() {
  1286  		stopTimeout := c.StopTimeout()
  1287  		if stopTimeout < 0 {
  1288  			return -1
  1289  		}
  1290  		if stopTimeout+graceTimeout > shutdownTimeout {
  1291  			shutdownTimeout = stopTimeout + graceTimeout
  1292  		}
  1293  	}
  1294  	return shutdownTimeout
  1295  }
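
        // A worked example (assumed values): with a daemon-level ShutdownTimeout of
        // 15 seconds and one container whose StopTimeout is 30 seconds, the
        // effective timeout is max(15, 30+5) = 35 seconds. A single container with
        // a negative StopTimeout switches the daemon to -1 ("indefinitely").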
  1296  
  1297  // Shutdown stops the daemon.
  1298  func (daemon *Daemon) Shutdown(ctx context.Context) error {
  1299  	daemon.shutdown = true
  1300  	// Keep mounts and networking running on daemon shutdown if
  1301  	// we are to keep containers running and restore them.
  1302  
  1303  	cfg := &daemon.config().Config
  1304  	if cfg.LiveRestoreEnabled && daemon.containers != nil {
  1305  		// check if there are any running containers; if none, we should do some cleanup
  1306  		if ls, err := daemon.Containers(ctx, &containertypes.ListOptions{}); len(ls) != 0 || err != nil {
  1307  			// metrics plugins still need some cleanup
  1308  			daemon.cleanupMetricsPlugins()
  1309  			return err
  1310  		}
  1311  	}
  1312  
  1313  	if daemon.containers != nil {
  1314  		log.G(ctx).Debugf("daemon configured with a %d seconds minimum shutdown timeout", cfg.ShutdownTimeout)
  1315  		log.G(ctx).Debugf("start clean shutdown of all containers with a %d seconds timeout...", daemon.shutdownTimeout(cfg))
  1316  		daemon.containers.ApplyAll(func(c *container.Container) {
  1317  			if !c.IsRunning() {
  1318  				return
  1319  			}
  1320  			logger := log.G(ctx).WithField("container", c.ID)
  1321  			logger.Debug("shutting down container")
  1322  			if err := daemon.shutdownContainer(c); err != nil {
  1323  				logger.WithError(err).Error("failed to shut down container")
  1324  				return
  1325  			}
  1326  			if mountid, err := daemon.imageService.GetLayerMountID(c.ID); err == nil {
  1327  				daemon.cleanupMountsByID(mountid)
  1328  			}
  1329  			logger.Debugf("shut down container")
  1330  		})
  1331  	}
  1332  
  1333  	if daemon.volumes != nil {
  1334  		if err := daemon.volumes.Shutdown(); err != nil {
  1335  			log.G(ctx).Errorf("Error shutting down volume store: %v", err)
  1336  		}
  1337  	}
  1338  
  1339  	if daemon.imageService != nil {
  1340  		if err := daemon.imageService.Cleanup(); err != nil {
  1341  			log.G(ctx).Error(err)
  1342  		}
  1343  	}
  1344  
  1345  	// If we are part of a cluster, clean up the cluster's resources
  1346  	if daemon.clusterProvider != nil {
  1347  		log.G(ctx).Debugf("start clean shutdown of cluster resources...")
  1348  		daemon.DaemonLeavesCluster()
  1349  	}
  1350  
  1351  	daemon.cleanupMetricsPlugins()
  1352  
  1353  	// Shutdown plugins after containers and layerstore. Don't change the order.
  1354  	daemon.pluginShutdown()
  1355  
  1356  	// trigger libnetwork Stop only if it's initialized
  1357  	if daemon.netController != nil {
  1358  		daemon.netController.Stop()
  1359  	}
  1360  
  1361  	if daemon.containerdClient != nil {
  1362  		daemon.containerdClient.Close()
  1363  	}
  1364  
  1365  	if daemon.mdDB != nil {
  1366  		daemon.mdDB.Close()
  1367  	}
  1368  
  1369  	return daemon.cleanupMounts(cfg)
  1370  }
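
// Illustrative sketch of how a caller might drive Shutdown, assuming a
// *Daemon value d obtained elsewhere; the signal handling below is
// hypothetical and not how cmd/dockerd actually wires things up:
//
//	sig := make(chan os.Signal, 1)
//	signal.Notify(sig, syscall.SIGTERM)
//	<-sig
//	if err := d.Shutdown(context.Background()); err != nil {
//		log.G(context.TODO()).WithError(err).Error("error during daemon shutdown")
//	}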
  1371  
  1372  // Mount mounts the container's root filesystem and sets container.BaseFS.
  1373  func (daemon *Daemon) Mount(container *container.Container) error {
  1374  	return daemon.imageService.Mount(context.Background(), container)
  1375  }
  1376  
  1377  // Unmount unsets the container base filesystem
  1378  func (daemon *Daemon) Unmount(container *container.Container) error {
  1379  	return daemon.imageService.Unmount(context.Background(), container)
  1380  }
  1381  
  1382  // Subnets returns the IPv4 and IPv6 subnets of networks that are managed by Docker.
  1383  func (daemon *Daemon) Subnets() ([]net.IPNet, []net.IPNet) {
  1384  	var v4Subnets []net.IPNet
  1385  	var v6Subnets []net.IPNet
  1386  
  1387  	for _, managedNetwork := range daemon.netController.Networks(context.TODO()) {
  1388  		v4infos, v6infos := managedNetwork.IpamInfo()
  1389  		for _, info := range v4infos {
  1390  			if info.IPAMData.Pool != nil {
  1391  				v4Subnets = append(v4Subnets, *info.IPAMData.Pool)
  1392  			}
  1393  		}
  1394  		for _, info := range v6infos {
  1395  			if info.IPAMData.Pool != nil {
  1396  				v6Subnets = append(v6Subnets, *info.IPAMData.Pool)
  1397  			}
  1398  		}
  1399  	}
  1400  
  1401  	return v4Subnets, v6Subnets
  1402  }
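
// Illustrative sketch: enumerating the Docker-managed address space, e.g. to
// check for collisions with host routes (the loop body is hypothetical):
//
//	v4, v6 := daemon.Subnets()
//	for _, subnet := range append(v4, v6...) {
//		log.G(context.TODO()).Debugf("managed subnet: %s", subnet.String())
//	}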
  1403  
  1404  // prepareTempDir prepares and returns the default directory to use
  1405  // for temporary files.
  1406  // If it doesn't exist, it is created. If it exists, its content is removed.
  1407  func prepareTempDir(rootDir string) (string, error) {
  1408  	var tmpDir string
  1409  	if tmpDir = os.Getenv("DOCKER_TMPDIR"); tmpDir == "" {
  1410  		tmpDir = filepath.Join(rootDir, "tmp")
  1411  		newName := tmpDir + "-old"
  1412  		if err := os.Rename(tmpDir, newName); err == nil {
  1413  			go func() {
  1414  				if err := os.RemoveAll(newName); err != nil {
  1415  					log.G(context.TODO()).Warnf("failed to delete old tmp directory: %s", newName)
  1416  				}
  1417  			}()
  1418  		} else if !os.IsNotExist(err) {
  1419  			log.G(context.TODO()).Warnf("failed to rename %s for background deletion: %s. Deleting synchronously", tmpDir, err)
  1420  			if err := os.RemoveAll(tmpDir); err != nil {
  1421  				log.G(context.TODO()).Warnf("failed to delete old tmp directory: %s", tmpDir)
  1422  			}
  1423  		}
  1424  	}
  1425  	return tmpDir, idtools.MkdirAllAndChown(tmpDir, 0o700, idtools.CurrentIdentity())
  1426  }
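
// Illustrative sketch of the resulting paths, assuming the stock daemon root
// and no DOCKER_TMPDIR override:
//
//	tmpDir, err := prepareTempDir("/var/lib/docker")
//	// tmpDir == "/var/lib/docker/tmp", created with mode 0o700; any
//	// pre-existing directory is renamed to "/var/lib/docker/tmp-old" and
//	// removed in the background.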
  1427  
  1428  func (daemon *Daemon) setGenericResources(conf *config.Config) error {
  1429  	genericResources, err := config.ParseGenericResources(conf.NodeGenericResources)
  1430  	if err != nil {
  1431  		return err
  1432  	}
  1433  
  1434  	daemon.genericResources = genericResources
  1435  
  1436  	return nil
  1437  }
  1438  
  1439  // IsShuttingDown reports whether the daemon is shutting down.
  1440  func (daemon *Daemon) IsShuttingDown() bool {
  1441  	return daemon.shutdown
  1442  }
  1443  
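// isBridgeNetworkDisabled reports whether the daemon was configured with the
// default bridge network disabled (the equivalent of starting the daemon
// with "--bridge=none").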
  1444  func isBridgeNetworkDisabled(conf *config.Config) bool {
  1445  	return conf.BridgeConfig.Iface == config.DisableNetworkBridge
  1446  }
  1447  
  1448  func (daemon *Daemon) networkOptions(conf *config.Config, pg plugingetter.PluginGetter, activeSandboxes map[string]interface{}) ([]nwconfig.Option, error) {
  1449  	dd := runconfig.DefaultDaemonNetworkMode()
  1450  
  1451  	options := []nwconfig.Option{
  1452  		nwconfig.OptionDataDir(conf.Root),
  1453  		nwconfig.OptionExecRoot(conf.GetExecRoot()),
  1454  		nwconfig.OptionDefaultDriver(string(dd)),
  1455  		nwconfig.OptionDefaultNetwork(dd.NetworkName()),
  1456  		nwconfig.OptionLabels(conf.Labels),
  1457  		nwconfig.OptionNetworkControlPlaneMTU(conf.NetworkControlPlaneMTU),
  1458  		driverOptions(conf),
  1459  	}
  1460  
  1461  	if len(conf.NetworkConfig.DefaultAddressPools.Value()) > 0 {
  1462  		options = append(options, nwconfig.OptionDefaultAddressPoolConfig(conf.NetworkConfig.DefaultAddressPools.Value()))
  1463  	}
  1464  	if conf.LiveRestoreEnabled && len(activeSandboxes) != 0 {
  1465  		options = append(options, nwconfig.OptionActiveSandboxes(activeSandboxes))
  1466  	}
  1467  	if pg != nil {
  1468  		options = append(options, nwconfig.OptionPluginGetter(pg))
  1469  	}
  1470  
  1471  	return options, nil
  1472  }
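
// Illustrative sketch: the returned options are what the daemon feeds into
// libnetwork when it creates its network controller during startup. The exact
// call site lives in the platform-specific initialization, and the cfg
// variable here is hypothetical, so this is a simplification:
//
//	netOptions, err := daemon.networkOptions(&cfg.Config, daemon.PluginStore, nil)
//	if err != nil {
//		return err
//	}
//	daemon.netController, err = libnetwork.New(netOptions...)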
  1473  
  1474  // GetCluster returns the cluster
  1475  func (daemon *Daemon) GetCluster() Cluster {
  1476  	return daemon.cluster
  1477  }
  1478  
  1479  // SetCluster sets the cluster
  1480  func (daemon *Daemon) SetCluster(cluster Cluster) {
  1481  	daemon.cluster = cluster
  1482  }
  1483  
  1484  func (daemon *Daemon) pluginShutdown() {
  1485  	manager := daemon.pluginManager
  1486  	// Check for a valid manager object. In error conditions, daemon init can fail,
  1487  	// and shutdown can be called before the plugin manager is initialized.
  1488  	if manager != nil {
  1489  		manager.Shutdown()
  1490  	}
  1491  }
  1492  
  1493  // PluginManager returns the current pluginManager associated with the daemon.
  1494  func (daemon *Daemon) PluginManager() *plugin.Manager { // set up before daemon to avoid this method
  1495  	return daemon.pluginManager
  1496  }
  1497  
  1498  // PluginGetter returns the current pluginStore associated with the daemon.
  1499  func (daemon *Daemon) PluginGetter() *plugin.Store {
  1500  	return daemon.PluginStore
  1501  }
  1502  
  1503  // CreateDaemonRoot creates the root for the daemon
  1504  func CreateDaemonRoot(config *config.Config) error {
  1505  	// get the canonical path to the Docker root directory
  1506  	var realRoot string
  1507  	if _, err := os.Stat(config.Root); err != nil && os.IsNotExist(err) {
  1508  		realRoot = config.Root
  1509  	} else {
  1510  		realRoot, err = fileutils.ReadSymlinkedDirectory(config.Root)
  1511  		if err != nil {
  1512  			return fmt.Errorf("unable to get the full path to root (%s): %v", config.Root, err)
  1513  		}
  1514  	}
  1515  
  1516  	idMapping, err := setupRemappedRoot(config)
  1517  	if err != nil {
  1518  		return err
  1519  	}
  1520  	return setupDaemonRoot(config, realRoot, idMapping.RootPair())
  1521  }
  1522  
  1523  // RemapContainerdNamespaces returns the containerd namespaces to use. If the
  1524  // namespaces are not already set in the config file and the daemon is running
  1525  // with user-namespace remapping enabled, it returns new namespace names
  1526  // suffixed with the remapped root UID and GID. Otherwise, it returns the
  1527  // existing namespaces.
  1528  func RemapContainerdNamespaces(config *config.Config) (ns string, pluginNs string, err error) {
  1529  	idMapping, err := setupRemappedRoot(config)
  1530  	if err != nil {
  1531  		return "", "", err
  1532  	}
  1533  	if idMapping.Empty() {
  1534  		return config.ContainerdNamespace, config.ContainerdPluginNamespace, nil
  1535  	}
  1536  	root := idMapping.RootPair()
  1537  
  1538  	ns = config.ContainerdNamespace
  1539  	if _, ok := config.ValuesSet["containerd-namespace"]; !ok {
  1540  		ns = fmt.Sprintf("%s-%d.%d", config.ContainerdNamespace, root.UID, root.GID)
  1541  	}
  1542  
  1543  	pluginNs = config.ContainerdPluginNamespace
  1544  	if _, ok := config.ValuesSet["containerd-plugin-namespace"]; !ok {
  1545  		pluginNs = fmt.Sprintf("%s-%d.%d", config.ContainerdPluginNamespace, root.UID, root.GID)
  1546  	}
  1547  
  1548  	return
  1549  }
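
// Illustrative sketch, assuming the stock namespace defaults ("moby" and
// "plugins.moby") and a daemon remapped to UID/GID 100000, with neither
// namespace explicitly set in the config file (cfg is hypothetical):
//
//	ns, pluginNs, err := RemapContainerdNamespaces(cfg)
//	// ns       == "moby-100000.100000"
//	// pluginNs == "plugins.moby-100000.100000"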
  1550  
  1551  // checkpointAndSave grabs a container lock to safely call container.CheckpointTo
  1552  func (daemon *Daemon) checkpointAndSave(container *container.Container) error {
  1553  	container.Lock()
  1554  	defer container.Unlock()
  1555  	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
  1556  		return fmt.Errorf("error saving container state: %v", err)
  1557  	}
  1558  	return nil
  1559  }
  1560  
  1561  // fixMemorySwappiness clears the MemorySwappiness field on the server side,
  1562  // because the CLI sends -1 when it wants to unset the value.
  1563  func fixMemorySwappiness(resources *containertypes.Resources) {
  1564  	if resources.MemorySwappiness != nil && *resources.MemorySwappiness == -1 {
  1565  		resources.MemorySwappiness = nil
  1566  	}
  1567  }
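
// Illustrative sketch: -1 coming from the CLI is a sentinel for "unset", so
// after the call the field is nil and the kernel default applies:
//
//	swappiness := int64(-1)
//	resources := &containertypes.Resources{MemorySwappiness: &swappiness}
//	fixMemorySwappiness(resources)
//	// resources.MemorySwappiness == nil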
  1568  
  1569  // GetAttachmentStore returns the current attachment store associated with the daemon.
  1570  func (daemon *Daemon) GetAttachmentStore() *network.AttachmentStore {
  1571  	return &daemon.attachmentStore
  1572  }
  1573  
  1574  // IdentityMapping returns the uid/gid mapping, or a SID (on Windows), for the builder.
  1575  func (daemon *Daemon) IdentityMapping() idtools.IdentityMapping {
  1576  	return daemon.idMapping
  1577  }
  1578  
  1579  // ImageService returns the Daemon's ImageService
  1580  func (daemon *Daemon) ImageService() ImageService {
  1581  	return daemon.imageService
  1582  }
  1583  
  1584  // ImageBackend returns an image-backend for Swarm and the distribution router.
  1585  func (daemon *Daemon) ImageBackend() executorpkg.ImageBackend {
  1586  	return &imageBackend{
  1587  		ImageService:    daemon.imageService,
  1588  		registryService: daemon.registryService,
  1589  	}
  1590  }
  1591  
  1592  // RegistryService returns the Daemon's RegistryService
  1593  func (daemon *Daemon) RegistryService() *registry.Service {
  1594  	return daemon.registryService
  1595  }
  1596  
  1597  // BuilderBackend returns the backend used by the builder.
  1598  func (daemon *Daemon) BuilderBackend() builder.Backend {
  1599  	return struct {
  1600  		*Daemon
  1601  		ImageService
  1602  	}{daemon, daemon.imageService}
  1603  }
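
// The anonymous struct above works because Go promotes the methods of both
// embedded values: image-related methods resolve to daemon.imageService, and
// everything else builder.Backend requires resolves to the *Daemon itself.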
  1604  
  1605  // RawSysInfo returns *sysinfo.SysInfo.
  1606  func (daemon *Daemon) RawSysInfo() *sysinfo.SysInfo {
  1607  	daemon.sysInfoOnce.Do(func() {
  1608  		// We check whether sysInfo is already set here to allow some tests
  1609  		// to override the actual sysInfo.
  1610  		if daemon.sysInfo == nil {
  1611  			daemon.sysInfo = getSysInfo(&daemon.config().Config)
  1612  		}
  1613  	})
  1614  
  1615  	return daemon.sysInfo
  1616  }
  1617  
  1618  // imageBackend is used to satisfy the [executorpkg.ImageBackend] and
  1619  // [github.com/docker/docker/api/server/router/distribution.Backend]
  1620  // interfaces.
  1621  type imageBackend struct {
  1622  	ImageService
  1623  	registryService *registry.Service
  1624  }
  1625  
  1626  // GetRepositories returns a list of repositories configured for the given
  1627  // reference. Multiple repositories can be returned if the reference is for
  1628  // the default (Docker Hub) registry and a mirror is configured, but it omits
  1629  // registries that were not reachable (pinging the /v2/ endpoint failed).
  1630  //
  1631  // It returns an error if it was unable to reach any of the registries for
  1632  // the given reference, or if the provided reference is invalid.
  1633  func (i *imageBackend) GetRepositories(ctx context.Context, ref reference.Named, authConfig *registrytypes.AuthConfig) ([]dist.Repository, error) {
  1634  	return distribution.GetRepositories(ctx, ref, &distribution.ImagePullConfig{
  1635  		Config: distribution.Config{
  1636  			AuthConfig:      authConfig,
  1637  			RegistryService: i.registryService,
  1638  		},
  1639  	})
  1640  }
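
// Illustrative sketch, assuming a normalized reference and optional
// credentials; the ctx and backend variables here are hypothetical:
//
//	ref, _ := reference.ParseNormalizedNamed("docker.io/library/alpine:latest")
//	repos, err := backend.GetRepositories(ctx, ref, &registrytypes.AuthConfig{})
//	// With a Docker Hub mirror configured, repos lists the reachable
//	// mirror(s) as well as Docker Hub itself.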