github.com/moby/docker@v26.1.3+incompatible/daemon/daemon.go

     1  // FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
     2  //go:build go1.19
     3  
     4  // Package daemon exposes the functions that occur on the host server
     5  // that the Docker daemon is running.
     6  //
     7  // In implementing the various functions of the daemon, there is often
     8  // a method-specific struct for configuring the runtime behavior.
     9  package daemon // import "github.com/docker/docker/daemon"
    10  
    11  import (
    12  	"context"
    13  	"fmt"
    14  	"net"
    15  	"os"
    16  	"path"
    17  	"path/filepath"
    18  	"runtime"
    19  	"sync"
    20  	"sync/atomic"
    21  	"time"
    22  
    23  	"github.com/containerd/containerd"
    24  	"github.com/containerd/containerd/defaults"
    25  	"github.com/containerd/containerd/pkg/dialer"
    26  	"github.com/containerd/containerd/pkg/userns"
    27  	"github.com/containerd/containerd/remotes/docker"
    28  	"github.com/containerd/log"
    29  	"github.com/distribution/reference"
    30  	dist "github.com/docker/distribution"
    31  	"github.com/docker/docker/api/types"
    32  	"github.com/docker/docker/api/types/backend"
    33  	containertypes "github.com/docker/docker/api/types/container"
    34  	imagetypes "github.com/docker/docker/api/types/image"
    35  	networktypes "github.com/docker/docker/api/types/network"
    36  	registrytypes "github.com/docker/docker/api/types/registry"
    37  	"github.com/docker/docker/api/types/swarm"
    38  	"github.com/docker/docker/api/types/volume"
    39  	"github.com/docker/docker/builder"
    40  	"github.com/docker/docker/container"
    41  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    42  	"github.com/docker/docker/daemon/config"
    43  	ctrd "github.com/docker/docker/daemon/containerd"
    44  	"github.com/docker/docker/daemon/events"
    45  	_ "github.com/docker/docker/daemon/graphdriver/register" // register graph drivers
    46  	"github.com/docker/docker/daemon/images"
    47  	dlogger "github.com/docker/docker/daemon/logger"
    48  	"github.com/docker/docker/daemon/logger/local"
    49  	"github.com/docker/docker/daemon/network"
    50  	"github.com/docker/docker/daemon/snapshotter"
    51  	"github.com/docker/docker/daemon/stats"
    52  	"github.com/docker/docker/distribution"
    53  	dmetadata "github.com/docker/docker/distribution/metadata"
    54  	"github.com/docker/docker/dockerversion"
    55  	"github.com/docker/docker/errdefs"
    56  	"github.com/docker/docker/image"
    57  	"github.com/docker/docker/internal/compatcontext"
    58  	"github.com/docker/docker/layer"
    59  	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
    60  	"github.com/docker/docker/libnetwork"
    61  	"github.com/docker/docker/libnetwork/cluster"
    62  	nwconfig "github.com/docker/docker/libnetwork/config"
    63  	"github.com/docker/docker/pkg/authorization"
    64  	"github.com/docker/docker/pkg/fileutils"
    65  	"github.com/docker/docker/pkg/idtools"
    66  	"github.com/docker/docker/pkg/plugingetter"
    67  	"github.com/docker/docker/pkg/sysinfo"
    68  	"github.com/docker/docker/pkg/system"
    69  	"github.com/docker/docker/plugin"
    70  	pluginexec "github.com/docker/docker/plugin/executor/containerd"
    71  	refstore "github.com/docker/docker/reference"
    72  	"github.com/docker/docker/registry"
    73  	"github.com/docker/docker/runconfig"
    74  	volumesservice "github.com/docker/docker/volume/service"
    75  	"github.com/moby/buildkit/util/resolver"
    76  	resolverconfig "github.com/moby/buildkit/util/resolver/config"
    77  	"github.com/moby/locker"
    78  	"github.com/pkg/errors"
    79  	"go.etcd.io/bbolt"
    80  	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
    81  	"golang.org/x/sync/semaphore"
    82  	"google.golang.org/grpc"
    83  	"google.golang.org/grpc/backoff"
    84  	"google.golang.org/grpc/credentials/insecure"
    85  	"resenje.org/singleflight"
    86  )
    87  
    88  type configStore struct {
    89  	config.Config
    90  
    91  	Runtimes runtimes
    92  }
    93  
    94  // Daemon holds information about the Docker daemon.
    95  type Daemon struct {
    96  	id                    string
    97  	repository            string
    98  	containers            container.Store
    99  	containersReplica     *container.ViewDB
   100  	execCommands          *container.ExecStore
   101  	imageService          ImageService
   102  	configStore           atomic.Pointer[configStore]
   103  	configReload          sync.Mutex
   104  	statsCollector        *stats.Collector
   105  	defaultLogConfig      containertypes.LogConfig
   106  	registryService       *registry.Service
   107  	EventsService         *events.Events
   108  	netController         *libnetwork.Controller
   109  	volumes               *volumesservice.VolumesService
   110  	root                  string
   111  	sysInfoOnce           sync.Once
   112  	sysInfo               *sysinfo.SysInfo
   113  	shutdown              bool
   114  	idMapping             idtools.IdentityMapping
   115  	PluginStore           *plugin.Store // TODO: remove
   116  	pluginManager         *plugin.Manager
   117  	linkIndex             *linkIndex
   118  	containerdClient      *containerd.Client
   119  	containerd            libcontainerdtypes.Client
   120  	defaultIsolation      containertypes.Isolation // Default isolation mode on Windows
   121  	clusterProvider       cluster.Provider
   122  	cluster               Cluster
   123  	genericResources      []swarm.GenericResource
   124  	metricsPluginListener net.Listener
   125  	ReferenceStore        refstore.Store
   126  
   127  	machineMemory uint64
   128  
   129  	seccompProfile     []byte
   130  	seccompProfilePath string
   131  
   132  	usageContainers singleflight.Group[struct{}, []*types.Container]
   133  	usageImages     singleflight.Group[struct{}, []*imagetypes.Summary]
   134  	usageVolumes    singleflight.Group[struct{}, []*volume.Volume]
   135  	usageLayer      singleflight.Group[struct{}, int64]
   136  
   137  	pruneRunning int32
   138  	hosts        map[string]bool // hosts stores the addresses the daemon is listening on
   139  	startupDone  chan struct{}
   140  
   141  	attachmentStore       network.AttachmentStore
   142  	attachableNetworkLock *locker.Locker
   143  
   144  	// This is used on Windows, which doesn't currently support running on containerd.
   145  	// It stores metadata for the content store (used for manifest caching).
   146  	// This needs to be closed on daemon exit.
   147  	mdDB *bbolt.DB
   148  
   149  	usesSnapshotter bool
   150  }
   151  
   152  // ID returns the daemon id
   153  func (daemon *Daemon) ID() string {
   154  	return daemon.id
   155  }
   156  
   157  // StoreHosts stores the addresses the daemon is listening on
   158  func (daemon *Daemon) StoreHosts(hosts []string) {
   159  	if daemon.hosts == nil {
   160  		daemon.hosts = make(map[string]bool)
   161  	}
   162  	for _, h := range hosts {
   163  		daemon.hosts[h] = true
   164  	}
   165  }
   166  
   167  // config returns an immutable snapshot of the current daemon configuration.
   168  // Multiple calls to this function will return the same pointer until the
   169  // configuration is reloaded so callers must take care not to modify the
   170  // returned value.
   171  //
   172  // To ensure that the configuration used remains consistent throughout the
   173  // lifetime of an operation, the configuration pointer should be passed down the
   174  // call stack, like one would a [context.Context] value. Only the entrypoints
   175  // for operations, the outermost functions, should call this function.
   176  func (daemon *Daemon) config() *configStore {
   177  	cfg := daemon.configStore.Load()
   178  	if cfg == nil {
   179  		return &configStore{}
   180  	}
   181  	return cfg
   182  }
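
// Illustrative sketch (not part of the daemon API): the intended usage of
// config() described above. An operation's entrypoint takes a single snapshot
// and passes the same pointer down its call stack, and helpers treat it as
// read-only. The exampleOperation and exampleConfigStep names are hypothetical.
func exampleOperation(daemon *Daemon) error {
	cfg := daemon.config() // one snapshot for the whole operation
	return exampleConfigStep(daemon, cfg)
}

func exampleConfigStep(_ *Daemon, cfg *configStore) error {
	_ = cfg.Root // read-only access; never mutate the shared snapshot
	return nil
}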
   183  
   184  // Config returns daemon's config.
   185  func (daemon *Daemon) Config() config.Config {
   186  	return daemon.config().Config
   187  }
   188  
   189  // HasExperimental returns whether the experimental features of the daemon are enabled or not
   190  func (daemon *Daemon) HasExperimental() bool {
   191  	return daemon.config().Experimental
   192  }
   193  
   194  // Features returns the features map from configStore
   195  func (daemon *Daemon) Features() map[string]bool {
   196  	return daemon.config().Features
   197  }
   198  
   199  // UsesSnapshotter returns true if the feature flag to use the containerd snapshotter is enabled
   200  func (daemon *Daemon) UsesSnapshotter() bool {
   201  	return daemon.usesSnapshotter
   202  }
   203  
   204  // RegistryHosts returns the registry hosts configuration for the host component
   205  // of a distribution image reference.
   206  func (daemon *Daemon) RegistryHosts(host string) ([]docker.RegistryHost, error) {
   207  	m := map[string]resolverconfig.RegistryConfig{
   208  		"docker.io": {Mirrors: daemon.registryService.ServiceConfig().Mirrors},
   209  	}
   210  	conf := daemon.registryService.ServiceConfig().IndexConfigs
   211  	for k, v := range conf {
   212  		c := m[k]
   213  		if !v.Secure {
   214  			t := true
   215  			c.PlainHTTP = &t
   216  			c.Insecure = &t
   217  		}
   218  		m[k] = c
   219  	}
   220  	if c, ok := m[host]; !ok && daemon.registryService.IsInsecureRegistry(host) {
   221  		t := true
   222  		c.PlainHTTP = &t
   223  		c.Insecure = &t
   224  		m[host] = c
   225  	}
   226  
   227  	for k, v := range m {
   228  		v.TLSConfigDir = []string{registry.HostCertsDir(k)}
   229  		m[k] = v
   230  	}
   231  
   232  	certsDir := registry.CertsDir()
   233  	if fis, err := os.ReadDir(certsDir); err == nil {
   234  		for _, fi := range fis {
   235  			if _, ok := m[fi.Name()]; !ok {
   236  				m[fi.Name()] = resolverconfig.RegistryConfig{
   237  					TLSConfigDir: []string{filepath.Join(certsDir, fi.Name())},
   238  				}
   239  			}
   240  		}
   241  	}
   242  
   243  	return resolver.NewRegistryConfig(m)(host)
   244  }
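
// Hedged usage sketch (hypothetical helper, not part of the daemon): resolving
// and logging the hosts that a pull for a given registry would go through,
// using the RegistryHosts method above.
func logRegistryHosts(daemon *Daemon, host string) {
	hosts, err := daemon.RegistryHosts(host)
	if err != nil {
		log.G(context.TODO()).WithError(err).WithField("host", host).Warn("failed to resolve registry hosts")
		return
	}
	for _, h := range hosts {
		log.G(context.TODO()).Infof("registry host for %s: %s://%s%s", host, h.Scheme, h.Host, h.Path)
	}
}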
   245  
   246  // layerAccessor may be implemented by ImageService
   247  type layerAccessor interface {
   248  	GetLayerByID(cid string) (layer.RWLayer, error)
   249  }
   250  
   251  func (daemon *Daemon) restore(cfg *configStore) error {
   252  	var mapLock sync.Mutex
   253  	containers := make(map[string]*container.Container)
   254  
   255  	log.G(context.TODO()).Info("Loading containers: start.")
   256  
   257  	dir, err := os.ReadDir(daemon.repository)
   258  	if err != nil {
   259  		return err
   260  	}
   261  
   262  	// parallelLimit is the maximum number of parallel startup jobs that we
   263  	// allow (this is the limit used for all startup semaphores). The multiplier
   264  	// (128) was chosen after some fairly significant benchmarking -- don't change
   265  	// it unless you've tested it significantly (this value is adjusted if
   266  	// RLIMIT_NOFILE is small to avoid EMFILE). A sketch of this pattern follows the function.
   267  	parallelLimit := adjustParallelLimit(len(dir), 128*runtime.NumCPU())
   268  
   269  	// Re-used for all parallel startup jobs.
   270  	var group sync.WaitGroup
   271  	sem := semaphore.NewWeighted(int64(parallelLimit))
   272  
   273  	for _, v := range dir {
   274  		group.Add(1)
   275  		go func(id string) {
   276  			defer group.Done()
   277  			_ = sem.Acquire(context.Background(), 1)
   278  			defer sem.Release(1)
   279  
   280  			logger := log.G(context.TODO()).WithField("container", id)
   281  
   282  			c, err := daemon.load(id)
   283  			if err != nil {
   284  				logger.WithError(err).Error("failed to load container")
   285  				return
   286  			}
   287  			if c.Driver != daemon.imageService.StorageDriver() {
   288  				// Ignore the container if it wasn't created with the current storage-driver
   289  				logger.Debugf("not restoring container because it was created with another storage driver (%s)", c.Driver)
   290  				return
   291  			}
   292  			if accessor, ok := daemon.imageService.(layerAccessor); ok {
   293  				rwlayer, err := accessor.GetLayerByID(c.ID)
   294  				if err != nil {
   295  					logger.WithError(err).Error("failed to load container mount")
   296  					return
   297  				}
   298  				c.RWLayer = rwlayer
   299  			}
   300  			logger.WithFields(log.Fields{
   301  				"running": c.IsRunning(),
   302  				"paused":  c.IsPaused(),
   303  			}).Debug("loaded container")
   304  
   305  			mapLock.Lock()
   306  			containers[c.ID] = c
   307  			mapLock.Unlock()
   308  		}(v.Name())
   309  	}
   310  	group.Wait()
   311  
   312  	removeContainers := make(map[string]*container.Container)
   313  	restartContainers := make(map[*container.Container]chan struct{})
   314  	activeSandboxes := make(map[string]interface{})
   315  
   316  	for _, c := range containers {
   317  		group.Add(1)
   318  		go func(c *container.Container) {
   319  			defer group.Done()
   320  			_ = sem.Acquire(context.Background(), 1)
   321  			defer sem.Release(1)
   322  
   323  			logger := log.G(context.TODO()).WithField("container", c.ID)
   324  
   325  			if err := daemon.registerName(c); err != nil {
   326  				logger.WithError(err).Errorf("failed to register container name: %s", c.Name)
   327  				mapLock.Lock()
   328  				delete(containers, c.ID)
   329  				mapLock.Unlock()
   330  				return
   331  			}
   332  			if err := daemon.Register(c); err != nil {
   333  				logger.WithError(err).Error("failed to register container")
   334  				mapLock.Lock()
   335  				delete(containers, c.ID)
   336  				mapLock.Unlock()
   337  				return
   338  			}
   339  		}(c)
   340  	}
   341  	group.Wait()
   342  
   343  	for _, c := range containers {
   344  		group.Add(1)
   345  		go func(c *container.Container) {
   346  			defer group.Done()
   347  			_ = sem.Acquire(context.Background(), 1)
   348  			defer sem.Release(1)
   349  
   350  			baseLogger := log.G(context.TODO()).WithField("container", c.ID)
   351  
   352  			if c.HostConfig != nil {
   353  				// Migrate containers that don't have the default ("no") restart-policy set.
   354  				// The RestartPolicy.Name field may be empty for containers that were
   355  				// created with versions before v25.0.0.
   356  				//
   357  				// We also need to set the MaximumRetryCount to 0, to prevent
   358  				// validation from failing (MaximumRetryCount is not allowed if
   359  				// no restart-policy ("no") is set).
   360  				if c.HostConfig.RestartPolicy.Name == "" {
   361  					baseLogger.Debug("migrated restart-policy")
   362  					c.HostConfig.RestartPolicy.Name = containertypes.RestartPolicyDisabled
   363  					c.HostConfig.RestartPolicy.MaximumRetryCount = 0
   364  				}
   365  
   366  				// Migrate containers that use the deprecated (and now non-functional)
   367  				// logentries driver. Update them to use the "local" logging driver
   368  				// instead.
   369  				//
   370  				// TODO(thaJeztah): remove logentries check and migration code in release v26.0.0.
   371  				if c.HostConfig.LogConfig.Type == "logentries" {
   372  					baseLogger.Warn("migrated deprecated logentries logging driver")
   373  					c.HostConfig.LogConfig = containertypes.LogConfig{
   374  						Type: local.Name,
   375  					}
   376  				}
   377  
   378  				// Normalize the "default" network mode into the network mode
   379  				// it aliases ("bridge" on Linux and "nat" on Windows). This is
   380  				// also done by the container router, for new containers. But
   381  				// we need to do it here too to handle containers that were
   382  				// created prior to v26.0.
   383  				//
   384  				// TODO(aker): remove this migration code once the next LTM version of MCR is released.
   385  				if c.HostConfig.NetworkMode.IsDefault() {
   386  					c.HostConfig.NetworkMode = runconfig.DefaultDaemonNetworkMode()
   387  					if nw, ok := c.NetworkSettings.Networks[networktypes.NetworkDefault]; ok {
   388  						c.NetworkSettings.Networks[c.HostConfig.NetworkMode.NetworkName()] = nw
   389  						delete(c.NetworkSettings.Networks, networktypes.NetworkDefault)
   390  					}
   391  				}
   392  			}
   393  
   394  			if err := daemon.checkpointAndSave(c); err != nil {
   395  				baseLogger.WithError(err).Error("failed to save migrated container config to disk")
   396  			}
   397  
   398  			daemon.setStateCounter(c)
   399  
   400  			logger := func(c *container.Container) *log.Entry {
   401  				return baseLogger.WithFields(log.Fields{
   402  					"running":    c.IsRunning(),
   403  					"paused":     c.IsPaused(),
   404  					"restarting": c.IsRestarting(),
   405  				})
   406  			}
   407  
   408  			logger(c).Debug("restoring container")
   409  
   410  			var es *containerd.ExitStatus
   411  
   412  			if err := c.RestoreTask(context.Background(), daemon.containerd); err != nil && !errdefs.IsNotFound(err) {
   413  				logger(c).WithError(err).Error("failed to restore container with containerd")
   414  				return
   415  			}
   416  
   417  			alive := false
   418  			status := containerd.Unknown
   419  			if tsk, ok := c.Task(); ok {
   420  				s, err := tsk.Status(context.Background())
   421  				if err != nil {
   422  					logger(c).WithError(err).Error("failed to get task status")
   423  				} else {
   424  					status = s.Status
   425  					alive = status != containerd.Stopped
   426  					if !alive {
   427  						logger(c).Debug("cleaning up dead container process")
   428  						es, err = tsk.Delete(context.Background())
   429  						if err != nil && !errdefs.IsNotFound(err) {
   430  							logger(c).WithError(err).Error("failed to delete task from containerd")
   431  							return
   432  						}
   433  					} else if !cfg.LiveRestoreEnabled {
   434  						logger(c).Debug("shutting down container considered alive by containerd")
   435  						if err := daemon.shutdownContainer(c); err != nil && !errdefs.IsNotFound(err) {
   436  							baseLogger.WithError(err).Error("error shutting down container")
   437  							return
   438  						}
   439  						status = containerd.Stopped
   440  						alive = false
   441  						c.ResetRestartManager(false)
   442  					}
   443  				}
   444  			}
   445  			// If the containerd task for the container was not found, docker's view of the
   446  			// container state will be updated accordingly via SetStopped further down.
   447  
   448  			if c.IsRunning() || c.IsPaused() {
   449  				logger(c).Debug("syncing container on disk state with real state")
   450  
   451  				c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking
   452  
   453  				switch {
   454  				case c.IsPaused() && alive:
   455  					logger(c).WithField("state", status).Info("restored container paused")
   456  					switch status {
   457  					case containerd.Paused, containerd.Pausing:
   458  						// nothing to do
   459  					case containerd.Unknown, containerd.Stopped, "":
   460  						baseLogger.WithField("status", status).Error("unexpected status for paused container during restore")
   461  					default:
   462  						// running
   463  						c.Lock()
   464  						c.Paused = false
   465  						daemon.setStateCounter(c)
   466  						daemon.initHealthMonitor(c)
   467  						if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   468  							baseLogger.WithError(err).Error("failed to update paused container state")
   469  						}
   470  						c.Unlock()
   471  					}
   472  				case !c.IsPaused() && alive:
   473  					logger(c).Debug("restoring healthcheck")
   474  					c.Lock()
   475  					daemon.initHealthMonitor(c)
   476  					c.Unlock()
   477  				}
   478  
   479  				if !alive {
   480  					logger(c).Debug("setting stopped state")
   481  					c.Lock()
   482  					var ces container.ExitStatus
   483  					if es != nil {
   484  						ces.ExitCode = int(es.ExitCode())
   485  						ces.ExitedAt = es.ExitTime()
   486  					} else {
   487  						ces.ExitCode = 255
   488  					}
   489  					c.SetStopped(&ces)
   490  					daemon.Cleanup(context.TODO(), c)
   491  					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   492  						baseLogger.WithError(err).Error("failed to update stopped container state")
   493  					}
   494  					c.Unlock()
   495  					logger(c).Debug("set stopped state")
   496  				}
   497  
   498  				// we call Mount and then Unmount to get BaseFs of the container
   499  				if err := daemon.Mount(c); err != nil {
   500  					// The mount is unlikely to fail. However, if it does fail,
   501  					// the container should still be allowed to restore here. Some functionality
   502  					// (like docker exec -u user) might be missing, but the container can still be
   503  					// stopped/restarted/removed.
   504  					// See #29365 for related information.
   505  					// The error is only logged here.
   506  					logger(c).WithError(err).Warn("failed to mount container to get BaseFs path")
   507  				} else {
   508  					if err := daemon.Unmount(c); err != nil {
   509  						logger(c).WithError(err).Warn("failed to umount container to get BaseFs path")
   510  					}
   511  				}
   512  
   513  				c.ResetRestartManager(false)
   514  				if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() {
   515  					options, err := daemon.buildSandboxOptions(&cfg.Config, c)
   516  					if err != nil {
   517  						logger(c).WithError(err).Warn("failed to build sandbox option to restore container")
   518  					}
   519  					mapLock.Lock()
   520  					activeSandboxes[c.NetworkSettings.SandboxID] = options
   521  					mapLock.Unlock()
   522  				}
   523  			}
   524  
   525  			// get list of containers we need to restart
   526  
   527  			// Do not autostart containers which
   528  			// have endpoints in a swarm-scope
   529  			// network yet, since the cluster is
   530  			// not initialized yet. We will start
   531  			// them after the cluster has been
   532  			// initialized.
   533  			if cfg.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint && c.HasBeenStartedBefore {
   534  				mapLock.Lock()
   535  				restartContainers[c] = make(chan struct{})
   536  				mapLock.Unlock()
   537  			} else if c.HostConfig != nil && c.HostConfig.AutoRemove {
   538  				// Remove the container if live-restore is disabled or if the container has already exited.
   539  				if !cfg.LiveRestoreEnabled || !alive {
   540  					mapLock.Lock()
   541  					removeContainers[c.ID] = c
   542  					mapLock.Unlock()
   543  				}
   544  			}
   545  
   546  			c.Lock()
   547  			if c.RemovalInProgress {
   548  				// We probably crashed in the middle of a removal, reset
   549  				// the flag.
   550  				//
   551  				// We DO NOT remove the container here as we do not
   552  				// know if the user had requested for either the
   553  				// associated volumes, network links or both to also
   554  				// be removed. So we put the container in the "dead"
   555  				// state and leave further processing up to them.
   556  				c.RemovalInProgress = false
   557  				c.Dead = true
   558  				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   559  					baseLogger.WithError(err).Error("failed to update RemovalInProgress container state")
   560  				} else {
   561  					baseLogger.Debugf("reset RemovalInProgress state for container")
   562  				}
   563  			}
   564  			c.Unlock()
   565  			logger(c).Debug("done restoring container")
   566  		}(c)
   567  	}
   568  	group.Wait()
   569  
   570  	// Initialize the network controller and configure network settings.
   571  	//
   572  	// Note that we cannot initialize the network controller earlier, as it
   573  	// needs to know if there are active sandboxes (running containers).
   574  	if err = daemon.initNetworkController(&cfg.Config, activeSandboxes); err != nil {
   575  		return fmt.Errorf("Error initializing network controller: %v", err)
   576  	}
   577  
   578  	// Now that all the containers are registered, register the links
   579  	for _, c := range containers {
   580  		group.Add(1)
   581  		go func(c *container.Container) {
   582  			_ = sem.Acquire(context.Background(), 1)
   583  
   584  			if err := daemon.registerLinks(c, c.HostConfig); err != nil {
   585  				log.G(context.TODO()).WithField("container", c.ID).WithError(err).Error("failed to register link for container")
   586  			}
   587  
   588  			sem.Release(1)
   589  			group.Done()
   590  		}(c)
   591  	}
   592  	group.Wait()
   593  
   594  	for c, notifyChan := range restartContainers {
   595  		group.Add(1)
   596  		go func(c *container.Container, chNotify chan struct{}) {
   597  			_ = sem.Acquire(context.Background(), 1)
   598  
   599  			logger := log.G(context.TODO()).WithField("container", c.ID)
   600  
   601  			logger.Debug("starting container")
   602  
   603  			// Ignore errors here, as this is a best-effort attempt to wait for children
   604  			// to be running before we try to start the container.
   605  			children := daemon.children(c)
   606  			timeout := time.NewTimer(5 * time.Second)
   607  			defer timeout.Stop()
   608  
   609  			for _, child := range children {
   610  				if notifier, exists := restartContainers[child]; exists {
   611  					select {
   612  					case <-notifier:
   613  					case <-timeout.C:
   614  					}
   615  				}
   616  			}
   617  
   618  			if err := daemon.prepareMountPoints(c); err != nil {
   619  				logger.WithError(err).Error("failed to prepare mount points for container")
   620  			}
   621  			if err := daemon.containerStart(context.Background(), cfg, c, "", "", true); err != nil {
   622  				logger.WithError(err).Error("failed to start container")
   623  			}
   624  			close(chNotify)
   625  
   626  			sem.Release(1)
   627  			group.Done()
   628  		}(c, notifyChan)
   629  	}
   630  	group.Wait()
   631  
   632  	for id := range removeContainers {
   633  		group.Add(1)
   634  		go func(cid string) {
   635  			_ = sem.Acquire(context.Background(), 1)
   636  
   637  			if err := daemon.containerRm(&cfg.Config, cid, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
   638  				log.G(context.TODO()).WithField("container", cid).WithError(err).Error("failed to remove container")
   639  			}
   640  
   641  			sem.Release(1)
   642  			group.Done()
   643  		}(id)
   644  	}
   645  	group.Wait()
   646  
   647  	// Any containers that were started above will already have had this done;
   648  	// however, we now need to prepare the mountpoints for the rest of the containers as well.
   649  	// Running this on containers that already had it run shouldn't cause any issues.
   650  	// This must run after any containers with a restart policy, so that containerized plugins
   651  	// have a chance to be running before we try to initialize them.
   652  	for _, c := range containers {
   653  		// If the container has a restart policy, do not
   654  		// prepare the mountpoints, since that has already been done on restarting.
   655  		// This speeds up daemon startup when a restarting container
   656  		// has a volume and the volume driver is not available.
   657  		if _, ok := restartContainers[c]; ok {
   658  			continue
   659  		} else if _, ok := removeContainers[c.ID]; ok {
   660  			// container is automatically removed, skip it.
   661  			continue
   662  		}
   663  
   664  		group.Add(1)
   665  		go func(c *container.Container) {
   666  			_ = sem.Acquire(context.Background(), 1)
   667  
   668  			if err := daemon.prepareMountPoints(c); err != nil {
   669  				log.G(context.TODO()).WithField("container", c.ID).WithError(err).Error("failed to prepare mountpoints for container")
   670  			}
   671  
   672  			sem.Release(1)
   673  			group.Done()
   674  		}(c)
   675  	}
   676  	group.Wait()
   677  
   678  	log.G(context.TODO()).Info("Loading containers: done.")
   679  
   680  	return nil
   681  }
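
// Illustrative sketch of the bounded-parallelism pattern used throughout
// restore() above: a sync.WaitGroup tracks completion while a weighted
// semaphore caps how many jobs run at once. The runBounded helper and its
// signature are hypothetical.
func runBounded(items []string, limit int64, work func(string)) {
	var group sync.WaitGroup
	sem := semaphore.NewWeighted(limit)
	for _, item := range items {
		group.Add(1)
		go func(item string) {
			defer group.Done()
			// Blocks until one of the "limit" slots is free.
			_ = sem.Acquire(context.Background(), 1)
			defer sem.Release(1)
			work(item)
		}(item)
	}
	group.Wait()
}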
   682  
   683  // RestartSwarmContainers restarts any autostart container which has a
   684  // swarm endpoint.
   685  func (daemon *Daemon) RestartSwarmContainers() {
   686  	daemon.restartSwarmContainers(context.Background(), daemon.config())
   687  }
   688  
   689  func (daemon *Daemon) restartSwarmContainers(ctx context.Context, cfg *configStore) {
   690  	// parallelLimit is the maximum number of parallel startup jobs that we
   691  	// allow (this is the limit used for all startup semaphores). The multiplier
   692  	// (128) was chosen after some fairly significant benchmarking -- don't change
   693  	// it unless you've tested it significantly (this value is adjusted if
   694  	// RLIMIT_NOFILE is small to avoid EMFILE).
   695  	parallelLimit := adjustParallelLimit(len(daemon.List()), 128*runtime.NumCPU())
   696  
   697  	var group sync.WaitGroup
   698  	sem := semaphore.NewWeighted(int64(parallelLimit))
   699  
   700  	for _, c := range daemon.List() {
   701  		if !c.IsRunning() && !c.IsPaused() {
   702  			// Autostart all the containers which have a
   703  			// swarm endpoint now that the cluster is
   704  			// initialized.
   705  			if cfg.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint && c.HasBeenStartedBefore {
   706  				group.Add(1)
   707  				go func(c *container.Container) {
   708  					if err := sem.Acquire(ctx, 1); err != nil {
   709  						// ctx is done.
   710  						group.Done()
   711  						return
   712  					}
   713  
   714  					if err := daemon.containerStart(ctx, cfg, c, "", "", true); err != nil {
   715  						log.G(ctx).WithField("container", c.ID).WithError(err).Error("failed to start swarm container")
   716  					}
   717  
   718  					sem.Release(1)
   719  					group.Done()
   720  				}(c)
   721  			}
   722  		}
   723  	}
   724  	group.Wait()
   725  }
   726  
   727  func (daemon *Daemon) children(c *container.Container) map[string]*container.Container {
   728  	return daemon.linkIndex.children(c)
   729  }
   730  
   731  // parents returns the parent containers of the given container.
   733  func (daemon *Daemon) parents(c *container.Container) map[string]*container.Container {
   734  	return daemon.linkIndex.parents(c)
   735  }
   736  
   737  func (daemon *Daemon) registerLink(parent, child *container.Container, alias string) error {
   738  	fullName := path.Join(parent.Name, alias)
   739  	if err := daemon.containersReplica.ReserveName(fullName, child.ID); err != nil {
   740  		if errors.Is(err, container.ErrNameReserved) {
   741  			log.G(context.TODO()).Warnf("error registering link for %s, to %s, as alias %s, ignoring: %v", parent.ID, child.ID, alias, err)
   742  			return nil
   743  		}
   744  		return err
   745  	}
   746  	daemon.linkIndex.link(parent, child, fullName)
   747  	return nil
   748  }
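
// Illustrative example of how registerLink composes link names: for a parent
// container named "/web" and the alias "db", the reserved name is
// path.Join("/web", "db"), i.e. "/web/db". The exampleLinkName helper is
// hypothetical.
func exampleLinkName(parentName, alias string) string {
	return path.Join(parentName, alias)
}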
   749  
   750  // DaemonJoinsCluster informs the daemon that it has joined the cluster and provides
   751  // the handler to query the cluster component
   752  func (daemon *Daemon) DaemonJoinsCluster(clusterProvider cluster.Provider) {
   753  	daemon.setClusterProvider(clusterProvider)
   754  }
   755  
   756  // DaemonLeavesCluster informs the daemon that it has left the cluster
   757  func (daemon *Daemon) DaemonLeavesCluster() {
   758  	// Daemon is in charge of removing the attachable networks with
   759  	// connected containers when the node leaves the swarm
   760  	daemon.clearAttachableNetworks()
   761  	// We no longer need the cluster provider, stop it now so that
   762  	// the network agent will stop listening to cluster events.
   763  	daemon.setClusterProvider(nil)
   764  	// Wait for the networking cluster agent to stop
   765  	daemon.netController.AgentStopWait()
   766  	// Daemon is in charge of removing the ingress network when the
   767  	// node leaves the swarm. Wait for job to be done or timeout.
   768  	// This is also called on graceful daemon shutdown. We need to
   769  	// wait, because the ingress release has to happen before the
   770  	// network controller is stopped.
   771  
   772  	if done, err := daemon.ReleaseIngress(); err == nil {
   773  		timeout := time.NewTimer(5 * time.Second)
   774  		defer timeout.Stop()
   775  
   776  		select {
   777  		case <-done:
   778  		case <-timeout.C:
   779  			log.G(context.TODO()).Warn("timeout while waiting for ingress network removal")
   780  		}
   781  	} else {
   782  		log.G(context.TODO()).Warnf("failed to initiate ingress network removal: %v", err)
   783  	}
   784  
   785  	daemon.attachmentStore.ClearAttachments()
   786  }
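
// Hedged sketch of the wait-with-timeout pattern used by DaemonLeavesCluster
// above: block on a completion channel, but give up after a deadline. The
// waitOrTimeout helper is hypothetical; it returns false if the deadline fires
// first.
func waitOrTimeout(done <-chan struct{}, d time.Duration) bool {
	timeout := time.NewTimer(d)
	defer timeout.Stop()
	select {
	case <-done:
		return true
	case <-timeout.C:
		return false
	}
}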
   787  
   788  // setClusterProvider sets a component for querying the current cluster state.
   789  func (daemon *Daemon) setClusterProvider(clusterProvider cluster.Provider) {
   790  	daemon.clusterProvider = clusterProvider
   791  	daemon.netController.SetClusterProvider(clusterProvider)
   792  	daemon.attachableNetworkLock = locker.New()
   793  }
   794  
   795  // IsSwarmCompatible verifies if the current daemon
   796  // configuration is compatible with the swarm mode
   797  func (daemon *Daemon) IsSwarmCompatible() error {
   798  	return daemon.config().IsSwarmCompatible()
   799  }
   800  
   801  // NewDaemon sets up everything for the daemon to be able to service
   802  // requests from the webserver.
   803  func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.Store, authzMiddleware *authorization.Middleware) (daemon *Daemon, err error) {
   804  	// Verify platform-specific requirements.
   805  	// TODO(thaJeztah): this should be called before we try to create the daemon; perhaps together with the config validation.
   806  	if err := checkSystem(); err != nil {
   807  		return nil, err
   808  	}
   809  
   810  	registryService, err := registry.NewService(config.ServiceOptions)
   811  	if err != nil {
   812  		return nil, err
   813  	}
   814  
   815  	// Ensure that we have a correct root key limit for launching containers.
   816  	if err := modifyRootKeyLimit(); err != nil {
   817  		log.G(ctx).Warnf("unable to modify root key limit, number of containers could be limited by this quota: %v", err)
   818  	}
   819  
   820  	// Ensure we have compatible and valid configuration options
   821  	if err := verifyDaemonSettings(config); err != nil {
   822  		return nil, err
   823  	}
   824  
   825  	// Do we have a disabled network?
   826  	config.DisableBridge = isBridgeNetworkDisabled(config)
   827  
   828  	// Setup the resolv.conf
   829  	setupResolvConf(config)
   830  
   831  	idMapping, err := setupRemappedRoot(config)
   832  	if err != nil {
   833  		return nil, err
   834  	}
   835  	rootIDs := idMapping.RootPair()
   836  	if err := setMayDetachMounts(); err != nil {
   837  		log.G(ctx).WithError(err).Warn("Could not set may_detach_mounts kernel parameter")
   838  	}
   839  
   840  	// set up the tmpDir to use a canonical path
   841  	tmp, err := prepareTempDir(config.Root)
   842  	if err != nil {
   843  		return nil, fmt.Errorf("Unable to get the TempDir under %s: %s", config.Root, err)
   844  	}
   845  	realTmp, err := fileutils.ReadSymlinkedDirectory(tmp)
   846  	if err != nil {
   847  		return nil, fmt.Errorf("Unable to get the full path to the TempDir (%s): %s", tmp, err)
   848  	}
   849  	if isWindows {
   850  		if err := system.MkdirAll(realTmp, 0); err != nil {
   851  			return nil, fmt.Errorf("Unable to create the TempDir (%s): %s", realTmp, err)
   852  		}
   853  		os.Setenv("TEMP", realTmp)
   854  		os.Setenv("TMP", realTmp)
   855  	} else {
   856  		os.Setenv("TMPDIR", realTmp)
   857  	}
   858  
   859  	if err := initRuntimesDir(config); err != nil {
   860  		return nil, err
   861  	}
   862  	rts, err := setupRuntimes(config)
   863  	if err != nil {
   864  		return nil, err
   865  	}
   866  
   867  	d := &Daemon{
   868  		PluginStore: pluginStore,
   869  		startupDone: make(chan struct{}),
   870  	}
   871  	cfgStore := &configStore{
   872  		Config:   *config,
   873  		Runtimes: rts,
   874  	}
   875  	d.configStore.Store(cfgStore)
   876  
   877  	// TEST_INTEGRATION_USE_SNAPSHOTTER is used for integration tests only.
   878  	if os.Getenv("TEST_INTEGRATION_USE_SNAPSHOTTER") != "" {
   879  		d.usesSnapshotter = true
   880  	} else {
   881  		d.usesSnapshotter = config.Features["containerd-snapshotter"]
   882  	}
   883  
   884  	// Ensure the daemon is properly shut down if there is a failure during
   885  	// initialization
   886  	defer func() {
   887  		if err != nil {
   888  			// Use a fresh context here. Passed context could be cancelled.
   889  			if err := d.Shutdown(context.Background()); err != nil {
   890  				log.G(ctx).Error(err)
   891  			}
   892  		}
   893  	}()
   894  
   895  	if err := d.setGenericResources(&cfgStore.Config); err != nil {
   896  		return nil, err
   897  	}
   898  	// set up SIGUSR1 handler on Unix-like systems, or a Win32 global event
   899  	// on Windows to dump goroutine stacks
   900  	stackDumpDir := cfgStore.Root
   901  	if execRoot := cfgStore.GetExecRoot(); execRoot != "" {
   902  		stackDumpDir = execRoot
   903  	}
   904  	d.setupDumpStackTrap(stackDumpDir)
   905  
   906  	if err := d.setupSeccompProfile(&cfgStore.Config); err != nil {
   907  		return nil, err
   908  	}
   909  
   910  	// Set the default isolation mode (only applicable on Windows)
   911  	if err := d.setDefaultIsolation(&cfgStore.Config); err != nil {
   912  		return nil, fmt.Errorf("error setting default isolation mode: %v", err)
   913  	}
   914  
   915  	if err := configureMaxThreads(&cfgStore.Config); err != nil {
   916  		log.G(ctx).Warnf("Failed to configure golang's threads limit: %v", err)
   917  	}
   918  
   919  	// ensureDefaultAppArmorProfile does nothing if apparmor is disabled
   920  	if err := ensureDefaultAppArmorProfile(); err != nil {
   921  		log.G(ctx).Errorf(err.Error())
   922  	}
   923  
   924  	daemonRepo := filepath.Join(cfgStore.Root, "containers")
   925  	if err := idtools.MkdirAllAndChown(daemonRepo, 0o710, idtools.Identity{
   926  		UID: idtools.CurrentIdentity().UID,
   927  		GID: rootIDs.GID,
   928  	}); err != nil {
   929  		return nil, err
   930  	}
   931  
   932  	if isWindows {
   933  		// Note that permissions (0o700) are ignored on Windows; passing them to
   934  		// show intent only. We could consider using idtools.MkdirAndChown here
   935  		// to apply an ACL.
   936  		if err = os.Mkdir(filepath.Join(cfgStore.Root, "credentialspecs"), 0o700); err != nil && !errors.Is(err, os.ErrExist) {
   937  			return nil, err
   938  		}
   939  	}
   940  
   941  	d.registryService = registryService
   942  	dlogger.RegisterPluginGetter(d.PluginStore)
   943  
   944  	metricsSockPath, err := d.listenMetricsSock(&cfgStore.Config)
   945  	if err != nil {
   946  		return nil, err
   947  	}
   948  	registerMetricsPluginCallback(d.PluginStore, metricsSockPath)
   949  
   950  	backoffConfig := backoff.DefaultConfig
   951  	backoffConfig.MaxDelay = 3 * time.Second
   952  	connParams := grpc.ConnectParams{
   953  		Backoff: backoffConfig,
   954  	}
   955  	gopts := []grpc.DialOption{
   956  		// WithBlock makes sure that the following containerd request
   957  		// is reliable.
   958  		//
   959  		// NOTE: In one edge case, under high load pressure, the kernel
   960  		// kills dockerd, containerd, and containerd-shims due to OOM.
   961  		// Both dockerd and containerd restart, but containerd takes time
   962  		// to recover all the existing containers. Until containerd is
   963  		// serving again, dockerd requests fail with gRPC errors. Worse,
   964  		// the restore action ignores any non-NotFound errors and reports
   965  		// a running state for containers that have already stopped, which
   966  		// is unexpected, and dockerd has to be restarted to get everything
   967  		// back into a consistent state.
   968  		//
   969  		// That is painful. Adding WithBlock prevents this edge case, and
   970  		// in the common case containerd will be serving again shortly,
   971  		// so adding WithBlock to the containerd connection does no harm.
   972  		grpc.WithBlock(),
   973  
   974  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   975  		grpc.WithConnectParams(connParams),
   976  		grpc.WithContextDialer(dialer.ContextDialer),
   977  
   978  		// TODO(stevvooe): We may need to allow configuration of this on the client.
   979  		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)),
   980  		grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)),
   981  		grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor()),   //nolint:staticcheck // TODO(thaJeztah): ignore SA1019 for deprecated options: see https://github.com/moby/moby/issues/47437
   982  		grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor()), //nolint:staticcheck // TODO(thaJeztah): ignore SA1019 for deprecated options: see https://github.com/moby/moby/issues/47437
   983  	}
   984  
   985  	if cfgStore.ContainerdAddr != "" {
   986  		d.containerdClient, err = containerd.New(
   987  			cfgStore.ContainerdAddr,
   988  			containerd.WithDefaultNamespace(cfgStore.ContainerdNamespace),
   989  			containerd.WithDialOpts(gopts),
   990  			containerd.WithTimeout(60*time.Second),
   991  		)
   992  		if err != nil {
   993  			return nil, errors.Wrapf(err, "failed to dial %q", cfgStore.ContainerdAddr)
   994  		}
   995  	}
   996  
   997  	createPluginExec := func(m *plugin.Manager) (plugin.Executor, error) {
   998  		var pluginCli *containerd.Client
   999  
  1000  		if cfgStore.ContainerdAddr != "" {
  1001  			pluginCli, err = containerd.New(
  1002  				cfgStore.ContainerdAddr,
  1003  				containerd.WithDefaultNamespace(cfgStore.ContainerdPluginNamespace),
  1004  				containerd.WithDialOpts(gopts),
  1005  				containerd.WithTimeout(60*time.Second),
  1006  			)
  1007  			if err != nil {
  1008  				return nil, errors.Wrapf(err, "failed to dial %q", cfgStore.ContainerdAddr)
  1009  			}
  1010  		}
  1011  
  1012  		var (
  1013  			shim     string
  1014  			shimOpts interface{}
  1015  		)
  1016  		if runtime.GOOS != "windows" {
  1017  			shim, shimOpts, err = rts.Get("")
  1018  			if err != nil {
  1019  				return nil, err
  1020  			}
  1021  		}
  1022  		return pluginexec.New(ctx, getPluginExecRoot(&cfgStore.Config), pluginCli, cfgStore.ContainerdPluginNamespace, m, shim, shimOpts)
  1023  	}
  1024  
  1025  	// Plugin system initialization should happen before restore. Do not change order.
  1026  	d.pluginManager, err = plugin.NewManager(plugin.ManagerConfig{
  1027  		Root:               filepath.Join(cfgStore.Root, "plugins"),
  1028  		ExecRoot:           getPluginExecRoot(&cfgStore.Config),
  1029  		Store:              d.PluginStore,
  1030  		CreateExecutor:     createPluginExec,
  1031  		RegistryService:    registryService,
  1032  		LiveRestoreEnabled: cfgStore.LiveRestoreEnabled,
  1033  		LogPluginEvent:     d.LogPluginEvent, // todo: make private
  1034  		AuthzMiddleware:    authzMiddleware,
  1035  	})
  1036  	if err != nil {
  1037  		return nil, errors.Wrap(err, "couldn't create plugin manager")
  1038  	}
  1039  
  1040  	d.defaultLogConfig, err = defaultLogConfig(&cfgStore.Config)
  1041  	if err != nil {
  1042  		return nil, errors.Wrap(err, "failed to set log opts")
  1043  	}
  1044  	log.G(ctx).Debugf("Using default logging driver %s", d.defaultLogConfig.Type)
  1045  
  1046  	d.volumes, err = volumesservice.NewVolumeService(cfgStore.Root, d.PluginStore, rootIDs, d)
  1047  	if err != nil {
  1048  		return nil, err
  1049  	}
  1050  
  1051  	// Check if the devices cgroup is mounted; on Linux, it is a hard requirement
  1052  	// for container security.
  1053  	//
  1054  	// Important: we call getSysInfo() directly here, without storing the results,
  1055  	// as networking has not yet been set up, so we only have partial system info
  1056  	// at this point.
  1057  	//
  1058  	// TODO(thaJeztah) add a utility to only collect the CgroupDevicesEnabled information
  1059  	if runtime.GOOS == "linux" && !userns.RunningInUserNS() && !getSysInfo(&cfgStore.Config).CgroupDevicesEnabled {
  1060  		return nil, errors.New("Devices cgroup isn't mounted")
  1061  	}
  1062  
  1063  	d.id, err = LoadOrCreateID(cfgStore.Root)
  1064  	if err != nil {
  1065  		return nil, err
  1066  	}
  1067  	d.repository = daemonRepo
  1068  	d.containers = container.NewMemoryStore()
  1069  	if d.containersReplica, err = container.NewViewDB(); err != nil {
  1070  		return nil, err
  1071  	}
  1072  	d.execCommands = container.NewExecStore()
  1073  	d.statsCollector = d.newStatsCollector(1 * time.Second)
  1074  
  1075  	d.EventsService = events.New()
  1076  	d.root = cfgStore.Root
  1077  	d.idMapping = idMapping
  1078  
  1079  	d.linkIndex = newLinkIndex()
  1080  
  1081  	// On Windows we don't support the environment variable, or a user-supplied graphdriver.
  1082  	// Unix platforms, however, run a single graphdriver for all containers, and it can
  1083  	// be set through an environment variable, a daemon start parameter, or chosen through
  1084  	// initialization of the layerstore through driver priority order, for example.
  1085  	driverName := os.Getenv("DOCKER_DRIVER")
  1086  	if isWindows && d.UsesSnapshotter() {
  1087  		// Containerd WCOW snapshotter
  1088  		driverName = "windows"
  1089  	} else if isWindows {
  1090  		// Docker WCOW graphdriver
  1091  		driverName = "windowsfilter"
  1092  	} else if driverName != "" {
  1093  		log.G(ctx).Infof("Setting the storage driver from the $DOCKER_DRIVER environment variable (%s)", driverName)
  1094  	} else {
  1095  		driverName = cfgStore.GraphDriver
  1096  	}
  1097  
  1098  	if d.UsesSnapshotter() {
  1099  		if os.Getenv("TEST_INTEGRATION_USE_SNAPSHOTTER") != "" {
  1100  			log.G(ctx).Warn("Enabling containerd snapshotter through the $TEST_INTEGRATION_USE_SNAPSHOTTER environment variable. This should only be used for testing.")
  1101  		}
  1102  		log.G(ctx).Info("Starting daemon with containerd snapshotter integration enabled")
  1103  
  1104  		// FIXME(thaJeztah): implement automatic snapshotter-selection similar to graph-driver selection; see https://github.com/moby/moby/issues/44076
  1105  		if driverName == "" {
  1106  			driverName = containerd.DefaultSnapshotter
  1107  		}
  1108  
  1109  		// Configure and validate the kernel's security support. Note this is a Linux/FreeBSD
  1110  		// operation only, so it is safe to pass *just* the runtime OS graphdriver.
  1111  		if err := configureKernelSecuritySupport(&cfgStore.Config, driverName); err != nil {
  1112  			return nil, err
  1113  		}
  1114  		d.imageService = ctrd.NewService(ctrd.ImageServiceConfig{
  1115  			Client:          d.containerdClient,
  1116  			Containers:      d.containers,
  1117  			Snapshotter:     driverName,
  1118  			RegistryHosts:   d.RegistryHosts,
  1119  			Registry:        d.registryService,
  1120  			EventsService:   d.EventsService,
  1121  			IDMapping:       idMapping,
  1122  			RefCountMounter: snapshotter.NewMounter(config.Root, driverName, idMapping),
  1123  		})
  1124  	} else {
  1125  		layerStore, err := layer.NewStoreFromOptions(layer.StoreOptions{
  1126  			Root:                      cfgStore.Root,
  1127  			MetadataStorePathTemplate: filepath.Join(cfgStore.Root, "image", "%s", "layerdb"),
  1128  			GraphDriver:               driverName,
  1129  			GraphDriverOptions:        cfgStore.GraphOptions,
  1130  			IDMapping:                 idMapping,
  1131  			PluginGetter:              d.PluginStore,
  1132  			ExperimentalEnabled:       cfgStore.Experimental,
  1133  		})
  1134  		if err != nil {
  1135  			return nil, err
  1136  		}
  1137  
  1138  		// Configure and validate the kernel's security support. Note this is a Linux/FreeBSD
  1139  		// operation only, so it is safe to pass *just* the runtime OS graphdriver.
  1140  		if err := configureKernelSecuritySupport(&cfgStore.Config, layerStore.DriverName()); err != nil {
  1141  			return nil, err
  1142  		}
  1143  
  1144  		imageRoot := filepath.Join(cfgStore.Root, "image", layerStore.DriverName())
  1145  		ifs, err := image.NewFSStoreBackend(filepath.Join(imageRoot, "imagedb"))
  1146  		if err != nil {
  1147  			return nil, err
  1148  		}
  1149  
  1150  		// We have a single tag/reference store for the daemon globally. However, it's
  1151  		// stored under the graphdriver. On host platforms which only support a single
  1152  		// container OS, but multiple selectable graphdrivers, the location of the global
  1153  		// reference store therefore depends on which graphdriver is chosen. For
  1154  		// platforms which support multiple container operating systems, this is slightly
  1155  		// more problematic, as it is unclear where the global ref store should live. Fortunately,
  1156  		// for Windows, which is currently the only platform whose daemon supports multiple container
  1157  		// operating systems, the list of graphdrivers available isn't user configurable.
  1158  		// For backwards compatibility, we just put it under the windowsfilter
  1159  		// directory regardless.
  1160  		refStoreLocation := filepath.Join(imageRoot, `repositories.json`)
  1161  		rs, err := refstore.NewReferenceStore(refStoreLocation)
  1162  		if err != nil {
  1163  			return nil, fmt.Errorf("Couldn't create reference store repository: %s", err)
  1164  		}
  1165  		d.ReferenceStore = rs
  1166  
  1167  		imageStore, err := image.NewImageStore(ifs, layerStore)
  1168  		if err != nil {
  1169  			return nil, err
  1170  		}
  1171  
  1172  		distributionMetadataStore, err := dmetadata.NewFSMetadataStore(filepath.Join(imageRoot, "distribution"))
  1173  		if err != nil {
  1174  			return nil, err
  1175  		}
  1176  
  1177  		imgSvcConfig := images.ImageServiceConfig{
  1178  			ContainerStore:            d.containers,
  1179  			DistributionMetadataStore: distributionMetadataStore,
  1180  			EventsService:             d.EventsService,
  1181  			ImageStore:                imageStore,
  1182  			LayerStore:                layerStore,
  1183  			MaxConcurrentDownloads:    config.MaxConcurrentDownloads,
  1184  			MaxConcurrentUploads:      config.MaxConcurrentUploads,
  1185  			MaxDownloadAttempts:       config.MaxDownloadAttempts,
  1186  			ReferenceStore:            rs,
  1187  			RegistryService:           registryService,
  1188  			ContentNamespace:          config.ContainerdNamespace,
  1189  		}
  1190  
  1191  		// containerd is not currently supported with Windows,
  1192  		// so sometimes d.containerdClient will be nil.
  1193  		// In that case we'll create a local content store... but otherwise we'll use containerd.
  1194  		if d.containerdClient != nil {
  1195  			imgSvcConfig.Leases = d.containerdClient.LeasesService()
  1196  			imgSvcConfig.ContentStore = d.containerdClient.ContentStore()
  1197  		} else {
  1198  			imgSvcConfig.ContentStore, imgSvcConfig.Leases, err = d.configureLocalContentStore(config.ContainerdNamespace)
  1199  			if err != nil {
  1200  				return nil, err
  1201  			}
  1202  		}
  1203  
  1204  		// TODO: imageStore, distributionMetadataStore, and ReferenceStore are only
  1205  		// used above to run migration. They could be initialized in ImageService
  1206  		// if migration is called from daemon/images. layerStore might move as well.
  1207  		d.imageService = images.NewImageService(imgSvcConfig)
  1208  
  1209  		log.G(ctx).Debugf("Max Concurrent Downloads: %d", imgSvcConfig.MaxConcurrentDownloads)
  1210  		log.G(ctx).Debugf("Max Concurrent Uploads: %d", imgSvcConfig.MaxConcurrentUploads)
  1211  		log.G(ctx).Debugf("Max Download Attempts: %d", imgSvcConfig.MaxDownloadAttempts)
  1212  	}
  1213  
  1214  	go d.execCommandGC()
  1215  
  1216  	if err := d.initLibcontainerd(ctx, &cfgStore.Config); err != nil {
  1217  		return nil, err
  1218  	}
  1219  
  1220  	if err := d.restore(cfgStore); err != nil {
  1221  		return nil, err
  1222  	}
  1223  	close(d.startupDone)
  1224  
  1225  	info, err := d.SystemInfo(ctx)
  1226  	if err != nil {
  1227  		return nil, err
  1228  	}
  1229  	for _, w := range info.Warnings {
  1230  		log.G(ctx).Warn(w)
  1231  	}
  1232  
  1233  	engineInfo.WithValues(
  1234  		dockerversion.Version,
  1235  		dockerversion.GitCommit,
  1236  		info.Architecture,
  1237  		info.Driver,
  1238  		info.KernelVersion,
  1239  		info.OperatingSystem,
  1240  		info.OSType,
  1241  		info.OSVersion,
  1242  		info.ID,
  1243  	).Set(1)
  1244  	engineCpus.Set(float64(info.NCPU))
  1245  	engineMemory.Set(float64(info.MemTotal))
  1246  
  1247  	log.G(ctx).WithFields(log.Fields{
  1248  		"version":                dockerversion.Version,
  1249  		"commit":                 dockerversion.GitCommit,
  1250  		"storage-driver":         d.ImageService().StorageDriver(),
  1251  		"containerd-snapshotter": d.UsesSnapshotter(),
  1252  	}).Info("Docker daemon")
  1253  
  1254  	return d, nil
  1255  }
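
// Hedged usage sketch (hypothetical wrapper, not part of the daemon): how a
// caller such as the daemon's command-line entrypoint might construct the
// daemon via NewDaemon. The wiring of the config, plugin store, and
// authorization middleware is simplified away here.
func exampleStartDaemon(ctx context.Context, cfg *config.Config, pluginStore *plugin.Store, authz *authorization.Middleware) (*Daemon, error) {
	d, err := NewDaemon(ctx, cfg, pluginStore, authz)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create daemon")
	}
	return d, nil
}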
  1256  
  1257  // DistributionServices returns services controlling daemon storage
  1258  func (daemon *Daemon) DistributionServices() images.DistributionServices {
  1259  	return daemon.imageService.DistributionServices()
  1260  }
  1261  
  1262  func (daemon *Daemon) waitForStartupDone() {
  1263  	<-daemon.startupDone
  1264  }
  1265  
  1266  func (daemon *Daemon) shutdownContainer(c *container.Container) error {
  1267  	ctx := compatcontext.WithoutCancel(context.TODO())
  1268  
  1269  	// If the container fails to exit within stopTimeout seconds after SIGTERM, force-kill it.
  1270  	if err := daemon.containerStop(ctx, c, containertypes.StopOptions{}); err != nil {
  1271  		return fmt.Errorf("Failed to stop container %s with error: %v", c.ID, err)
  1272  	}
  1273  
  1274  	// Wait without timeout for the container to exit.
  1275  	// Ignore the result.
  1276  	<-c.Wait(ctx, container.WaitConditionNotRunning)
  1277  	return nil
  1278  }
  1279  
  1280  // ShutdownTimeout returns the timeout (in seconds) before containers are forcibly
  1281  // killed during shutdown. The default timeout can be configured both on the daemon
  1282  // and per container, and the longest timeout will be used. A grace-period of
  1283  // 5 seconds is added to the configured timeout.
  1284  //
  1285  // A negative (-1) timeout means "indefinitely", which means that containers
  1286  // are not forcibly killed, and the daemon shuts down after all containers exit.
  1287  func (daemon *Daemon) ShutdownTimeout() int {
  1288  	return daemon.shutdownTimeout(&daemon.config().Config)
  1289  }
  1290  
  1291  func (daemon *Daemon) shutdownTimeout(cfg *config.Config) int {
  1292  	shutdownTimeout := cfg.ShutdownTimeout
  1293  	if shutdownTimeout < 0 {
  1294  		return -1
  1295  	}
  1296  	if daemon.containers == nil {
  1297  		return shutdownTimeout
  1298  	}
  1299  
  1300  	graceTimeout := 5
  1301  	for _, c := range daemon.containers.List() {
  1302  		stopTimeout := c.StopTimeout()
  1303  		if stopTimeout < 0 {
  1304  			return -1
  1305  		}
  1306  		if stopTimeout+graceTimeout > shutdownTimeout {
  1307  			shutdownTimeout = stopTimeout + graceTimeout
  1308  		}
  1309  	}
  1310  	return shutdownTimeout
  1311  }
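
// Hedged worked example of the calculation above, as a hypothetical
// free-standing helper: with a configured shutdown timeout of 15 seconds and
// containers whose stop timeouts are 10 and 30 seconds, the result is
// max(15, 10+5, 30+5) = 35 seconds; any negative stop timeout yields -1
// ("wait indefinitely").
func exampleShutdownTimeout(configured int, stopTimeouts []int) int {
	if configured < 0 {
		return -1
	}
	const graceTimeout = 5
	out := configured
	for _, st := range stopTimeouts {
		if st < 0 {
			return -1
		}
		if st+graceTimeout > out {
			out = st + graceTimeout
		}
	}
	return out
}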
  1312  
  1313  // Shutdown stops the daemon.
  1314  func (daemon *Daemon) Shutdown(ctx context.Context) error {
  1315  	daemon.shutdown = true
  1316  	// Keep mounts and networking running on daemon shutdown if
  1317  	// we are to keep containers running and restore them.
  1318  
  1319  	cfg := &daemon.config().Config
  1320  	if cfg.LiveRestoreEnabled && daemon.containers != nil {
  1321  		// check if there are any running containers; if none, we should do some cleanup
  1322  		if ls, err := daemon.Containers(ctx, &containertypes.ListOptions{}); len(ls) != 0 || err != nil {
  1323  			// metrics plugins still need some cleanup
  1324  			daemon.cleanupMetricsPlugins()
  1325  			return err
  1326  		}
  1327  	}
  1328  
  1329  	if daemon.containers != nil {
  1330  		log.G(ctx).Debugf("daemon configured with a %d seconds minimum shutdown timeout", cfg.ShutdownTimeout)
  1331  		log.G(ctx).Debugf("start clean shutdown of all containers with a %d seconds timeout...", daemon.shutdownTimeout(cfg))
  1332  		daemon.containers.ApplyAll(func(c *container.Container) {
  1333  			if !c.IsRunning() {
  1334  				return
  1335  			}
  1336  			logger := log.G(ctx).WithField("container", c.ID)
  1337  			logger.Debug("shutting down container")
  1338  			if err := daemon.shutdownContainer(c); err != nil {
  1339  				logger.WithError(err).Error("failed to shut down container")
  1340  				return
  1341  			}
  1342  			if mountid, err := daemon.imageService.GetLayerMountID(c.ID); err == nil {
  1343  				daemon.cleanupMountsByID(mountid)
  1344  			}
  1345  			logger.Debug("shut down container")
  1346  		})
  1347  	}
  1348  
  1349  	if daemon.volumes != nil {
  1350  		if err := daemon.volumes.Shutdown(); err != nil {
  1351  			log.G(ctx).Errorf("Error shutting down volume store: %v", err)
  1352  		}
  1353  	}
  1354  
  1355  	if daemon.imageService != nil {
  1356  		if err := daemon.imageService.Cleanup(); err != nil {
  1357  			log.G(ctx).Error(err)
  1358  		}
  1359  	}
  1360  
  1361  	// If the daemon is part of a cluster, clean up cluster resources.
  1362  	if daemon.clusterProvider != nil {
  1363  		log.G(ctx).Debug("start clean shutdown of cluster resources...")
  1364  		daemon.DaemonLeavesCluster()
  1365  	}
  1366  
  1367  	daemon.cleanupMetricsPlugins()
  1368  
  1369  	// Shutdown plugins after containers and layerstore. Don't change the order.
  1370  	daemon.pluginShutdown()
  1371  
  1372  	// trigger libnetwork Stop only if it's initialized
  1373  	if daemon.netController != nil {
  1374  		daemon.netController.Stop()
  1375  	}
  1376  
  1377  	if daemon.containerdClient != nil {
  1378  		daemon.containerdClient.Close()
  1379  	}
  1380  
  1381  	if daemon.mdDB != nil {
  1382  		daemon.mdDB.Close()
  1383  	}
  1384  
  1385  	return daemon.cleanupMounts(cfg)
  1386  }
  1387  
  1388  // Mount sets container.BaseFS
  1389  func (daemon *Daemon) Mount(container *container.Container) error {
  1390  	return daemon.imageService.Mount(context.Background(), container)
  1391  }
  1392  
  1393  // Unmount unsets the container base filesystem
  1394  func (daemon *Daemon) Unmount(container *container.Container) error {
  1395  	return daemon.imageService.Unmount(context.Background(), container)
  1396  }
  1397  
  1398  // Subnets returns the IPv4 and IPv6 subnets of networks that are managed by Docker.
  1399  func (daemon *Daemon) Subnets() ([]net.IPNet, []net.IPNet) {
  1400  	var v4Subnets []net.IPNet
  1401  	var v6Subnets []net.IPNet
  1402  
  1403  	for _, managedNetwork := range daemon.netController.Networks(context.TODO()) {
  1404  		v4infos, v6infos := managedNetwork.IpamInfo()
  1405  		for _, info := range v4infos {
  1406  			if info.IPAMData.Pool != nil {
  1407  				v4Subnets = append(v4Subnets, *info.IPAMData.Pool)
  1408  			}
  1409  		}
  1410  		for _, info := range v6infos {
  1411  			if info.IPAMData.Pool != nil {
  1412  				v6Subnets = append(v6Subnets, *info.IPAMData.Pool)
  1413  			}
  1414  		}
  1415  	}
  1416  
  1417  	return v4Subnets, v6Subnets
  1418  }
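
        // Illustrative sketch (hypothetical values; not part of the upstream
        // source): the IPv4 result contains the IPAM pools of the Docker-managed
        // networks, for example the default bridge network on a typical install:
        //
        //	v4, v6 := daemon.Subnets()
        //	// v4 might contain 172.17.0.0/16 (the default bridge subnet);
        //	// v6 is empty unless IPv6-enabled networks are present.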
  1419  
  1420  // prepareTempDir prepares and returns the default directory to use for
  1421  // temporary files. If DOCKER_TMPDIR is set, it is used as-is; otherwise
  1422  // <rootDir>/tmp is used and any existing content is removed in the background.
  1423  func prepareTempDir(rootDir string) (string, error) {
  1424  	var tmpDir string
  1425  	if tmpDir = os.Getenv("DOCKER_TMPDIR"); tmpDir == "" {
  1426  		tmpDir = filepath.Join(rootDir, "tmp")
  1427  		newName := tmpDir + "-old"
  1428  		if err := os.Rename(tmpDir, newName); err == nil {
  1429  			go func() {
  1430  				if err := os.RemoveAll(newName); err != nil {
  1431  					log.G(context.TODO()).Warnf("failed to delete old tmp directory: %s", newName)
  1432  				}
  1433  			}()
  1434  		} else if !os.IsNotExist(err) {
  1435  			log.G(context.TODO()).Warnf("failed to rename %s for background deletion: %s. Deleting synchronously", tmpDir, err)
  1436  			if err := os.RemoveAll(tmpDir); err != nil {
  1437  				log.G(context.TODO()).Warnf("failed to delete old tmp directory: %s", tmpDir)
  1438  			}
  1439  		}
  1440  	}
  1441  	return tmpDir, idtools.MkdirAllAndChown(tmpDir, 0o700, idtools.CurrentIdentity())
  1442  }
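
        // Illustrative example (hypothetical path; not part of the upstream
        // source): with DOCKER_TMPDIR unset and the default root directory,
        // prepareTempDir yields the "tmp" directory under the daemon root:
        //
        //	tmpDir, err := prepareTempDir("/var/lib/docker")
        //	// tmpDir == "/var/lib/docker/tmp"; if DOCKER_TMPDIR were set, its
        //	// value would be returned instead and only the directory is created.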
  1443  
  1444  func (daemon *Daemon) setGenericResources(conf *config.Config) error {
  1445  	genericResources, err := config.ParseGenericResources(conf.NodeGenericResources)
  1446  	if err != nil {
  1447  		return err
  1448  	}
  1449  
  1450  	daemon.genericResources = genericResources
  1451  
  1452  	return nil
  1453  }
  1454  
  1455  // IsShuttingDown reports whether the daemon is shutting down.
  1456  func (daemon *Daemon) IsShuttingDown() bool {
  1457  	return daemon.shutdown
  1458  }
  1459  
  1460  func isBridgeNetworkDisabled(conf *config.Config) bool {
  1461  	return conf.BridgeConfig.Iface == config.DisableNetworkBridge
  1462  }
  1463  
  1464  func (daemon *Daemon) networkOptions(conf *config.Config, pg plugingetter.PluginGetter, activeSandboxes map[string]interface{}) ([]nwconfig.Option, error) {
  1465  	dd := runconfig.DefaultDaemonNetworkMode()
  1466  
  1467  	options := []nwconfig.Option{
  1468  		nwconfig.OptionDataDir(conf.Root),
  1469  		nwconfig.OptionExecRoot(conf.GetExecRoot()),
  1470  		nwconfig.OptionDefaultDriver(string(dd)),
  1471  		nwconfig.OptionDefaultNetwork(dd.NetworkName()),
  1472  		nwconfig.OptionLabels(conf.Labels),
  1473  		nwconfig.OptionNetworkControlPlaneMTU(conf.NetworkControlPlaneMTU),
  1474  		driverOptions(conf),
  1475  	}
  1476  
  1477  	if len(conf.NetworkConfig.DefaultAddressPools.Value()) > 0 {
  1478  		options = append(options, nwconfig.OptionDefaultAddressPoolConfig(conf.NetworkConfig.DefaultAddressPools.Value()))
  1479  	}
  1480  	if conf.LiveRestoreEnabled && len(activeSandboxes) != 0 {
  1481  		options = append(options, nwconfig.OptionActiveSandboxes(activeSandboxes))
  1482  	}
  1483  	if pg != nil {
  1484  		options = append(options, nwconfig.OptionPluginGetter(pg))
  1485  	}
  1486  
  1487  	return options, nil
  1488  }
  1489  
  1490  // GetCluster returns the cluster
  1491  func (daemon *Daemon) GetCluster() Cluster {
  1492  	return daemon.cluster
  1493  }
  1494  
  1495  // SetCluster sets the cluster
  1496  func (daemon *Daemon) SetCluster(cluster Cluster) {
  1497  	daemon.cluster = cluster
  1498  }
  1499  
  1500  func (daemon *Daemon) pluginShutdown() {
  1501  	manager := daemon.pluginManager
  1502  	// Check for a valid manager object. In error conditions, daemon init can fail,
  1503  	// and Shutdown may be called before the plugin manager has been initialized.
  1504  	if manager != nil {
  1505  		manager.Shutdown()
  1506  	}
  1507  }
  1508  
  1509  // PluginManager returns the current pluginManager associated with the daemon
  1510  func (daemon *Daemon) PluginManager() *plugin.Manager { // set up before daemon to avoid this method
  1511  	return daemon.pluginManager
  1512  }
  1513  
  1514  // PluginGetter returns the current pluginStore associated with the daemon
  1515  func (daemon *Daemon) PluginGetter() *plugin.Store {
  1516  	return daemon.PluginStore
  1517  }
  1518  
  1519  // CreateDaemonRoot creates the root for the daemon
  1520  func CreateDaemonRoot(config *config.Config) error {
  1521  	// get the canonical path to the Docker root directory
  1522  	var realRoot string
  1523  	if _, err := os.Stat(config.Root); err != nil && os.IsNotExist(err) {
  1524  		realRoot = config.Root
  1525  	} else {
  1526  		realRoot, err = fileutils.ReadSymlinkedDirectory(config.Root)
  1527  		if err != nil {
  1528  			return fmt.Errorf("Unable to get the full path to root (%s): %s", config.Root, err)
  1529  		}
  1530  	}
  1531  
  1532  	idMapping, err := setupRemappedRoot(config)
  1533  	if err != nil {
  1534  		return err
  1535  	}
  1536  	return setupDaemonRoot(config, realRoot, idMapping.RootPair())
  1537  }
  1538  
  1539  // RemapContainerdNamespaces returns the containerd namespaces to use.
  1540  //
  1541  // If the namespaces are not explicitly set in the configuration, and the
  1542  // daemon is running with user-namespace remapping enabled, it returns new
  1543  // (remapped) namespace names; otherwise it returns the configured namespaces.
  1544  func RemapContainerdNamespaces(config *config.Config) (ns string, pluginNs string, err error) {
  1545  	idMapping, err := setupRemappedRoot(config)
  1546  	if err != nil {
  1547  		return "", "", err
  1548  	}
  1549  	if idMapping.Empty() {
  1550  		return config.ContainerdNamespace, config.ContainerdPluginNamespace, nil
  1551  	}
  1552  	root := idMapping.RootPair()
  1553  
  1554  	ns = config.ContainerdNamespace
  1555  	if _, ok := config.ValuesSet["containerd-namespace"]; !ok {
  1556  		ns = fmt.Sprintf("%s-%d.%d", config.ContainerdNamespace, root.UID, root.GID)
  1557  	}
  1558  
  1559  	pluginNs = config.ContainerdPluginNamespace
  1560  	if _, ok := config.ValuesSet["containerd-plugin-namespace"]; !ok {
  1561  		pluginNs = fmt.Sprintf("%s-%d.%d", config.ContainerdPluginNamespace, root.UID, root.GID)
  1562  	}
  1563  
  1564  	return
  1565  }
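
        // Illustrative example (not part of the upstream source; assumes the
        // default namespaces "moby" and "plugins.moby"): with user-namespace
        // remapping mapping the daemon root to UID/GID 100000 and neither
        // namespace set explicitly, the returned names are suffixed with the
        // remapped root pair:
        //
        //	ns, pluginNs, _ := RemapContainerdNamespaces(cfg)
        //	// ns       == "moby-100000.100000"
        //	// pluginNs == "plugins.moby-100000.100000"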
  1566  
  1567  // checkpointAndSave grabs a container lock to safely call container.CheckpointTo
  1568  func (daemon *Daemon) checkpointAndSave(container *container.Container) error {
  1569  	container.Lock()
  1570  	defer container.Unlock()
  1571  	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
  1572  		return fmt.Errorf("Error saving container state: %v", err)
  1573  	}
  1574  	return nil
  1575  }
  1576  
  1577  // fixMemorySwappiness clears the MemorySwappiness value on the server side,
  1578  // because the CLI sends a -1 when it wants to unset the value.
  1579  func fixMemorySwappiness(resources *containertypes.Resources) {
  1580  	if resources.MemorySwappiness != nil && *resources.MemorySwappiness == -1 {
  1581  		resources.MemorySwappiness = nil
  1582  	}
  1583  }
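
        // Illustrative example (not part of the upstream source):
        //
        //	swappiness := int64(-1)
        //	resources := &containertypes.Resources{MemorySwappiness: &swappiness}
        //	fixMemorySwappiness(resources)
        //	// resources.MemorySwappiness is now nil; any other value is left as-is.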
  1584  
  1585  // GetAttachmentStore returns the current attachment store associated with the daemon
  1586  func (daemon *Daemon) GetAttachmentStore() *network.AttachmentStore {
  1587  	return &daemon.attachmentStore
  1588  }
  1589  
  1590  // IdentityMapping returns uid/gid mapping or a SID (in the case of Windows) for the builder
  1591  func (daemon *Daemon) IdentityMapping() idtools.IdentityMapping {
  1592  	return daemon.idMapping
  1593  }
  1594  
  1595  // ImageService returns the Daemon's ImageService
  1596  func (daemon *Daemon) ImageService() ImageService {
  1597  	return daemon.imageService
  1598  }
  1599  
  1600  // ImageBackend returns an image-backend for Swarm and the distribution router.
  1601  func (daemon *Daemon) ImageBackend() executorpkg.ImageBackend {
  1602  	return &imageBackend{
  1603  		ImageService:    daemon.imageService,
  1604  		registryService: daemon.registryService,
  1605  	}
  1606  }
  1607  
  1608  // RegistryService returns the Daemon's RegistryService
  1609  func (daemon *Daemon) RegistryService() *registry.Service {
  1610  	return daemon.registryService
  1611  }
  1612  
  1613  // BuilderBackend returns the backend used by the builder
  1614  func (daemon *Daemon) BuilderBackend() builder.Backend {
  1615  	return struct {
  1616  		*Daemon
  1617  		ImageService
  1618  	}{daemon, daemon.imageService}
  1619  }
  1620  
  1621  // RawSysInfo returns *sysinfo.SysInfo.
  1622  func (daemon *Daemon) RawSysInfo() *sysinfo.SysInfo {
  1623  	daemon.sysInfoOnce.Do(func() {
  1624  		// We only compute sysInfo if it has not been set, to allow
  1625  		// tests to override the actual sysInfo.
  1626  		if daemon.sysInfo == nil {
  1627  			daemon.sysInfo = getSysInfo(&daemon.config().Config)
  1628  		}
  1629  	})
  1630  
  1631  	return daemon.sysInfo
  1632  }
  1633  
  1634  // imageBackend is used to satisfy the [executorpkg.ImageBackend] and
  1635  // [github.com/docker/docker/api/server/router/distribution.Backend]
  1636  // interfaces.
  1637  type imageBackend struct {
  1638  	ImageService
  1639  	registryService *registry.Service
  1640  }
  1641  
  1642  // GetRepositories returns a list of repositories configured for the given
  1643  // reference. Multiple repositories can be returned if the reference is for
  1644  // the default (Docker Hub) registry and a mirror is configured, but it omits
  1645  // registries that were not reachable (pinging the /v2/ endpoint failed).
  1646  //
  1647  // It returns an error if it was unable to reach any of the registries for
  1648  // the given reference, or if the provided reference is invalid.
  1649  func (i *imageBackend) GetRepositories(ctx context.Context, ref reference.Named, authConfig *registrytypes.AuthConfig) ([]dist.Repository, error) {
  1650  	return distribution.GetRepositories(ctx, ref, &distribution.ImagePullConfig{
  1651  		Config: distribution.Config{
  1652  			AuthConfig:      authConfig,
  1653  			RegistryService: i.registryService,
  1654  		},
  1655  	})
  1656  }
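
        // Illustrative usage sketch (hypothetical reference and variable names;
        // not part of the upstream source):
        //
        //	ref, _ := reference.ParseNormalizedNamed("docker.io/library/busybox")
        //	repos, err := backend.GetRepositories(ctx, ref, &registrytypes.AuthConfig{})
        //	// With a registry mirror configured for Docker Hub, repos may contain
        //	// more than one repository; unreachable registries are omitted.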