github.com/mforkel/docker-ce-i386@v17.12.1-ce-rc2+incompatible/components/engine/daemon/daemon_unix.go (about)

     1  // +build linux freebsd
     2  
     3  package daemon
     4  
     5  import (
     6  	"bufio"
     7  	"bytes"
     8  	"context"
     9  	"fmt"
    10  	"io/ioutil"
    11  	"net"
    12  	"os"
    13  	"path/filepath"
    14  	"runtime"
    15  	"runtime/debug"
    16  	"strconv"
    17  	"strings"
    18  	"time"
    19  
    20  	containerd_cgroups "github.com/containerd/cgroups"
    21  	"github.com/docker/docker/api/types"
    22  	"github.com/docker/docker/api/types/blkiodev"
    23  	pblkiodev "github.com/docker/docker/api/types/blkiodev"
    24  	containertypes "github.com/docker/docker/api/types/container"
    25  	"github.com/docker/docker/container"
    26  	"github.com/docker/docker/daemon/config"
    27  	"github.com/docker/docker/image"
    28  	"github.com/docker/docker/opts"
    29  	"github.com/docker/docker/pkg/containerfs"
    30  	"github.com/docker/docker/pkg/idtools"
    31  	"github.com/docker/docker/pkg/ioutils"
    32  	"github.com/docker/docker/pkg/mount"
    33  	"github.com/docker/docker/pkg/parsers"
    34  	"github.com/docker/docker/pkg/parsers/kernel"
    35  	"github.com/docker/docker/pkg/sysinfo"
    36  	"github.com/docker/docker/runconfig"
    37  	"github.com/docker/docker/volume"
    38  	"github.com/docker/libnetwork"
    39  	nwconfig "github.com/docker/libnetwork/config"
    40  	"github.com/docker/libnetwork/drivers/bridge"
    41  	"github.com/docker/libnetwork/netlabel"
    42  	"github.com/docker/libnetwork/netutils"
    43  	"github.com/docker/libnetwork/options"
    44  	lntypes "github.com/docker/libnetwork/types"
    45  	"github.com/opencontainers/runc/libcontainer/cgroups"
    46  	rsystem "github.com/opencontainers/runc/libcontainer/system"
    47  	specs "github.com/opencontainers/runtime-spec/specs-go"
    48  	"github.com/opencontainers/selinux/go-selinux/label"
    49  	"github.com/pkg/errors"
    50  	"github.com/sirupsen/logrus"
    51  	"github.com/vishvananda/netlink"
    52  	"golang.org/x/sys/unix"
    53  )
    54  
// Platform defaults and limits used by the unix (linux/freebsd) daemon code.
const (
	// DefaultShimBinary is the default shim to be used by containerd if none
	// is specified
	DefaultShimBinary = "docker-containerd-shim"

	// DefaultRuntimeBinary is the default runtime to be used by
	// containerd if none is specified
	DefaultRuntimeBinary = "docker-runc"

	// Bounds applied to a container's CPU shares: adaptContainerSettings
	// clamps requested values into [linuxMinCPUShares, linuxMaxCPUShares].
	// See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
	linuxMinCPUShares = 2
	linuxMaxCPUShares = 262144
	// NOTE(review): not referenced in the visible part of this file;
	// presumably marks this platform as supported by the daemon - confirm.
	platformSupported = true
	// linuxMinMemory (4MB, in bytes) is the smallest memory limit, memory
	// reservation or kernel memory limit accepted by verifyContainerResources.
	// It is not a kernel limit; the 4M floor exists so that a container stays
	// reasonably functional.
	linuxMinMemory = 4194304
	// constants for remapped root settings
	defaultIDSpecifier string = "default"
	defaultRemappedID  string = "dockremap"

	// constant for cgroup drivers
	// (the values accepted for the "native.cgroupdriver" exec option)
	cgroupFsDriver      = "cgroupfs"
	cgroupSystemdDriver = "systemd"

	// DefaultRuntimeName is the default runtime to be used by
	// containerd if none is specified
	DefaultRuntimeName = "docker-runc"
)
    82  
// containerGetter is the lookup capability adaptSharedNamespaceContainer
// needs: resolving a container reference (passed as a string) to a
// *container.Container. Keeping it as a one-method interface lets that
// helper work with anything that can perform the lookup.
type containerGetter interface {
	// GetContainer returns the container matching the given reference, or an
	// error if the lookup fails.
	GetContainer(string) (*container.Container, error)
}
    86  
    87  func getMemoryResources(config containertypes.Resources) *specs.LinuxMemory {
    88  	memory := specs.LinuxMemory{}
    89  
    90  	if config.Memory > 0 {
    91  		memory.Limit = &config.Memory
    92  	}
    93  
    94  	if config.MemoryReservation > 0 {
    95  		memory.Reservation = &config.MemoryReservation
    96  	}
    97  
    98  	if config.MemorySwap > 0 {
    99  		memory.Swap = &config.MemorySwap
   100  	}
   101  
   102  	if config.MemorySwappiness != nil {
   103  		swappiness := uint64(*config.MemorySwappiness)
   104  		memory.Swappiness = &swappiness
   105  	}
   106  
   107  	if config.KernelMemory != 0 {
   108  		memory.Kernel = &config.KernelMemory
   109  	}
   110  
   111  	return &memory
   112  }
   113  
   114  func getCPUResources(config containertypes.Resources) (*specs.LinuxCPU, error) {
   115  	cpu := specs.LinuxCPU{}
   116  
   117  	if config.CPUShares < 0 {
   118  		return nil, fmt.Errorf("shares: invalid argument")
   119  	}
   120  	if config.CPUShares >= 0 {
   121  		shares := uint64(config.CPUShares)
   122  		cpu.Shares = &shares
   123  	}
   124  
   125  	if config.CpusetCpus != "" {
   126  		cpu.Cpus = config.CpusetCpus
   127  	}
   128  
   129  	if config.CpusetMems != "" {
   130  		cpu.Mems = config.CpusetMems
   131  	}
   132  
   133  	if config.NanoCPUs > 0 {
   134  		// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
   135  		period := uint64(100 * time.Millisecond / time.Microsecond)
   136  		quota := config.NanoCPUs * int64(period) / 1e9
   137  		cpu.Period = &period
   138  		cpu.Quota = &quota
   139  	}
   140  
   141  	if config.CPUPeriod != 0 {
   142  		period := uint64(config.CPUPeriod)
   143  		cpu.Period = &period
   144  	}
   145  
   146  	if config.CPUQuota != 0 {
   147  		q := config.CPUQuota
   148  		cpu.Quota = &q
   149  	}
   150  
   151  	if config.CPURealtimePeriod != 0 {
   152  		period := uint64(config.CPURealtimePeriod)
   153  		cpu.RealtimePeriod = &period
   154  	}
   155  
   156  	if config.CPURealtimeRuntime != 0 {
   157  		c := config.CPURealtimeRuntime
   158  		cpu.RealtimeRuntime = &c
   159  	}
   160  
   161  	return &cpu, nil
   162  }
   163  
   164  func getBlkioWeightDevices(config containertypes.Resources) ([]specs.LinuxWeightDevice, error) {
   165  	var stat unix.Stat_t
   166  	var blkioWeightDevices []specs.LinuxWeightDevice
   167  
   168  	for _, weightDevice := range config.BlkioWeightDevice {
   169  		if err := unix.Stat(weightDevice.Path, &stat); err != nil {
   170  			return nil, err
   171  		}
   172  		weight := weightDevice.Weight
   173  		d := specs.LinuxWeightDevice{Weight: &weight}
   174  		d.Major = int64(stat.Rdev / 256)
   175  		d.Minor = int64(stat.Rdev % 256)
   176  		blkioWeightDevices = append(blkioWeightDevices, d)
   177  	}
   178  
   179  	return blkioWeightDevices, nil
   180  }
   181  
   182  func (daemon *Daemon) parseSecurityOpt(container *container.Container, hostConfig *containertypes.HostConfig) error {
   183  	container.NoNewPrivileges = daemon.configStore.NoNewPrivileges
   184  	return parseSecurityOpt(container, hostConfig)
   185  }
   186  
   187  func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error {
   188  	var (
   189  		labelOpts []string
   190  		err       error
   191  	)
   192  
   193  	for _, opt := range config.SecurityOpt {
   194  		if opt == "no-new-privileges" {
   195  			container.NoNewPrivileges = true
   196  			continue
   197  		}
   198  		if opt == "disable" {
   199  			labelOpts = append(labelOpts, "disable")
   200  			continue
   201  		}
   202  
   203  		var con []string
   204  		if strings.Contains(opt, "=") {
   205  			con = strings.SplitN(opt, "=", 2)
   206  		} else if strings.Contains(opt, ":") {
   207  			con = strings.SplitN(opt, ":", 2)
   208  			logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 17.04, use `=` instead.")
   209  		}
   210  		if len(con) != 2 {
   211  			return fmt.Errorf("invalid --security-opt 1: %q", opt)
   212  		}
   213  
   214  		switch con[0] {
   215  		case "label":
   216  			labelOpts = append(labelOpts, con[1])
   217  		case "apparmor":
   218  			container.AppArmorProfile = con[1]
   219  		case "seccomp":
   220  			container.SeccompProfile = con[1]
   221  		case "no-new-privileges":
   222  			noNewPrivileges, err := strconv.ParseBool(con[1])
   223  			if err != nil {
   224  				return fmt.Errorf("invalid --security-opt 2: %q", opt)
   225  			}
   226  			container.NoNewPrivileges = noNewPrivileges
   227  		default:
   228  			return fmt.Errorf("invalid --security-opt 2: %q", opt)
   229  		}
   230  	}
   231  
   232  	container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts)
   233  	return err
   234  }
   235  
   236  func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.LinuxThrottleDevice, error) {
   237  	var throttleDevices []specs.LinuxThrottleDevice
   238  	var stat unix.Stat_t
   239  
   240  	for _, d := range devs {
   241  		if err := unix.Stat(d.Path, &stat); err != nil {
   242  			return nil, err
   243  		}
   244  		d := specs.LinuxThrottleDevice{Rate: d.Rate}
   245  		d.Major = int64(stat.Rdev / 256)
   246  		d.Minor = int64(stat.Rdev % 256)
   247  		throttleDevices = append(throttleDevices, d)
   248  	}
   249  
   250  	return throttleDevices, nil
   251  }
   252  
   253  func checkKernel() error {
   254  	// Check for unsupported kernel versions
   255  	// FIXME: it would be cleaner to not test for specific versions, but rather
   256  	// test for specific functionalities.
   257  	// Unfortunately we can't test for the feature "does not cause a kernel panic"
   258  	// without actually causing a kernel panic, so we need this workaround until
   259  	// the circumstances of pre-3.10 crashes are clearer.
   260  	// For details see https://github.com/docker/docker/issues/407
   261  	// Docker 1.11 and above doesn't actually run on kernels older than 3.4,
   262  	// due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4).
   263  	if !kernel.CheckKernelVersion(3, 10, 0) {
   264  		v, _ := kernel.GetKernelVersion()
   265  		if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" {
   266  			logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String())
   267  		}
   268  	}
   269  	return nil
   270  }
   271  
   272  // adaptContainerSettings is called during container creation to modify any
   273  // settings necessary in the HostConfig structure.
   274  func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error {
   275  	if adjustCPUShares && hostConfig.CPUShares > 0 {
   276  		// Handle unsupported CPUShares
   277  		if hostConfig.CPUShares < linuxMinCPUShares {
   278  			logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares)
   279  			hostConfig.CPUShares = linuxMinCPUShares
   280  		} else if hostConfig.CPUShares > linuxMaxCPUShares {
   281  			logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares)
   282  			hostConfig.CPUShares = linuxMaxCPUShares
   283  		}
   284  	}
   285  	if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 {
   286  		// By default, MemorySwap is set to twice the size of Memory.
   287  		hostConfig.MemorySwap = hostConfig.Memory * 2
   288  	}
   289  	if hostConfig.ShmSize == 0 {
   290  		hostConfig.ShmSize = config.DefaultShmSize
   291  		if daemon.configStore != nil {
   292  			hostConfig.ShmSize = int64(daemon.configStore.ShmSize)
   293  		}
   294  	}
   295  	// Set default IPC mode, if unset for container
   296  	if hostConfig.IpcMode.IsEmpty() {
   297  		m := config.DefaultIpcMode
   298  		if daemon.configStore != nil {
   299  			m = daemon.configStore.IpcMode
   300  		}
   301  		hostConfig.IpcMode = containertypes.IpcMode(m)
   302  	}
   303  
   304  	adaptSharedNamespaceContainer(daemon, hostConfig)
   305  
   306  	var err error
   307  	opts, err := daemon.generateSecurityOpt(hostConfig)
   308  	if err != nil {
   309  		return err
   310  	}
   311  	hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...)
   312  	if hostConfig.OomKillDisable == nil {
   313  		defaultOomKillDisable := false
   314  		hostConfig.OomKillDisable = &defaultOomKillDisable
   315  	}
   316  
   317  	return nil
   318  }
   319  
   320  // adaptSharedNamespaceContainer replaces container name with its ID in hostConfig.
   321  // To be more precisely, it modifies `container:name` to `container:ID` of PidMode, IpcMode
   322  // and NetworkMode.
   323  //
   324  // When a container shares its namespace with another container, use ID can keep the namespace
   325  // sharing connection between the two containers even the another container is renamed.
   326  func adaptSharedNamespaceContainer(daemon containerGetter, hostConfig *containertypes.HostConfig) {
   327  	containerPrefix := "container:"
   328  	if hostConfig.PidMode.IsContainer() {
   329  		pidContainer := hostConfig.PidMode.Container()
   330  		// if there is any error returned here, we just ignore it and leave it to be
   331  		// handled in the following logic
   332  		if c, err := daemon.GetContainer(pidContainer); err == nil {
   333  			hostConfig.PidMode = containertypes.PidMode(containerPrefix + c.ID)
   334  		}
   335  	}
   336  	if hostConfig.IpcMode.IsContainer() {
   337  		ipcContainer := hostConfig.IpcMode.Container()
   338  		if c, err := daemon.GetContainer(ipcContainer); err == nil {
   339  			hostConfig.IpcMode = containertypes.IpcMode(containerPrefix + c.ID)
   340  		}
   341  	}
   342  	if hostConfig.NetworkMode.IsContainer() {
   343  		netContainer := hostConfig.NetworkMode.ConnectedContainer()
   344  		if c, err := daemon.GetContainer(netContainer); err == nil {
   345  			hostConfig.NetworkMode = containertypes.NetworkMode(containerPrefix + c.ID)
   346  		}
   347  	}
   348  }
   349  
// verifyContainerResources validates the requested container resources against
// the capabilities reported in sysInfo and adjusts them in place.
//
// Two kinds of problems are distinguished:
//   - a setting the host cannot honour (per sysInfo) is reset to its "unset"
//     value, and a warning is both appended to the returned slice and logged;
//   - a setting that is invalid in itself (out of range, conflicting with
//     another option, ...) makes the function return an error immediately.
//
// The checks are order-dependent: a setting discarded by an earlier check is
// no longer seen by the later range/consistency checks.
//
// update is only consulted by the "MemorySwap without Memory" check, which is
// skipped when it is true.
//
// The returned warnings slice is never nil and is also returned (with whatever
// was collected so far) alongside a non-nil error.
func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) {
	warnings := []string{}
	// NOTE(review): fixMemorySwappiness is defined elsewhere; presumably it
	// normalises a sentinel swappiness value before the checks below - confirm.
	fixMemorySwappiness(resources)

	// memory subsystem checks and adjustments
	if resources.Memory != 0 && resources.Memory < linuxMinMemory {
		return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB")
	}
	if resources.Memory > 0 && !sysInfo.MemoryLimit {
		warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		resources.Memory = 0
		resources.MemorySwap = -1
	}
	if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit {
		warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.")
		logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.")
		resources.MemorySwap = -1
	}
	if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory {
		return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage")
	}
	// On update a swap limit may be given on its own, so this check only
	// applies when update is false.
	if resources.Memory == 0 && resources.MemorySwap > 0 && !update {
		return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage")
	}
	if resources.MemorySwappiness != nil && !sysInfo.MemorySwappiness {
		warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.")
		logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.")
		resources.MemorySwappiness = nil
	}
	if resources.MemorySwappiness != nil {
		swappiness := *resources.MemorySwappiness
		if swappiness < 0 || swappiness > 100 {
			return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness)
		}
	}
	if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation {
		warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
		logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
		resources.MemoryReservation = 0
	}
	if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory {
		return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB")
	}
	if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation {
		return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage")
	}
	if resources.KernelMemory > 0 && !sysInfo.KernelMemory {
		warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		resources.KernelMemory = 0
	}
	if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory {
		return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB")
	}
	// Warning only: the kernel memory limit is kept on kernels older than 4.0.
	if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) {
		warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
		logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
	}
	if resources.OomKillDisable != nil && !sysInfo.OomKillDisable {
		// only produce warnings if the setting wasn't to *disable* the OOM Kill; no point
		// warning the caller if they already wanted the feature to be off
		if *resources.OomKillDisable {
			warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.")
			logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.")
		}
		resources.OomKillDisable = nil
	}

	if resources.PidsLimit != 0 && !sysInfo.PidsLimit {
		warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
		logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
		resources.PidsLimit = 0
	}

	// cpu subsystem checks and adjustments
	// NanoCPUs is mutually exclusive with an explicit CFS period or quota, and
	// is a hard error (not a warning) when CFS period/quota are unsupported.
	if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 {
		return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set")
	}
	if resources.NanoCPUs > 0 && resources.CPUQuota > 0 {
		return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set")
	}
	if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) {
		return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted")
	}
	// The highest precision we could get on Linux is 0.001, by setting
	//   cpu.cfs_period_us=1000ms
	//   cpu.cfs_quota=1ms
	// See the following link for details:
	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
	// Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error.
	// The error message is 0.01 so that this is consistent with Windows
	if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 {
		return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU())
	}

	if resources.CPUShares > 0 && !sysInfo.CPUShares {
		warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
		logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
		resources.CPUShares = 0
	}
	if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod {
		warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
		logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
		resources.CPUPeriod = 0
	}
	if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) {
		return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)")
	}
	if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota {
		warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
		logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
		resources.CPUQuota = 0
	}
	if resources.CPUQuota > 0 && resources.CPUQuota < 1000 {
		return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)")
	}
	// CPUPercent is always discarded here, independent of sysInfo.
	if resources.CPUPercent > 0 {
		warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS))
		logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)
		resources.CPUPercent = 0
	}

	// cpuset subsystem checks and adjustments
	if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset {
		warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
		logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
		resources.CpusetCpus = ""
		resources.CpusetMems = ""
	}
	// The availability checks run unconditionally, i.e. also for empty
	// cpuset strings.
	cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus)
	if err != nil {
		return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus)
	}
	if !cpusAvailable {
		return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus)
	}
	memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems)
	if err != nil {
		return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems)
	}
	if !memsAvailable {
		return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems)
	}

	// blkio subsystem checks and adjustments
	if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight {
		warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
		logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
		resources.BlkioWeight = 0
	}
	if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) {
		return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000")
	}
	// These two QoS settings are rejected outright on this platform.
	if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 {
		return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS)
	}
	// Unsupported per-device settings are replaced by empty (non-nil) slices.
	if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice {
		warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
		logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
		resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{}
	}
	if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice {
		warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.")
		logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded")
		resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{}
	}
	if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice {
		warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
		logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
		resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{}

	}
	if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice {
		warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
		logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
		resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{}
	}
	if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice {
		warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
		logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
		resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{}
	}

	return warnings, nil
}
   536  
   537  func (daemon *Daemon) getCgroupDriver() string {
   538  	cgroupDriver := cgroupFsDriver
   539  
   540  	if UsingSystemd(daemon.configStore) {
   541  		cgroupDriver = cgroupSystemdDriver
   542  	}
   543  	return cgroupDriver
   544  }
   545  
   546  // getCD gets the raw value of the native.cgroupdriver option, if set.
   547  func getCD(config *config.Config) string {
   548  	for _, option := range config.ExecOptions {
   549  		key, val, err := parsers.ParseKeyValueOpt(option)
   550  		if err != nil || !strings.EqualFold(key, "native.cgroupdriver") {
   551  			continue
   552  		}
   553  		return val
   554  	}
   555  	return ""
   556  }
   557  
   558  // VerifyCgroupDriver validates native.cgroupdriver
   559  func VerifyCgroupDriver(config *config.Config) error {
   560  	cd := getCD(config)
   561  	if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver {
   562  		return nil
   563  	}
   564  	return fmt.Errorf("native.cgroupdriver option %s not supported", cd)
   565  }
   566  
   567  // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd
   568  func UsingSystemd(config *config.Config) bool {
   569  	return getCD(config) == cgroupSystemdDriver
   570  }
   571  
   572  // verifyPlatformContainerSettings performs platform-specific validation of the
   573  // hostconfig and config structures.
   574  func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) {
   575  	var warnings []string
   576  	sysInfo := sysinfo.New(true)
   577  
   578  	warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config)
   579  	if err != nil {
   580  		return warnings, err
   581  	}
   582  
   583  	w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update)
   584  
   585  	// no matter err is nil or not, w could have data in itself.
   586  	warnings = append(warnings, w...)
   587  
   588  	if err != nil {
   589  		return warnings, err
   590  	}
   591  
   592  	if hostConfig.ShmSize < 0 {
   593  		return warnings, fmt.Errorf("SHM size can not be less than 0")
   594  	}
   595  
   596  	if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 {
   597  		return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj)
   598  	}
   599  
   600  	// ip-forwarding does not affect container with '--net=host' (or '--net=none')
   601  	if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) {
   602  		warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
   603  		logrus.Warn("IPv4 forwarding is disabled. Networking will not work")
   604  	}
   605  	// check for various conflicting options with user namespaces
   606  	if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
   607  		if hostConfig.Privileged {
   608  			return warnings, fmt.Errorf("privileged mode is incompatible with user namespaces.  You must run the container in the host namespace when running privileged mode")
   609  		}
   610  		if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   611  			return warnings, fmt.Errorf("cannot share the host's network namespace when user namespaces are enabled")
   612  		}
   613  		if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   614  			return warnings, fmt.Errorf("cannot share the host PID namespace when user namespaces are enabled")
   615  		}
   616  	}
   617  	if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) {
   618  		// CgroupParent for systemd cgroup should be named as "xxx.slice"
   619  		if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") {
   620  			return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   621  		}
   622  	}
   623  	if hostConfig.Runtime == "" {
   624  		hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
   625  	}
   626  
   627  	if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil {
   628  		return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime)
   629  	}
   630  
   631  	parser := volume.NewParser(runtime.GOOS)
   632  	for dest := range hostConfig.Tmpfs {
   633  		if err := parser.ValidateTmpfsMountDestination(dest); err != nil {
   634  			return warnings, err
   635  		}
   636  	}
   637  
   638  	return warnings, nil
   639  }
   640  
   641  func (daemon *Daemon) loadRuntimes() error {
   642  	return daemon.initRuntimes(daemon.configStore.Runtimes)
   643  }
   644  
   645  func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error) {
   646  	runtimeDir := filepath.Join(daemon.configStore.Root, "runtimes")
   647  	// Remove old temp directory if any
   648  	os.RemoveAll(runtimeDir + "-old")
   649  	tmpDir, err := ioutils.TempDir(daemon.configStore.Root, "gen-runtimes")
   650  	if err != nil {
   651  		return errors.Wrapf(err, "failed to get temp dir to generate runtime scripts")
   652  	}
   653  	defer func() {
   654  		if err != nil {
   655  			if err1 := os.RemoveAll(tmpDir); err1 != nil {
   656  				logrus.WithError(err1).WithField("dir", tmpDir).
   657  					Warnf("failed to remove tmp dir")
   658  			}
   659  			return
   660  		}
   661  
   662  		if err = os.Rename(runtimeDir, runtimeDir+"-old"); err != nil {
   663  			return
   664  		}
   665  		if err = os.Rename(tmpDir, runtimeDir); err != nil {
   666  			err = errors.Wrapf(err, "failed to setup runtimes dir, new containers may not start")
   667  			return
   668  		}
   669  		if err = os.RemoveAll(runtimeDir + "-old"); err != nil {
   670  			logrus.WithError(err).WithField("dir", tmpDir).
   671  				Warnf("failed to remove old runtimes dir")
   672  		}
   673  	}()
   674  
   675  	for name, rt := range runtimes {
   676  		if len(rt.Args) == 0 {
   677  			continue
   678  		}
   679  
   680  		script := filepath.Join(tmpDir, name)
   681  		content := fmt.Sprintf("#!/bin/sh\n%s %s $@\n", rt.Path, strings.Join(rt.Args, " "))
   682  		if err := ioutil.WriteFile(script, []byte(content), 0700); err != nil {
   683  			return err
   684  		}
   685  	}
   686  	return nil
   687  }
   688  
   689  // reloadPlatform updates configuration with platform specific options
   690  // and updates the passed attributes
   691  func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) error {
   692  	if err := conf.ValidatePlatformConfig(); err != nil {
   693  		return err
   694  	}
   695  
   696  	if conf.IsValueSet("runtimes") {
   697  		// Always set the default one
   698  		conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   699  		if err := daemon.initRuntimes(conf.Runtimes); err != nil {
   700  			return err
   701  		}
   702  		daemon.configStore.Runtimes = conf.Runtimes
   703  	}
   704  
   705  	if conf.DefaultRuntime != "" {
   706  		daemon.configStore.DefaultRuntime = conf.DefaultRuntime
   707  	}
   708  
   709  	if conf.IsValueSet("default-shm-size") {
   710  		daemon.configStore.ShmSize = conf.ShmSize
   711  	}
   712  
   713  	if conf.IpcMode != "" {
   714  		daemon.configStore.IpcMode = conf.IpcMode
   715  	}
   716  
   717  	// Update attributes
   718  	var runtimeList bytes.Buffer
   719  	for name, rt := range daemon.configStore.Runtimes {
   720  		if runtimeList.Len() > 0 {
   721  			runtimeList.WriteRune(' ')
   722  		}
   723  		runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt))
   724  	}
   725  
   726  	attributes["runtimes"] = runtimeList.String()
   727  	attributes["default-runtime"] = daemon.configStore.DefaultRuntime
   728  	attributes["default-shm-size"] = fmt.Sprintf("%d", daemon.configStore.ShmSize)
   729  	attributes["default-ipc-mode"] = daemon.configStore.IpcMode
   730  
   731  	return nil
   732  }
   733  
   734  // verifyDaemonSettings performs validation of daemon config struct
   735  func verifyDaemonSettings(conf *config.Config) error {
   736  	// Check for mutually incompatible config options
   737  	if conf.BridgeConfig.Iface != "" && conf.BridgeConfig.IP != "" {
   738  		return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one")
   739  	}
   740  	if !conf.BridgeConfig.EnableIPTables && !conf.BridgeConfig.InterContainerCommunication {
   741  		return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true")
   742  	}
   743  	if !conf.BridgeConfig.EnableIPTables && conf.BridgeConfig.EnableIPMasq {
   744  		conf.BridgeConfig.EnableIPMasq = false
   745  	}
   746  	if err := VerifyCgroupDriver(conf); err != nil {
   747  		return err
   748  	}
   749  	if conf.CgroupParent != "" && UsingSystemd(conf) {
   750  		if len(conf.CgroupParent) <= 6 || !strings.HasSuffix(conf.CgroupParent, ".slice") {
   751  			return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   752  		}
   753  	}
   754  
   755  	if conf.DefaultRuntime == "" {
   756  		conf.DefaultRuntime = config.StockRuntimeName
   757  	}
   758  	if conf.Runtimes == nil {
   759  		conf.Runtimes = make(map[string]types.Runtime)
   760  	}
   761  	conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeName}
   762  
   763  	return nil
   764  }
   765  
   766  // checkSystem validates platform-specific requirements
   767  func checkSystem() error {
   768  	if os.Geteuid() != 0 {
   769  		return fmt.Errorf("The Docker daemon needs to be run as root")
   770  	}
   771  	return checkKernel()
   772  }
   773  
   774  // configureMaxThreads sets the Go runtime max threads threshold
   775  // which is 90% of the kernel setting from /proc/sys/kernel/threads-max
   776  func configureMaxThreads(config *config.Config) error {
   777  	mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
   778  	if err != nil {
   779  		return err
   780  	}
   781  	mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
   782  	if err != nil {
   783  		return err
   784  	}
   785  	maxThreads := (mtint / 100) * 90
   786  	debug.SetMaxThreads(maxThreads)
   787  	logrus.Debugf("Golang's threads limit set to %d", maxThreads)
   788  	return nil
   789  }
   790  
   791  func overlaySupportsSelinux() (bool, error) {
   792  	f, err := os.Open("/proc/kallsyms")
   793  	if err != nil {
   794  		if os.IsNotExist(err) {
   795  			return false, nil
   796  		}
   797  		return false, err
   798  	}
   799  	defer f.Close()
   800  
   801  	var symAddr, symType, symName, text string
   802  
   803  	s := bufio.NewScanner(f)
   804  	for s.Scan() {
   805  		if err := s.Err(); err != nil {
   806  			return false, err
   807  		}
   808  
   809  		text = s.Text()
   810  		if _, err := fmt.Sscanf(text, "%s %s %s", &symAddr, &symType, &symName); err != nil {
   811  			return false, fmt.Errorf("Scanning '%s' failed: %s", text, err)
   812  		}
   813  
   814  		// Check for presence of symbol security_inode_copy_up.
   815  		if symName == "security_inode_copy_up" {
   816  			return true, nil
   817  		}
   818  	}
   819  	return false, nil
   820  }
   821  
   822  // configureKernelSecuritySupport configures and validates security support for the kernel
   823  func configureKernelSecuritySupport(config *config.Config, driverNames []string) error {
   824  	if config.EnableSelinuxSupport {
   825  		if !selinuxEnabled() {
   826  			logrus.Warn("Docker could not enable SELinux on the host system")
   827  			return nil
   828  		}
   829  
   830  		overlayFound := false
   831  		for _, d := range driverNames {
   832  			if d == "overlay" || d == "overlay2" {
   833  				overlayFound = true
   834  				break
   835  			}
   836  		}
   837  
   838  		if overlayFound {
   839  			// If driver is overlay or overlay2, make sure kernel
   840  			// supports selinux with overlay.
   841  			supported, err := overlaySupportsSelinux()
   842  			if err != nil {
   843  				return err
   844  			}
   845  
   846  			if !supported {
   847  				logrus.Warnf("SELinux is not supported with the %v graph driver on this kernel", driverNames)
   848  			}
   849  		}
   850  	} else {
   851  		selinuxSetDisabled()
   852  	}
   853  	return nil
   854  }
   855  
   856  func (daemon *Daemon) initNetworkController(config *config.Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
   857  	netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
   858  	if err != nil {
   859  		return nil, err
   860  	}
   861  
   862  	controller, err := libnetwork.New(netOptions...)
   863  	if err != nil {
   864  		return nil, fmt.Errorf("error obtaining controller instance: %v", err)
   865  	}
   866  
   867  	if len(activeSandboxes) > 0 {
   868  		logrus.Info("There are old running containers, the network config will not take affect")
   869  		return controller, nil
   870  	}
   871  
   872  	// Initialize default network on "null"
   873  	if n, _ := controller.NetworkByName("none"); n == nil {
   874  		if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   875  			return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
   876  		}
   877  	}
   878  
   879  	// Initialize default network on "host"
   880  	if n, _ := controller.NetworkByName("host"); n == nil {
   881  		if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   882  			return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
   883  		}
   884  	}
   885  
   886  	// Clear stale bridge network
   887  	if n, err := controller.NetworkByName("bridge"); err == nil {
   888  		if err = n.Delete(); err != nil {
   889  			return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
   890  		}
   891  	}
   892  
   893  	if !config.DisableBridge {
   894  		// Initialize default driver "bridge"
   895  		if err := initBridgeDriver(controller, config); err != nil {
   896  			return nil, err
   897  		}
   898  	} else {
   899  		removeDefaultBridgeInterface()
   900  	}
   901  
   902  	return controller, nil
   903  }
   904  
   905  func driverOptions(config *config.Config) []nwconfig.Option {
   906  	bridgeConfig := options.Generic{
   907  		"EnableIPForwarding":  config.BridgeConfig.EnableIPForward,
   908  		"EnableIPTables":      config.BridgeConfig.EnableIPTables,
   909  		"EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy,
   910  		"UserlandProxyPath":   config.BridgeConfig.UserlandProxyPath}
   911  	bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}
   912  
   913  	dOptions := []nwconfig.Option{}
   914  	dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption))
   915  	return dOptions
   916  }
   917  
   918  func initBridgeDriver(controller libnetwork.NetworkController, config *config.Config) error {
   919  	bridgeName := bridge.DefaultBridgeName
   920  	if config.BridgeConfig.Iface != "" {
   921  		bridgeName = config.BridgeConfig.Iface
   922  	}
   923  	netOption := map[string]string{
   924  		bridge.BridgeName:         bridgeName,
   925  		bridge.DefaultBridge:      strconv.FormatBool(true),
   926  		netlabel.DriverMTU:        strconv.Itoa(config.Mtu),
   927  		bridge.EnableIPMasquerade: strconv.FormatBool(config.BridgeConfig.EnableIPMasq),
   928  		bridge.EnableICC:          strconv.FormatBool(config.BridgeConfig.InterContainerCommunication),
   929  	}
   930  
   931  	// --ip processing
   932  	if config.BridgeConfig.DefaultIP != nil {
   933  		netOption[bridge.DefaultBindingIP] = config.BridgeConfig.DefaultIP.String()
   934  	}
   935  
   936  	var (
   937  		ipamV4Conf *libnetwork.IpamConf
   938  		ipamV6Conf *libnetwork.IpamConf
   939  	)
   940  
   941  	ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   942  
   943  	nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
   944  	if err != nil {
   945  		return errors.Wrap(err, "list bridge addresses failed")
   946  	}
   947  
   948  	nw := nwList[0]
   949  	if len(nwList) > 1 && config.BridgeConfig.FixedCIDR != "" {
   950  		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
   951  		if err != nil {
   952  			return errors.Wrap(err, "parse CIDR failed")
   953  		}
   954  		// Iterate through in case there are multiple addresses for the bridge
   955  		for _, entry := range nwList {
   956  			if fCIDR.Contains(entry.IP) {
   957  				nw = entry
   958  				break
   959  			}
   960  		}
   961  	}
   962  
   963  	ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
   964  	hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
   965  	if hip.IsGlobalUnicast() {
   966  		ipamV4Conf.Gateway = nw.IP.String()
   967  	}
   968  
   969  	if config.BridgeConfig.IP != "" {
   970  		ipamV4Conf.PreferredPool = config.BridgeConfig.IP
   971  		ip, _, err := net.ParseCIDR(config.BridgeConfig.IP)
   972  		if err != nil {
   973  			return err
   974  		}
   975  		ipamV4Conf.Gateway = ip.String()
   976  	} else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
   977  		logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
   978  	}
   979  
   980  	if config.BridgeConfig.FixedCIDR != "" {
   981  		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
   982  		if err != nil {
   983  			return err
   984  		}
   985  
   986  		ipamV4Conf.SubPool = fCIDR.String()
   987  	}
   988  
   989  	if config.BridgeConfig.DefaultGatewayIPv4 != nil {
   990  		ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.BridgeConfig.DefaultGatewayIPv4.String()
   991  	}
   992  
   993  	var deferIPv6Alloc bool
   994  	if config.BridgeConfig.FixedCIDRv6 != "" {
   995  		_, fCIDRv6, err := net.ParseCIDR(config.BridgeConfig.FixedCIDRv6)
   996  		if err != nil {
   997  			return err
   998  		}
   999  
  1000  		// In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
  1001  		// at least 48 host bits, we need to guarantee the current behavior where the containers'
  1002  		// IPv6 addresses will be constructed based on the containers' interface MAC address.
  1003  		// We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
  1004  		// on this network until after the driver has created the endpoint and returned the
  1005  		// constructed address. Libnetwork will then reserve this address with the ipam driver.
  1006  		ones, _ := fCIDRv6.Mask.Size()
  1007  		deferIPv6Alloc = ones <= 80
  1008  
  1009  		if ipamV6Conf == nil {
  1010  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
  1011  		}
  1012  		ipamV6Conf.PreferredPool = fCIDRv6.String()
  1013  
  1014  		// In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
  1015  		// address belongs to the same network, we need to inform libnetwork about it, so
  1016  		// that it can be reserved with IPAM and it will not be given away to somebody else
  1017  		for _, nw6 := range nw6List {
  1018  			if fCIDRv6.Contains(nw6.IP) {
  1019  				ipamV6Conf.Gateway = nw6.IP.String()
  1020  				break
  1021  			}
  1022  		}
  1023  	}
  1024  
  1025  	if config.BridgeConfig.DefaultGatewayIPv6 != nil {
  1026  		if ipamV6Conf == nil {
  1027  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
  1028  		}
  1029  		ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.BridgeConfig.DefaultGatewayIPv6.String()
  1030  	}
  1031  
  1032  	v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
  1033  	v6Conf := []*libnetwork.IpamConf{}
  1034  	if ipamV6Conf != nil {
  1035  		v6Conf = append(v6Conf, ipamV6Conf)
  1036  	}
  1037  	// Initialize default network on "bridge" with the same name
  1038  	_, err = controller.NewNetwork("bridge", "bridge", "",
  1039  		libnetwork.NetworkOptionEnableIPv6(config.BridgeConfig.EnableIPv6),
  1040  		libnetwork.NetworkOptionDriverOpts(netOption),
  1041  		libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
  1042  		libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
  1043  	if err != nil {
  1044  		return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
  1045  	}
  1046  	return nil
  1047  }
  1048  
  1049  // Remove default bridge interface if present (--bridge=none use case)
  1050  func removeDefaultBridgeInterface() {
  1051  	if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
  1052  		if err := netlink.LinkDel(lnk); err != nil {
  1053  			logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
  1054  		}
  1055  	}
  1056  }
  1057  
  1058  func (daemon *Daemon) getLayerInit() func(containerfs.ContainerFS) error {
  1059  	return daemon.setupInitLayer
  1060  }
  1061  
  1062  // Parse the remapped root (user namespace) option, which can be one of:
  1063  //   username            - valid username from /etc/passwd
  1064  //   username:groupname  - valid username; valid groupname from /etc/group
  1065  //   uid                 - 32-bit unsigned int valid Linux UID value
  1066  //   uid:gid             - uid value; 32-bit unsigned int Linux GID value
  1067  //
  1068  //  If no groupname is specified, and a username is specified, an attempt
  1069  //  will be made to lookup a gid for that username as a groupname
  1070  //
  1071  //  If names are used, they are verified to exist in passwd/group
  1072  func parseRemappedRoot(usergrp string) (string, string, error) {
  1073  
  1074  	var (
  1075  		userID, groupID     int
  1076  		username, groupname string
  1077  	)
  1078  
  1079  	idparts := strings.Split(usergrp, ":")
  1080  	if len(idparts) > 2 {
  1081  		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
  1082  	}
  1083  
  1084  	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
  1085  		// must be a uid; take it as valid
  1086  		userID = int(uid)
  1087  		luser, err := idtools.LookupUID(userID)
  1088  		if err != nil {
  1089  			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
  1090  		}
  1091  		username = luser.Name
  1092  		if len(idparts) == 1 {
  1093  			// if the uid was numeric and no gid was specified, take the uid as the gid
  1094  			groupID = userID
  1095  			lgrp, err := idtools.LookupGID(groupID)
  1096  			if err != nil {
  1097  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
  1098  			}
  1099  			groupname = lgrp.Name
  1100  		}
  1101  	} else {
  1102  		lookupName := idparts[0]
  1103  		// special case: if the user specified "default", they want Docker to create or
  1104  		// use (after creation) the "dockremap" user/group for root remapping
  1105  		if lookupName == defaultIDSpecifier {
  1106  			lookupName = defaultRemappedID
  1107  		}
  1108  		luser, err := idtools.LookupUser(lookupName)
  1109  		if err != nil && idparts[0] != defaultIDSpecifier {
  1110  			// error if the name requested isn't the special "dockremap" ID
  1111  			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
  1112  		} else if err != nil {
  1113  			// special case-- if the username == "default", then we have been asked
  1114  			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
  1115  			// ranges will be used for the user and group mappings in user namespaced containers
  1116  			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
  1117  			if err == nil {
  1118  				return defaultRemappedID, defaultRemappedID, nil
  1119  			}
  1120  			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
  1121  		}
  1122  		username = luser.Name
  1123  		if len(idparts) == 1 {
  1124  			// we only have a string username, and no group specified; look up gid from username as group
  1125  			group, err := idtools.LookupGroup(lookupName)
  1126  			if err != nil {
  1127  				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
  1128  			}
  1129  			groupname = group.Name
  1130  		}
  1131  	}
  1132  
  1133  	if len(idparts) == 2 {
  1134  		// groupname or gid is separately specified and must be resolved
  1135  		// to an unsigned 32-bit gid
  1136  		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
  1137  			// must be a gid, take it as valid
  1138  			groupID = int(gid)
  1139  			lgrp, err := idtools.LookupGID(groupID)
  1140  			if err != nil {
  1141  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
  1142  			}
  1143  			groupname = lgrp.Name
  1144  		} else {
  1145  			// not a number; attempt a lookup
  1146  			if _, err := idtools.LookupGroup(idparts[1]); err != nil {
  1147  				return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
  1148  			}
  1149  			groupname = idparts[1]
  1150  		}
  1151  	}
  1152  	return username, groupname, nil
  1153  }
  1154  
  1155  func setupRemappedRoot(config *config.Config) (*idtools.IDMappings, error) {
  1156  	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
  1157  		return nil, fmt.Errorf("User namespaces are only supported on Linux")
  1158  	}
  1159  
  1160  	// if the daemon was started with remapped root option, parse
  1161  	// the config option to the int uid,gid values
  1162  	if config.RemappedRoot != "" {
  1163  		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
  1164  		if err != nil {
  1165  			return nil, err
  1166  		}
  1167  		if username == "root" {
  1168  			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
  1169  			// effectively
  1170  			logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
  1171  			return &idtools.IDMappings{}, nil
  1172  		}
  1173  		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
  1174  		// update remapped root setting now that we have resolved them to actual names
  1175  		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
  1176  
  1177  		mappings, err := idtools.NewIDMappings(username, groupname)
  1178  		if err != nil {
  1179  			return nil, errors.Wrapf(err, "Can't create ID mappings: %v")
  1180  		}
  1181  		return mappings, nil
  1182  	}
  1183  	return &idtools.IDMappings{}, nil
  1184  }
  1185  
  1186  func setupDaemonRoot(config *config.Config, rootDir string, rootIDs idtools.IDPair) error {
  1187  	config.Root = rootDir
  1188  	// the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
  1189  	// so that syscalls executing as non-root, operating on subdirectories of the graph root
  1190  	// (e.g. mounted layers of a container) can traverse this path.
  1191  	// The user namespace support will create subdirectories for the remapped root host uid:gid
  1192  	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
  1193  	// layer content subtrees.
  1194  	if _, err := os.Stat(rootDir); err == nil {
  1195  		// root current exists; verify the access bits are correct by setting them
  1196  		if err = os.Chmod(rootDir, 0711); err != nil {
  1197  			return err
  1198  		}
  1199  	} else if os.IsNotExist(err) {
  1200  		// no root exists yet, create it 0711 with root:root ownership
  1201  		if err := os.MkdirAll(rootDir, 0711); err != nil {
  1202  			return err
  1203  		}
  1204  	}
  1205  
  1206  	// if user namespaces are enabled we will create a subtree underneath the specified root
  1207  	// with any/all specified remapped root uid/gid options on the daemon creating
  1208  	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
  1209  	// `chdir()` to work for containers namespaced to that uid/gid)
  1210  	if config.RemappedRoot != "" {
  1211  		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootIDs.UID, rootIDs.GID))
  1212  		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
  1213  		// Create the root directory if it doesn't exist
  1214  		if err := idtools.MkdirAllAndChown(config.Root, 0700, rootIDs); err != nil {
  1215  			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
  1216  		}
  1217  		// we also need to verify that any pre-existing directories in the path to
  1218  		// the graphroot won't block access to remapped root--if any pre-existing directory
  1219  		// has strict permissions that don't allow "x", container start will fail, so
  1220  		// better to warn and fail now
  1221  		dirPath := config.Root
  1222  		for {
  1223  			dirPath = filepath.Dir(dirPath)
  1224  			if dirPath == "/" {
  1225  				break
  1226  			}
  1227  			if !idtools.CanAccess(dirPath, rootIDs) {
  1228  				return fmt.Errorf("a subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories", config.Root)
  1229  			}
  1230  		}
  1231  	}
  1232  
  1233  	if err := ensureSharedOrSlave(config.Root); err != nil {
  1234  		if err := mount.MakeShared(config.Root); err != nil {
  1235  			logrus.WithError(err).WithField("dir", config.Root).Warn("Could not set daemon root propagation to shared, this is not generally critical but may cause some functionality to not work or fallback to less desirable behavior")
  1236  		}
  1237  	}
  1238  	return nil
  1239  }
  1240  
  1241  // registerLinks writes the links to a file.
  1242  func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
  1243  	if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() {
  1244  		return nil
  1245  	}
  1246  
  1247  	for _, l := range hostConfig.Links {
  1248  		name, alias, err := opts.ParseLink(l)
  1249  		if err != nil {
  1250  			return err
  1251  		}
  1252  		child, err := daemon.GetContainer(name)
  1253  		if err != nil {
  1254  			return errors.Wrapf(err, "could not get container for %s", name)
  1255  		}
  1256  		for child.HostConfig.NetworkMode.IsContainer() {
  1257  			parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2)
  1258  			child, err = daemon.GetContainer(parts[1])
  1259  			if err != nil {
  1260  				return errors.Wrapf(err, "Could not get container for %s", parts[1])
  1261  			}
  1262  		}
  1263  		if child.HostConfig.NetworkMode.IsHost() {
  1264  			return runconfig.ErrConflictHostNetworkAndLinks
  1265  		}
  1266  		if err := daemon.registerLink(container, child, alias); err != nil {
  1267  			return err
  1268  		}
  1269  	}
  1270  
  1271  	// After we load all the links into the daemon
  1272  	// set them to nil on the hostconfig
  1273  	_, err := container.WriteHostConfig()
  1274  	return err
  1275  }
  1276  
  1277  // conditionalMountOnStart is a platform specific helper function during the
  1278  // container start to call mount.
  1279  func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
  1280  	return daemon.Mount(container)
  1281  }
  1282  
  1283  // conditionalUnmountOnCleanup is a platform specific helper function called
  1284  // during the cleanup of a container to unmount.
  1285  func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
  1286  	return daemon.Unmount(container)
  1287  }
  1288  
  1289  func copyBlkioEntry(entries []*containerd_cgroups.BlkIOEntry) []types.BlkioStatEntry {
  1290  	out := make([]types.BlkioStatEntry, len(entries))
  1291  	for i, re := range entries {
  1292  		out[i] = types.BlkioStatEntry{
  1293  			Major: re.Major,
  1294  			Minor: re.Minor,
  1295  			Op:    re.Op,
  1296  			Value: re.Value,
  1297  		}
  1298  	}
  1299  	return out
  1300  }
  1301  
  1302  func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
  1303  	if !c.IsRunning() {
  1304  		return nil, errNotRunning(c.ID)
  1305  	}
  1306  	cs, err := daemon.containerd.Stats(context.Background(), c.ID)
  1307  	if err != nil {
  1308  		if strings.Contains(err.Error(), "container not found") {
  1309  			return nil, containerNotFound(c.ID)
  1310  		}
  1311  		return nil, err
  1312  	}
  1313  	s := &types.StatsJSON{}
  1314  	s.Read = cs.Read
  1315  	stats := cs.Metrics
  1316  	if stats.Blkio != nil {
  1317  		s.BlkioStats = types.BlkioStats{
  1318  			IoServiceBytesRecursive: copyBlkioEntry(stats.Blkio.IoServiceBytesRecursive),
  1319  			IoServicedRecursive:     copyBlkioEntry(stats.Blkio.IoServicedRecursive),
  1320  			IoQueuedRecursive:       copyBlkioEntry(stats.Blkio.IoQueuedRecursive),
  1321  			IoServiceTimeRecursive:  copyBlkioEntry(stats.Blkio.IoServiceTimeRecursive),
  1322  			IoWaitTimeRecursive:     copyBlkioEntry(stats.Blkio.IoWaitTimeRecursive),
  1323  			IoMergedRecursive:       copyBlkioEntry(stats.Blkio.IoMergedRecursive),
  1324  			IoTimeRecursive:         copyBlkioEntry(stats.Blkio.IoTimeRecursive),
  1325  			SectorsRecursive:        copyBlkioEntry(stats.Blkio.SectorsRecursive),
  1326  		}
  1327  	}
  1328  	if stats.CPU != nil {
  1329  		s.CPUStats = types.CPUStats{
  1330  			CPUUsage: types.CPUUsage{
  1331  				TotalUsage:        stats.CPU.Usage.Total,
  1332  				PercpuUsage:       stats.CPU.Usage.PerCPU,
  1333  				UsageInKernelmode: stats.CPU.Usage.Kernel,
  1334  				UsageInUsermode:   stats.CPU.Usage.User,
  1335  			},
  1336  			ThrottlingData: types.ThrottlingData{
  1337  				Periods:          stats.CPU.Throttling.Periods,
  1338  				ThrottledPeriods: stats.CPU.Throttling.ThrottledPeriods,
  1339  				ThrottledTime:    stats.CPU.Throttling.ThrottledTime,
  1340  			},
  1341  		}
  1342  	}
  1343  
  1344  	if stats.Memory != nil {
  1345  		raw := make(map[string]uint64)
  1346  		raw["cache"] = stats.Memory.Cache
  1347  		raw["rss"] = stats.Memory.RSS
  1348  		raw["rss_huge"] = stats.Memory.RSSHuge
  1349  		raw["mapped_file"] = stats.Memory.MappedFile
  1350  		raw["dirty"] = stats.Memory.Dirty
  1351  		raw["writeback"] = stats.Memory.Writeback
  1352  		raw["pgpgin"] = stats.Memory.PgPgIn
  1353  		raw["pgpgout"] = stats.Memory.PgPgOut
  1354  		raw["pgfault"] = stats.Memory.PgFault
  1355  		raw["pgmajfault"] = stats.Memory.PgMajFault
  1356  		raw["inactive_anon"] = stats.Memory.InactiveAnon
  1357  		raw["active_anon"] = stats.Memory.ActiveAnon
  1358  		raw["inactive_file"] = stats.Memory.InactiveFile
  1359  		raw["active_file"] = stats.Memory.ActiveFile
  1360  		raw["unevictable"] = stats.Memory.Unevictable
  1361  		raw["hierarchical_memory_limit"] = stats.Memory.HierarchicalMemoryLimit
  1362  		raw["hierarchical_memsw_limit"] = stats.Memory.HierarchicalSwapLimit
  1363  		raw["total_cache"] = stats.Memory.TotalCache
  1364  		raw["total_rss"] = stats.Memory.TotalRSS
  1365  		raw["total_rss_huge"] = stats.Memory.TotalRSSHuge
  1366  		raw["total_mapped_file"] = stats.Memory.TotalMappedFile
  1367  		raw["total_dirty"] = stats.Memory.TotalDirty
  1368  		raw["total_writeback"] = stats.Memory.TotalWriteback
  1369  		raw["total_pgpgin"] = stats.Memory.TotalPgPgIn
  1370  		raw["total_pgpgout"] = stats.Memory.TotalPgPgOut
  1371  		raw["total_pgfault"] = stats.Memory.TotalPgFault
  1372  		raw["total_pgmajfault"] = stats.Memory.TotalPgMajFault
  1373  		raw["total_inactive_anon"] = stats.Memory.TotalInactiveAnon
  1374  		raw["total_active_anon"] = stats.Memory.TotalActiveAnon
  1375  		raw["total_inactive_file"] = stats.Memory.TotalInactiveFile
  1376  		raw["total_active_file"] = stats.Memory.TotalActiveFile
  1377  		raw["total_unevictable"] = stats.Memory.TotalUnevictable
  1378  
  1379  		if stats.Memory.Usage != nil {
  1380  			s.MemoryStats = types.MemoryStats{
  1381  				Stats:    raw,
  1382  				Usage:    stats.Memory.Usage.Usage,
  1383  				MaxUsage: stats.Memory.Usage.Max,
  1384  				Limit:    stats.Memory.Usage.Limit,
  1385  				Failcnt:  stats.Memory.Usage.Failcnt,
  1386  			}
  1387  		} else {
  1388  			s.MemoryStats = types.MemoryStats{
  1389  				Stats: raw,
  1390  			}
  1391  		}
  1392  
  1393  		// if the container does not set memory limit, use the machineMemory
  1394  		if s.MemoryStats.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
  1395  			s.MemoryStats.Limit = daemon.machineMemory
  1396  		}
  1397  	}
  1398  
  1399  	if stats.Pids != nil {
  1400  		s.PidsStats = types.PidsStats{
  1401  			Current: stats.Pids.Current,
  1402  			Limit:   stats.Pids.Limit,
  1403  		}
  1404  	}
  1405  
  1406  	return s, nil
  1407  }
  1408  
  1409  // setDefaultIsolation determines the default isolation mode for the
  1410  // daemon to run in. This is only applicable on Windows
  1411  func (daemon *Daemon) setDefaultIsolation() error {
  1412  	return nil
  1413  }
  1414  
  1415  func rootFSToAPIType(rootfs *image.RootFS) types.RootFS {
  1416  	var layers []string
  1417  	for _, l := range rootfs.DiffIDs {
  1418  		layers = append(layers, l.String())
  1419  	}
  1420  	return types.RootFS{
  1421  		Type:   rootfs.Type,
  1422  		Layers: layers,
  1423  	}
  1424  }
  1425  
  1426  // setupDaemonProcess sets various settings for the daemon's process
  1427  func setupDaemonProcess(config *config.Config) error {
  1428  	// setup the daemons oom_score_adj
  1429  	if err := setupOOMScoreAdj(config.OOMScoreAdjust); err != nil {
  1430  		return err
  1431  	}
  1432  	if err := setMayDetachMounts(); err != nil {
  1433  		logrus.WithError(err).Warn("Could not set may_detach_mounts kernel parameter")
  1434  	}
  1435  	return nil
  1436  }
  1437  
  1438  // This is used to allow removal of mountpoints that may be mounted in other
  1439  // namespaces on RHEL based kernels starting from RHEL 7.4.
  1440  // Without this setting, removals on these RHEL based kernels may fail with
  1441  // "device or resource busy".
  1442  // This setting is not available in upstream kernels as it is not configurable,
  1443  // but has been in the upstream kernels since 3.15.
  1444  func setMayDetachMounts() error {
  1445  	f, err := os.OpenFile("/proc/sys/fs/may_detach_mounts", os.O_WRONLY, 0)
  1446  	if err != nil {
  1447  		if os.IsNotExist(err) {
  1448  			return nil
  1449  		}
  1450  		return errors.Wrap(err, "error opening may_detach_mounts kernel config file")
  1451  	}
  1452  	defer f.Close()
  1453  
  1454  	_, err = f.WriteString("1")
  1455  	if os.IsPermission(err) {
  1456  		// Setting may_detach_mounts does not work in an
  1457  		// unprivileged container. Ignore the error, but log
  1458  		// it if we appear not to be in that situation.
  1459  		if !rsystem.RunningInUserNS() {
  1460  			logrus.Debugf("Permission denied writing %q to /proc/sys/fs/may_detach_mounts", "1")
  1461  		}
  1462  		return nil
  1463  	}
  1464  	return err
  1465  }
  1466  
  1467  func setupOOMScoreAdj(score int) error {
  1468  	f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0)
  1469  	if err != nil {
  1470  		return err
  1471  	}
  1472  	defer f.Close()
  1473  	stringScore := strconv.Itoa(score)
  1474  	_, err = f.WriteString(stringScore)
  1475  	if os.IsPermission(err) {
  1476  		// Setting oom_score_adj does not work in an
  1477  		// unprivileged container. Ignore the error, but log
  1478  		// it if we appear not to be in that situation.
  1479  		if !rsystem.RunningInUserNS() {
  1480  			logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore)
  1481  		}
  1482  		return nil
  1483  	}
  1484  
  1485  	return err
  1486  }
  1487  
  1488  func (daemon *Daemon) initCgroupsPath(path string) error {
  1489  	if path == "/" || path == "." {
  1490  		return nil
  1491  	}
  1492  
  1493  	if daemon.configStore.CPURealtimePeriod == 0 && daemon.configStore.CPURealtimeRuntime == 0 {
  1494  		return nil
  1495  	}
  1496  
  1497  	// Recursively create cgroup to ensure that the system and all parent cgroups have values set
  1498  	// for the period and runtime as this limits what the children can be set to.
  1499  	daemon.initCgroupsPath(filepath.Dir(path))
  1500  
  1501  	mnt, root, err := cgroups.FindCgroupMountpointAndRoot("cpu")
  1502  	if err != nil {
  1503  		return err
  1504  	}
  1505  	// When docker is run inside docker, the root is based of the host cgroup.
  1506  	// Should this be handled in runc/libcontainer/cgroups ?
  1507  	if strings.HasPrefix(root, "/docker/") {
  1508  		root = "/"
  1509  	}
  1510  
  1511  	path = filepath.Join(mnt, root, path)
  1512  	sysinfo := sysinfo.New(true)
  1513  	if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil {
  1514  		return err
  1515  	}
  1516  	if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimeRuntime, daemon.configStore.CPURealtimeRuntime, "cpu.rt_runtime_us", path); err != nil {
  1517  		return err
  1518  	}
  1519  	return nil
  1520  }
  1521  
  1522  func maybeCreateCPURealTimeFile(sysinfoPresent bool, configValue int64, file string, path string) error {
  1523  	if sysinfoPresent && configValue != 0 {
  1524  		if err := os.MkdirAll(path, 0755); err != nil {
  1525  			return err
  1526  		}
  1527  		if err := ioutil.WriteFile(filepath.Join(path, file), []byte(strconv.FormatInt(configValue, 10)), 0700); err != nil {
  1528  			return err
  1529  		}
  1530  	}
  1531  	return nil
  1532  }
  1533  
  1534  func (daemon *Daemon) setupSeccompProfile() error {
  1535  	if daemon.configStore.SeccompProfile != "" {
  1536  		daemon.seccompProfilePath = daemon.configStore.SeccompProfile
  1537  		b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile)
  1538  		if err != nil {
  1539  			return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err)
  1540  		}
  1541  		daemon.seccompProfile = b
  1542  	}
  1543  	return nil
  1544  }