github.com/openshift/moby-moby@v1.13.2-0.20170601211448-f5ec1e2936dc/daemon/daemon_unix.go (about)

     1  // +build linux freebsd
     2  
     3  package daemon
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"net"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"runtime/debug"
    14  	"strconv"
    15  	"strings"
    16  	"syscall"
    17  	"time"
    18  
    19  	"github.com/Sirupsen/logrus"
    20  	"github.com/docker/docker/api/types"
    21  	"github.com/docker/docker/api/types/blkiodev"
    22  	pblkiodev "github.com/docker/docker/api/types/blkiodev"
    23  	containertypes "github.com/docker/docker/api/types/container"
    24  	"github.com/docker/docker/container"
    25  	"github.com/docker/docker/image"
    26  	"github.com/docker/docker/pkg/idtools"
    27  	"github.com/docker/docker/pkg/parsers"
    28  	"github.com/docker/docker/pkg/parsers/kernel"
    29  	"github.com/docker/docker/pkg/sysinfo"
    30  	"github.com/docker/docker/runconfig"
    31  	runconfigopts "github.com/docker/docker/runconfig/opts"
    32  	"github.com/docker/libnetwork"
    33  	nwconfig "github.com/docker/libnetwork/config"
    34  	"github.com/docker/libnetwork/drivers/bridge"
    35  	"github.com/docker/libnetwork/netlabel"
    36  	"github.com/docker/libnetwork/netutils"
    37  	"github.com/docker/libnetwork/options"
    38  	lntypes "github.com/docker/libnetwork/types"
    39  	"github.com/golang/protobuf/ptypes"
    40  	"github.com/opencontainers/runc/libcontainer/cgroups"
    41  	"github.com/opencontainers/runc/libcontainer/label"
    42  	rsystem "github.com/opencontainers/runc/libcontainer/system"
    43  	specs "github.com/opencontainers/runtime-spec/specs-go"
    44  	"github.com/pkg/errors"
    45  	"github.com/vishvananda/netlink"
    46  )
    47  
    48  const (
    49  	// See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
    50  	linuxMinCPUShares = 2
    51  	linuxMaxCPUShares = 262144
    52  	platformSupported = true
    53  	// It's not kernel limit, we want this 4M limit to supply a reasonable functional container
    54  	linuxMinMemory = 4194304
    55  	// constants for remapped root settings
    56  	defaultIDSpecifier string = "default"
    57  	defaultRemappedID  string = "dockremap"
    58  
    59  	// constant for cgroup drivers
    60  	cgroupFsDriver      = "cgroupfs"
    61  	cgroupSystemdDriver = "systemd"
    62  )
    63  
    64  func getMemoryResources(config containertypes.Resources) *specs.Memory {
    65  	memory := specs.Memory{}
    66  
    67  	if config.Memory > 0 {
    68  		limit := uint64(config.Memory)
    69  		memory.Limit = &limit
    70  	}
    71  
    72  	if config.MemoryReservation > 0 {
    73  		reservation := uint64(config.MemoryReservation)
    74  		memory.Reservation = &reservation
    75  	}
    76  
    77  	if config.MemorySwap != 0 {
    78  		swap := uint64(config.MemorySwap)
    79  		memory.Swap = &swap
    80  	}
    81  
    82  	if config.MemorySwappiness != nil {
    83  		swappiness := uint64(*config.MemorySwappiness)
    84  		memory.Swappiness = &swappiness
    85  	}
    86  
    87  	if config.KernelMemory != 0 {
    88  		kernelMemory := uint64(config.KernelMemory)
    89  		memory.Kernel = &kernelMemory
    90  	}
    91  
    92  	return &memory
    93  }
    94  
    95  func getCPUResources(config containertypes.Resources) *specs.CPU {
    96  	cpu := specs.CPU{}
    97  
    98  	if config.CPUShares != 0 {
    99  		shares := uint64(config.CPUShares)
   100  		cpu.Shares = &shares
   101  	}
   102  
   103  	if config.CpusetCpus != "" {
   104  		cpuset := config.CpusetCpus
   105  		cpu.Cpus = &cpuset
   106  	}
   107  
   108  	if config.CpusetMems != "" {
   109  		cpuset := config.CpusetMems
   110  		cpu.Mems = &cpuset
   111  	}
   112  
   113  	if config.NanoCPUs > 0 {
   114  		// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
   115  		period := uint64(100 * time.Millisecond / time.Microsecond)
   116  		quota := uint64(config.NanoCPUs) * period / 1e9
   117  		cpu.Period = &period
   118  		cpu.Quota = &quota
   119  	}
   120  
   121  	if config.CPUPeriod != 0 {
   122  		period := uint64(config.CPUPeriod)
   123  		cpu.Period = &period
   124  	}
   125  
   126  	if config.CPUQuota != 0 {
   127  		quota := uint64(config.CPUQuota)
   128  		cpu.Quota = &quota
   129  	}
   130  
   131  	if config.CPURealtimePeriod != 0 {
   132  		period := uint64(config.CPURealtimePeriod)
   133  		cpu.RealtimePeriod = &period
   134  	}
   135  
   136  	if config.CPURealtimeRuntime != 0 {
   137  		runtime := uint64(config.CPURealtimeRuntime)
   138  		cpu.RealtimeRuntime = &runtime
   139  	}
   140  
   141  	return &cpu
   142  }
   143  
   144  func getBlkioWeightDevices(config containertypes.Resources) ([]specs.WeightDevice, error) {
   145  	var stat syscall.Stat_t
   146  	var blkioWeightDevices []specs.WeightDevice
   147  
   148  	for _, weightDevice := range config.BlkioWeightDevice {
   149  		if err := syscall.Stat(weightDevice.Path, &stat); err != nil {
   150  			return nil, err
   151  		}
   152  		weight := weightDevice.Weight
   153  		d := specs.WeightDevice{Weight: &weight}
   154  		d.Major = int64(stat.Rdev / 256)
   155  		d.Minor = int64(stat.Rdev % 256)
   156  		blkioWeightDevices = append(blkioWeightDevices, d)
   157  	}
   158  
   159  	return blkioWeightDevices, nil
   160  }
   161  
   162  func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error {
   163  	var (
   164  		labelOpts []string
   165  		err       error
   166  	)
   167  
   168  	for _, opt := range config.SecurityOpt {
   169  		if opt == "no-new-privileges" {
   170  			container.NoNewPrivileges = true
   171  			continue
   172  		}
   173  
   174  		var con []string
   175  		if strings.Contains(opt, "=") {
   176  			con = strings.SplitN(opt, "=", 2)
   177  		} else if strings.Contains(opt, ":") {
   178  			con = strings.SplitN(opt, ":", 2)
   179  			logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 1.14, use `=` instead.")
   180  		}
   181  
   182  		if len(con) != 2 {
   183  			return fmt.Errorf("invalid --security-opt 1: %q", opt)
   184  		}
   185  
   186  		switch con[0] {
   187  		case "label":
   188  			labelOpts = append(labelOpts, con[1])
   189  		case "apparmor":
   190  			container.AppArmorProfile = con[1]
   191  		case "seccomp":
   192  			container.SeccompProfile = con[1]
   193  		default:
   194  			return fmt.Errorf("invalid --security-opt 2: %q", opt)
   195  		}
   196  	}
   197  
   198  	container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts)
   199  	return err
   200  }
   201  
   202  func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.ThrottleDevice, error) {
   203  	var throttleDevices []specs.ThrottleDevice
   204  	var stat syscall.Stat_t
   205  
   206  	for _, d := range devs {
   207  		if err := syscall.Stat(d.Path, &stat); err != nil {
   208  			return nil, err
   209  		}
   210  		rate := d.Rate
   211  		d := specs.ThrottleDevice{Rate: &rate}
   212  		d.Major = int64(stat.Rdev / 256)
   213  		d.Minor = int64(stat.Rdev % 256)
   214  		throttleDevices = append(throttleDevices, d)
   215  	}
   216  
   217  	return throttleDevices, nil
   218  }
   219  
   220  func checkKernel() error {
   221  	// Check for unsupported kernel versions
   222  	// FIXME: it would be cleaner to not test for specific versions, but rather
   223  	// test for specific functionalities.
   224  	// Unfortunately we can't test for the feature "does not cause a kernel panic"
   225  	// without actually causing a kernel panic, so we need this workaround until
   226  	// the circumstances of pre-3.10 crashes are clearer.
   227  	// For details see https://github.com/docker/docker/issues/407
   228  	// Docker 1.11 and above doesn't actually run on kernels older than 3.4,
   229  	// due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4).
   230  	if !kernel.CheckKernelVersion(3, 10, 0) {
   231  		v, _ := kernel.GetKernelVersion()
   232  		if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" {
   233  			logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String())
   234  		}
   235  	}
   236  	return nil
   237  }
   238  
   239  // adaptContainerSettings is called during container creation to modify any
   240  // settings necessary in the HostConfig structure.
   241  func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error {
   242  	if adjustCPUShares && hostConfig.CPUShares > 0 {
   243  		// Handle unsupported CPUShares
   244  		if hostConfig.CPUShares < linuxMinCPUShares {
   245  			logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares)
   246  			hostConfig.CPUShares = linuxMinCPUShares
   247  		} else if hostConfig.CPUShares > linuxMaxCPUShares {
   248  			logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares)
   249  			hostConfig.CPUShares = linuxMaxCPUShares
   250  		}
   251  	}
   252  	if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 {
   253  		// By default, MemorySwap is set to twice the size of Memory.
   254  		hostConfig.MemorySwap = hostConfig.Memory * 2
   255  	}
   256  	if hostConfig.ShmSize == 0 {
   257  		hostConfig.ShmSize = container.DefaultSHMSize
   258  	}
   259  	var err error
   260  	opts, err := daemon.generateSecurityOpt(hostConfig.IpcMode, hostConfig.PidMode, hostConfig.Privileged)
   261  	if err != nil {
   262  		return err
   263  	}
   264  	hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...)
   265  	if hostConfig.MemorySwappiness == nil {
   266  		defaultSwappiness := int64(-1)
   267  		hostConfig.MemorySwappiness = &defaultSwappiness
   268  	}
   269  	if hostConfig.OomKillDisable == nil {
   270  		defaultOomKillDisable := false
   271  		hostConfig.OomKillDisable = &defaultOomKillDisable
   272  	}
   273  
   274  	return nil
   275  }
   276  
   277  func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) {
   278  	warnings := []string{}
   279  
   280  	// memory subsystem checks and adjustments
   281  	if resources.Memory != 0 && resources.Memory < linuxMinMemory {
   282  		return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB")
   283  	}
   284  	if resources.Memory > 0 && !sysInfo.MemoryLimit {
   285  		warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   286  		logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   287  		resources.Memory = 0
   288  		resources.MemorySwap = -1
   289  	}
   290  	if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit {
   291  		warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.")
   292  		logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.")
   293  		resources.MemorySwap = -1
   294  	}
   295  	if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory {
   296  		return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage")
   297  	}
   298  	if resources.Memory == 0 && resources.MemorySwap > 0 && !update {
   299  		return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage")
   300  	}
   301  	if resources.MemorySwappiness != nil && *resources.MemorySwappiness != -1 && !sysInfo.MemorySwappiness {
   302  		warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.")
   303  		logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.")
   304  		resources.MemorySwappiness = nil
   305  	}
   306  	if resources.MemorySwappiness != nil {
   307  		swappiness := *resources.MemorySwappiness
   308  		if swappiness < -1 || swappiness > 100 {
   309  			return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness)
   310  		}
   311  	}
   312  	if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation {
   313  		warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
   314  		logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
   315  		resources.MemoryReservation = 0
   316  	}
   317  	if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory {
   318  		return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB")
   319  	}
   320  	if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation {
   321  		return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage")
   322  	}
   323  	if resources.KernelMemory > 0 && !sysInfo.KernelMemory {
   324  		warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   325  		logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   326  		resources.KernelMemory = 0
   327  	}
   328  	if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory {
   329  		return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB")
   330  	}
   331  	if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) {
   332  		warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
   333  		logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
   334  	}
   335  	if resources.OomKillDisable != nil && !sysInfo.OomKillDisable {
   336  		// only produce warnings if the setting wasn't to *disable* the OOM Kill; no point
   337  		// warning the caller if they already wanted the feature to be off
   338  		if *resources.OomKillDisable {
   339  			warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.")
   340  			logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.")
   341  		}
   342  		resources.OomKillDisable = nil
   343  	}
   344  
   345  	if resources.PidsLimit != 0 && !sysInfo.PidsLimit {
   346  		warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
   347  		logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
   348  		resources.PidsLimit = 0
   349  	}
   350  
   351  	// cpu subsystem checks and adjustments
   352  	if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 {
   353  		return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set")
   354  	}
   355  	if resources.NanoCPUs > 0 && resources.CPUQuota > 0 {
   356  		return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set")
   357  	}
   358  	if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) {
   359  		return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted")
   360  	}
   361  	// The highest precision we could get on Linux is 0.001, by setting
   362  	//   cpu.cfs_period_us=1000ms
   363  	//   cpu.cfs_quota=1ms
   364  	// See the following link for details:
   365  	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
   366  	// Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error.
   367  	// The error message is 0.01 so that this is consistent with Windows
   368  	if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 {
   369  		return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU())
   370  	}
   371  
   372  	if resources.CPUShares > 0 && !sysInfo.CPUShares {
   373  		warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
   374  		logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
   375  		resources.CPUShares = 0
   376  	}
   377  	if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod {
   378  		warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
   379  		logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
   380  		resources.CPUPeriod = 0
   381  	}
   382  	if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) {
   383  		return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)")
   384  	}
   385  	if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota {
   386  		warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
   387  		logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
   388  		resources.CPUQuota = 0
   389  	}
   390  	if resources.CPUQuota > 0 && resources.CPUQuota < 1000 {
   391  		return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)")
   392  	}
   393  	if resources.CPUPercent > 0 {
   394  		warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS))
   395  		logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)
   396  		resources.CPUPercent = 0
   397  	}
   398  
   399  	// cpuset subsystem checks and adjustments
   400  	if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset {
   401  		warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
   402  		logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
   403  		resources.CpusetCpus = ""
   404  		resources.CpusetMems = ""
   405  	}
   406  	cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus)
   407  	if err != nil {
   408  		return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus)
   409  	}
   410  	if !cpusAvailable {
   411  		return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus)
   412  	}
   413  	memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems)
   414  	if err != nil {
   415  		return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems)
   416  	}
   417  	if !memsAvailable {
   418  		return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems)
   419  	}
   420  
   421  	// blkio subsystem checks and adjustments
   422  	if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight {
   423  		warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
   424  		logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
   425  		resources.BlkioWeight = 0
   426  	}
   427  	if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) {
   428  		return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000")
   429  	}
   430  	if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 {
   431  		return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS)
   432  	}
   433  	if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice {
   434  		warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
   435  		logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
   436  		resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{}
   437  	}
   438  	if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice {
   439  		warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.")
   440  		logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded")
   441  		resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{}
   442  	}
   443  	if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice {
   444  		warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
   445  		logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
   446  		resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{}
   447  	}
   448  	if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice {
   449  		warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
   450  		logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
   451  		resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{}
   452  	}
   453  	if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice {
   454  		warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
   455  		logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
   456  		resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{}
   457  	}
   458  
   459  	return warnings, nil
   460  }
   461  
   462  func (daemon *Daemon) getCgroupDriver() string {
   463  	cgroupDriver := cgroupFsDriver
   464  
   465  	if UsingSystemd(daemon.configStore) {
   466  		cgroupDriver = cgroupSystemdDriver
   467  	}
   468  	return cgroupDriver
   469  }
   470  
   471  // getCD gets the raw value of the native.cgroupdriver option, if set.
   472  func getCD(config *Config) string {
   473  	for _, option := range config.ExecOptions {
   474  		key, val, err := parsers.ParseKeyValueOpt(option)
   475  		if err != nil || !strings.EqualFold(key, "native.cgroupdriver") {
   476  			continue
   477  		}
   478  		return val
   479  	}
   480  	return ""
   481  }
   482  
   483  // VerifyCgroupDriver validates native.cgroupdriver
   484  func VerifyCgroupDriver(config *Config) error {
   485  	cd := getCD(config)
   486  	if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver {
   487  		return nil
   488  	}
   489  	return fmt.Errorf("native.cgroupdriver option %s not supported", cd)
   490  }
   491  
   492  // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd
   493  func UsingSystemd(config *Config) bool {
   494  	return getCD(config) == cgroupSystemdDriver
   495  }
   496  
   497  // verifyPlatformContainerSettings performs platform-specific validation of the
   498  // hostconfig and config structures.
   499  func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) {
   500  	warnings := []string{}
   501  	sysInfo := sysinfo.New(true)
   502  
   503  	warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config)
   504  	if err != nil {
   505  		return warnings, err
   506  	}
   507  
   508  	w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update)
   509  
   510  	// no matter err is nil or not, w could have data in itself.
   511  	warnings = append(warnings, w...)
   512  
   513  	if err != nil {
   514  		return warnings, err
   515  	}
   516  
   517  	if hostConfig.ShmSize < 0 {
   518  		return warnings, fmt.Errorf("SHM size can not be less than 0")
   519  	}
   520  
   521  	if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 {
   522  		return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj)
   523  	}
   524  
   525  	// ip-forwarding does not affect container with '--net=host' (or '--net=none')
   526  	if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) {
   527  		warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
   528  		logrus.Warn("IPv4 forwarding is disabled. Networking will not work")
   529  	}
   530  	// check for various conflicting options with user namespaces
   531  	if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
   532  		if hostConfig.Privileged {
   533  			return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
   534  		}
   535  		if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   536  			return warnings, fmt.Errorf("Cannot share the host's network namespace when user namespaces are enabled")
   537  		}
   538  		if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   539  			return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled")
   540  		}
   541  	}
   542  	if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) {
   543  		// CgroupParent for systemd cgroup should be named as "xxx.slice"
   544  		if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") {
   545  			return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   546  		}
   547  	}
   548  	if hostConfig.Runtime == "" {
   549  		hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
   550  	}
   551  
   552  	if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil {
   553  		return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime)
   554  	}
   555  
   556  	return warnings, nil
   557  }
   558  
   559  // platformReload update configuration with platform specific options
   560  func (daemon *Daemon) platformReload(config *Config) map[string]string {
   561  	if config.IsValueSet("runtimes") {
   562  		daemon.configStore.Runtimes = config.Runtimes
   563  		// Always set the default one
   564  		daemon.configStore.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   565  	}
   566  
   567  	if config.DefaultRuntime != "" {
   568  		daemon.configStore.DefaultRuntime = config.DefaultRuntime
   569  	}
   570  
   571  	// Update attributes
   572  	var runtimeList bytes.Buffer
   573  	for name, rt := range daemon.configStore.Runtimes {
   574  		if runtimeList.Len() > 0 {
   575  			runtimeList.WriteRune(' ')
   576  		}
   577  		runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt))
   578  	}
   579  
   580  	return map[string]string{
   581  		"runtimes":        runtimeList.String(),
   582  		"default-runtime": daemon.configStore.DefaultRuntime,
   583  	}
   584  }
   585  
   586  // verifyDaemonSettings performs validation of daemon config struct
   587  func verifyDaemonSettings(config *Config) error {
   588  	// Check for mutually incompatible config options
   589  	if config.bridgeConfig.Iface != "" && config.bridgeConfig.IP != "" {
   590  		return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one")
   591  	}
   592  	if !config.bridgeConfig.EnableIPTables && !config.bridgeConfig.InterContainerCommunication {
   593  		return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true")
   594  	}
   595  	if !config.bridgeConfig.EnableIPTables && config.bridgeConfig.EnableIPMasq {
   596  		config.bridgeConfig.EnableIPMasq = false
   597  	}
   598  	if err := VerifyCgroupDriver(config); err != nil {
   599  		return err
   600  	}
   601  	if config.CgroupParent != "" && UsingSystemd(config) {
   602  		if len(config.CgroupParent) <= 6 || !strings.HasSuffix(config.CgroupParent, ".slice") {
   603  			return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   604  		}
   605  	}
   606  
   607  	if config.DefaultRuntime == "" {
   608  		config.DefaultRuntime = stockRuntimeName
   609  	}
   610  	if config.Runtimes == nil {
   611  		config.Runtimes = make(map[string]types.Runtime)
   612  	}
   613  	config.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   614  
   615  	return nil
   616  }
   617  
   618  // checkSystem validates platform-specific requirements
   619  func checkSystem() error {
   620  	if os.Geteuid() != 0 {
   621  		return fmt.Errorf("The Docker daemon needs to be run as root")
   622  	}
   623  	return checkKernel()
   624  }
   625  
   626  // configureMaxThreads sets the Go runtime max threads threshold
   627  // which is 90% of the kernel setting from /proc/sys/kernel/threads-max
   628  func configureMaxThreads(config *Config) error {
   629  	mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
   630  	if err != nil {
   631  		return err
   632  	}
   633  	mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
   634  	if err != nil {
   635  		return err
   636  	}
   637  	maxThreads := (mtint / 100) * 90
   638  	debug.SetMaxThreads(maxThreads)
   639  	logrus.Debugf("Golang's threads limit set to %d", maxThreads)
   640  	return nil
   641  }
   642  
   643  // configureKernelSecuritySupport configures and validates security support for the kernel
   644  func configureKernelSecuritySupport(config *Config, driverName string) error {
   645  	if config.EnableSelinuxSupport {
   646  		if !selinuxEnabled() {
   647  			logrus.Warn("Docker could not enable SELinux on the host system")
   648  		}
   649  	} else {
   650  		selinuxSetDisabled()
   651  	}
   652  	return nil
   653  }
   654  
   655  func (daemon *Daemon) initNetworkController(config *Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
   656  	netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
   657  	if err != nil {
   658  		return nil, err
   659  	}
   660  
   661  	controller, err := libnetwork.New(netOptions...)
   662  	if err != nil {
   663  		return nil, fmt.Errorf("error obtaining controller instance: %v", err)
   664  	}
   665  
   666  	if len(activeSandboxes) > 0 {
   667  		logrus.Info("There are old running containers, the network config will not take affect")
   668  		return controller, nil
   669  	}
   670  
   671  	// Initialize default network on "null"
   672  	if n, _ := controller.NetworkByName("none"); n == nil {
   673  		if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   674  			return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
   675  		}
   676  	}
   677  
   678  	// Initialize default network on "host"
   679  	if n, _ := controller.NetworkByName("host"); n == nil {
   680  		if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   681  			return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
   682  		}
   683  	}
   684  
   685  	// Clear stale bridge network
   686  	if n, err := controller.NetworkByName("bridge"); err == nil {
   687  		if err = n.Delete(); err != nil {
   688  			return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
   689  		}
   690  	}
   691  
   692  	if !config.DisableBridge {
   693  		// Initialize default driver "bridge"
   694  		if err := initBridgeDriver(controller, config); err != nil {
   695  			return nil, err
   696  		}
   697  	} else {
   698  		removeDefaultBridgeInterface()
   699  	}
   700  
   701  	return controller, nil
   702  }
   703  
   704  func driverOptions(config *Config) []nwconfig.Option {
   705  	bridgeConfig := options.Generic{
   706  		"EnableIPForwarding":  config.bridgeConfig.EnableIPForward,
   707  		"EnableIPTables":      config.bridgeConfig.EnableIPTables,
   708  		"EnableUserlandProxy": config.bridgeConfig.EnableUserlandProxy,
   709  		"UserlandProxyPath":   config.bridgeConfig.UserlandProxyPath}
   710  	bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}
   711  
   712  	dOptions := []nwconfig.Option{}
   713  	dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption))
   714  	return dOptions
   715  }
   716  
   717  func initBridgeDriver(controller libnetwork.NetworkController, config *Config) error {
   718  	bridgeName := bridge.DefaultBridgeName
   719  	if config.bridgeConfig.Iface != "" {
   720  		bridgeName = config.bridgeConfig.Iface
   721  	}
   722  	netOption := map[string]string{
   723  		bridge.BridgeName:         bridgeName,
   724  		bridge.DefaultBridge:      strconv.FormatBool(true),
   725  		netlabel.DriverMTU:        strconv.Itoa(config.Mtu),
   726  		bridge.EnableIPMasquerade: strconv.FormatBool(config.bridgeConfig.EnableIPMasq),
   727  		bridge.EnableICC:          strconv.FormatBool(config.bridgeConfig.InterContainerCommunication),
   728  	}
   729  
   730  	// --ip processing
   731  	if config.bridgeConfig.DefaultIP != nil {
   732  		netOption[bridge.DefaultBindingIP] = config.bridgeConfig.DefaultIP.String()
   733  	}
   734  
   735  	var (
   736  		ipamV4Conf *libnetwork.IpamConf
   737  		ipamV6Conf *libnetwork.IpamConf
   738  	)
   739  
   740  	ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   741  
   742  	nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
   743  	if err != nil {
   744  		return errors.Wrap(err, "list bridge addresses failed")
   745  	}
   746  
   747  	nw := nwList[0]
   748  	if len(nwList) > 1 && config.bridgeConfig.FixedCIDR != "" {
   749  		_, fCIDR, err := net.ParseCIDR(config.bridgeConfig.FixedCIDR)
   750  		if err != nil {
   751  			return errors.Wrap(err, "parse CIDR failed")
   752  		}
   753  		// Iterate through in case there are multiple addresses for the bridge
   754  		for _, entry := range nwList {
   755  			if fCIDR.Contains(entry.IP) {
   756  				nw = entry
   757  				break
   758  			}
   759  		}
   760  	}
   761  
   762  	ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
   763  	hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
   764  	if hip.IsGlobalUnicast() {
   765  		ipamV4Conf.Gateway = nw.IP.String()
   766  	}
   767  
   768  	if config.bridgeConfig.IP != "" {
   769  		ipamV4Conf.PreferredPool = config.bridgeConfig.IP
   770  		ip, _, err := net.ParseCIDR(config.bridgeConfig.IP)
   771  		if err != nil {
   772  			return err
   773  		}
   774  		ipamV4Conf.Gateway = ip.String()
   775  	} else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
   776  		logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
   777  	}
   778  
   779  	if config.bridgeConfig.FixedCIDR != "" {
   780  		_, fCIDR, err := net.ParseCIDR(config.bridgeConfig.FixedCIDR)
   781  		if err != nil {
   782  			return err
   783  		}
   784  
   785  		ipamV4Conf.SubPool = fCIDR.String()
   786  	}
   787  
   788  	if config.bridgeConfig.DefaultGatewayIPv4 != nil {
   789  		ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.bridgeConfig.DefaultGatewayIPv4.String()
   790  	}
   791  
   792  	var deferIPv6Alloc bool
   793  	if config.bridgeConfig.FixedCIDRv6 != "" {
   794  		_, fCIDRv6, err := net.ParseCIDR(config.bridgeConfig.FixedCIDRv6)
   795  		if err != nil {
   796  			return err
   797  		}
   798  
   799  		// In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
   800  		// at least 48 host bits, we need to guarantee the current behavior where the containers'
   801  		// IPv6 addresses will be constructed based on the containers' interface MAC address.
   802  		// We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
   803  		// on this network until after the driver has created the endpoint and returned the
   804  		// constructed address. Libnetwork will then reserve this address with the ipam driver.
   805  		ones, _ := fCIDRv6.Mask.Size()
   806  		deferIPv6Alloc = ones <= 80
   807  
   808  		if ipamV6Conf == nil {
   809  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   810  		}
   811  		ipamV6Conf.PreferredPool = fCIDRv6.String()
   812  
   813  		// In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
   814  		// address belongs to the same network, we need to inform libnetwork about it, so
   815  		// that it can be reserved with IPAM and it will not be given away to somebody else
   816  		for _, nw6 := range nw6List {
   817  			if fCIDRv6.Contains(nw6.IP) {
   818  				ipamV6Conf.Gateway = nw6.IP.String()
   819  				break
   820  			}
   821  		}
   822  	}
   823  
   824  	if config.bridgeConfig.DefaultGatewayIPv6 != nil {
   825  		if ipamV6Conf == nil {
   826  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   827  		}
   828  		ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.bridgeConfig.DefaultGatewayIPv6.String()
   829  	}
   830  
   831  	v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
   832  	v6Conf := []*libnetwork.IpamConf{}
   833  	if ipamV6Conf != nil {
   834  		v6Conf = append(v6Conf, ipamV6Conf)
   835  	}
   836  	// Initialize default network on "bridge" with the same name
   837  	_, err = controller.NewNetwork("bridge", "bridge", "",
   838  		libnetwork.NetworkOptionEnableIPv6(config.bridgeConfig.EnableIPv6),
   839  		libnetwork.NetworkOptionDriverOpts(netOption),
   840  		libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
   841  		libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
   842  	if err != nil {
   843  		return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
   844  	}
   845  	return nil
   846  }
   847  
   848  // Remove default bridge interface if present (--bridge=none use case)
   849  func removeDefaultBridgeInterface() {
   850  	if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
   851  		if err := netlink.LinkDel(lnk); err != nil {
   852  			logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
   853  		}
   854  	}
   855  }
   856  
   857  func (daemon *Daemon) getLayerInit() func(string) error {
   858  	return daemon.setupInitLayer
   859  }
   860  
   861  // Parse the remapped root (user namespace) option, which can be one of:
   862  //   username            - valid username from /etc/passwd
   863  //   username:groupname  - valid username; valid groupname from /etc/group
   864  //   uid                 - 32-bit unsigned int valid Linux UID value
   865  //   uid:gid             - uid value; 32-bit unsigned int Linux GID value
   866  //
   867  //  If no groupname is specified, and a username is specified, an attempt
   868  //  will be made to lookup a gid for that username as a groupname
   869  //
   870  //  If names are used, they are verified to exist in passwd/group
   871  func parseRemappedRoot(usergrp string) (string, string, error) {
   872  
   873  	var (
   874  		userID, groupID     int
   875  		username, groupname string
   876  	)
   877  
   878  	idparts := strings.Split(usergrp, ":")
   879  	if len(idparts) > 2 {
   880  		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
   881  	}
   882  
   883  	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
   884  		// must be a uid; take it as valid
   885  		userID = int(uid)
   886  		luser, err := idtools.LookupUID(userID)
   887  		if err != nil {
   888  			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
   889  		}
   890  		username = luser.Name
   891  		if len(idparts) == 1 {
   892  			// if the uid was numeric and no gid was specified, take the uid as the gid
   893  			groupID = userID
   894  			lgrp, err := idtools.LookupGID(groupID)
   895  			if err != nil {
   896  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
   897  			}
   898  			groupname = lgrp.Name
   899  		}
   900  	} else {
   901  		lookupName := idparts[0]
   902  		// special case: if the user specified "default", they want Docker to create or
   903  		// use (after creation) the "dockremap" user/group for root remapping
   904  		if lookupName == defaultIDSpecifier {
   905  			lookupName = defaultRemappedID
   906  		}
   907  		luser, err := idtools.LookupUser(lookupName)
   908  		if err != nil && idparts[0] != defaultIDSpecifier {
   909  			// error if the name requested isn't the special "dockremap" ID
   910  			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
   911  		} else if err != nil {
   912  			// special case-- if the username == "default", then we have been asked
   913  			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
   914  			// ranges will be used for the user and group mappings in user namespaced containers
   915  			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
   916  			if err == nil {
   917  				return defaultRemappedID, defaultRemappedID, nil
   918  			}
   919  			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
   920  		}
   921  		username = luser.Name
   922  		if len(idparts) == 1 {
   923  			// we only have a string username, and no group specified; look up gid from username as group
   924  			group, err := idtools.LookupGroup(lookupName)
   925  			if err != nil {
   926  				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
   927  			}
   928  			groupID = group.Gid
   929  			groupname = group.Name
   930  		}
   931  	}
   932  
   933  	if len(idparts) == 2 {
   934  		// groupname or gid is separately specified and must be resolved
   935  		// to an unsigned 32-bit gid
   936  		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
   937  			// must be a gid, take it as valid
   938  			groupID = int(gid)
   939  			lgrp, err := idtools.LookupGID(groupID)
   940  			if err != nil {
   941  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
   942  			}
   943  			groupname = lgrp.Name
   944  		} else {
   945  			// not a number; attempt a lookup
   946  			if _, err := idtools.LookupGroup(idparts[1]); err != nil {
   947  				return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
   948  			}
   949  			groupname = idparts[1]
   950  		}
   951  	}
   952  	return username, groupname, nil
   953  }
   954  
   955  func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
   956  	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
   957  		return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
   958  	}
   959  
   960  	// if the daemon was started with remapped root option, parse
   961  	// the config option to the int uid,gid values
   962  	var (
   963  		uidMaps, gidMaps []idtools.IDMap
   964  	)
   965  	if config.RemappedRoot != "" {
   966  		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
   967  		if err != nil {
   968  			return nil, nil, err
   969  		}
   970  		if username == "root" {
   971  			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
   972  			// effectively
   973  			logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
   974  			return uidMaps, gidMaps, nil
   975  		}
   976  		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
   977  		// update remapped root setting now that we have resolved them to actual names
   978  		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
   979  
   980  		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
   981  		if err != nil {
   982  			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
   983  		}
   984  	}
   985  	return uidMaps, gidMaps, nil
   986  }
   987  
   988  func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
   989  	config.Root = rootDir
   990  	// the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
   991  	// so that syscalls executing as non-root, operating on subdirectories of the graph root
   992  	// (e.g. mounted layers of a container) can traverse this path.
   993  	// The user namespace support will create subdirectories for the remapped root host uid:gid
   994  	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
   995  	// layer content subtrees.
   996  	if _, err := os.Stat(rootDir); err == nil {
   997  		// root current exists; verify the access bits are correct by setting them
   998  		if err = os.Chmod(rootDir, 0711); err != nil {
   999  			return err
  1000  		}
  1001  	} else if os.IsNotExist(err) {
  1002  		// no root exists yet, create it 0711 with root:root ownership
  1003  		if err := os.MkdirAll(rootDir, 0711); err != nil {
  1004  			return err
  1005  		}
  1006  	}
  1007  
  1008  	// if user namespaces are enabled we will create a subtree underneath the specified root
  1009  	// with any/all specified remapped root uid/gid options on the daemon creating
  1010  	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
  1011  	// `chdir()` to work for containers namespaced to that uid/gid)
  1012  	if config.RemappedRoot != "" {
  1013  		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
  1014  		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
  1015  		// Create the root directory if it doesn't exist
  1016  		if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
  1017  			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
  1018  		}
  1019  		// we also need to verify that any pre-existing directories in the path to
  1020  		// the graphroot won't block access to remapped root--if any pre-existing directory
  1021  		// has strict permissions that don't allow "x", container start will fail, so
  1022  		// better to warn and fail now
  1023  		dirPath := config.Root
  1024  		for {
  1025  			dirPath = filepath.Dir(dirPath)
  1026  			if dirPath == "/" {
  1027  				break
  1028  			}
  1029  			if !idtools.CanAccess(dirPath, rootUID, rootGID) {
  1030  				return fmt.Errorf("A subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories.", config.Root)
  1031  			}
  1032  		}
  1033  	}
  1034  	return nil
  1035  }
  1036  
  1037  // registerLinks writes the links to a file.
  1038  func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
  1039  	if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() {
  1040  		return nil
  1041  	}
  1042  
  1043  	for _, l := range hostConfig.Links {
  1044  		name, alias, err := runconfigopts.ParseLink(l)
  1045  		if err != nil {
  1046  			return err
  1047  		}
  1048  		child, err := daemon.GetContainer(name)
  1049  		if err != nil {
  1050  			return fmt.Errorf("Could not get container for %s", name)
  1051  		}
  1052  		for child.HostConfig.NetworkMode.IsContainer() {
  1053  			parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2)
  1054  			child, err = daemon.GetContainer(parts[1])
  1055  			if err != nil {
  1056  				return fmt.Errorf("Could not get container for %s", parts[1])
  1057  			}
  1058  		}
  1059  		if child.HostConfig.NetworkMode.IsHost() {
  1060  			return runconfig.ErrConflictHostNetworkAndLinks
  1061  		}
  1062  		if err := daemon.registerLink(container, child, alias); err != nil {
  1063  			return err
  1064  		}
  1065  	}
  1066  
  1067  	// After we load all the links into the daemon
  1068  	// set them to nil on the hostconfig
  1069  	return container.WriteHostConfig()
  1070  }
  1071  
  1072  // conditionalMountOnStart is a platform specific helper function during the
  1073  // container start to call mount.
  1074  func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
  1075  	return daemon.Mount(container)
  1076  }
  1077  
  1078  // conditionalUnmountOnCleanup is a platform specific helper function called
  1079  // during the cleanup of a container to unmount.
  1080  func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
  1081  	return daemon.Unmount(container)
  1082  }
  1083  
  1084  func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
  1085  	if !c.IsRunning() {
  1086  		return nil, errNotRunning{c.ID}
  1087  	}
  1088  	stats, err := daemon.containerd.Stats(c.ID)
  1089  	if err != nil {
  1090  		return nil, err
  1091  	}
  1092  	s := &types.StatsJSON{}
  1093  	cgs := stats.CgroupStats
  1094  	if cgs != nil {
  1095  		s.BlkioStats = types.BlkioStats{
  1096  			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
  1097  			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
  1098  			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
  1099  			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
  1100  			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
  1101  			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
  1102  			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
  1103  			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
  1104  		}
  1105  		cpu := cgs.CpuStats
  1106  		s.CPUStats = types.CPUStats{
  1107  			CPUUsage: types.CPUUsage{
  1108  				TotalUsage:        cpu.CpuUsage.TotalUsage,
  1109  				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
  1110  				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
  1111  				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
  1112  			},
  1113  			ThrottlingData: types.ThrottlingData{
  1114  				Periods:          cpu.ThrottlingData.Periods,
  1115  				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
  1116  				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
  1117  			},
  1118  		}
  1119  		mem := cgs.MemoryStats.Usage
  1120  		s.MemoryStats = types.MemoryStats{
  1121  			Usage:    mem.Usage,
  1122  			MaxUsage: mem.MaxUsage,
  1123  			Stats:    cgs.MemoryStats.Stats,
  1124  			Failcnt:  mem.Failcnt,
  1125  			Limit:    mem.Limit,
  1126  		}
  1127  		// if the container does not set memory limit, use the machineMemory
  1128  		if mem.Limit > daemon.statsCollector.machineMemory && daemon.statsCollector.machineMemory > 0 {
  1129  			s.MemoryStats.Limit = daemon.statsCollector.machineMemory
  1130  		}
  1131  		if cgs.PidsStats != nil {
  1132  			s.PidsStats = types.PidsStats{
  1133  				Current: cgs.PidsStats.Current,
  1134  			}
  1135  		}
  1136  	}
  1137  	s.Read, err = ptypes.Timestamp(stats.Timestamp)
  1138  	if err != nil {
  1139  		return nil, err
  1140  	}
  1141  	return s, nil
  1142  }
  1143  
// setDefaultIsolation determines the default isolation mode for the
// daemon to run in. This is only applicable on Windows; in this file it is
// a no-op that always returns nil.
func (daemon *Daemon) setDefaultIsolation() error {
	return nil
}
  1149  
  1150  func rootFSToAPIType(rootfs *image.RootFS) types.RootFS {
  1151  	var layers []string
  1152  	for _, l := range rootfs.DiffIDs {
  1153  		layers = append(layers, l.String())
  1154  	}
  1155  	return types.RootFS{
  1156  		Type:   rootfs.Type,
  1157  		Layers: layers,
  1158  	}
  1159  }
  1160  
  1161  // setupDaemonProcess sets various settings for the daemon's process
  1162  func setupDaemonProcess(config *Config) error {
  1163  	// setup the daemons oom_score_adj
  1164  	return setupOOMScoreAdj(config.OOMScoreAdjust)
  1165  }
  1166  
  1167  func setupOOMScoreAdj(score int) error {
  1168  	f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0)
  1169  	if err != nil {
  1170  		return err
  1171  	}
  1172  
  1173  	stringScore := strconv.Itoa(score)
  1174  	_, err = f.WriteString(stringScore)
  1175  	if os.IsPermission(err) {
  1176  		// Setting oom_score_adj does not work in an
  1177  		// unprivileged container. Ignore the error, but log
  1178  		// it if we appear not to be in that situation.
  1179  		if !rsystem.RunningInUserNS() {
  1180  			logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore)
  1181  		}
  1182  		return nil
  1183  	}
  1184  	f.Close()
  1185  	return err
  1186  }
  1187  
  1188  func (daemon *Daemon) initCgroupsPath(path string) error {
  1189  	if path == "/" || path == "." {
  1190  		return nil
  1191  	}
  1192  
  1193  	if daemon.configStore.CPURealtimePeriod == 0 && daemon.configStore.CPURealtimeRuntime == 0 {
  1194  		return nil
  1195  	}
  1196  
  1197  	// Recursively create cgroup to ensure that the system and all parent cgroups have values set
  1198  	// for the period and runtime as this limits what the children can be set to.
  1199  	daemon.initCgroupsPath(filepath.Dir(path))
  1200  
  1201  	_, root, err := cgroups.FindCgroupMountpointAndRoot("cpu")
  1202  	if err != nil {
  1203  		return err
  1204  	}
  1205  
  1206  	path = filepath.Join(root, path)
  1207  	sysinfo := sysinfo.New(true)
  1208  	if sysinfo.CPURealtimePeriod && daemon.configStore.CPURealtimePeriod != 0 {
  1209  		if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
  1210  			return err
  1211  		}
  1212  		if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_period_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimePeriod, 10)), 0700); err != nil {
  1213  			return err
  1214  		}
  1215  	}
  1216  	if sysinfo.CPURealtimeRuntime && daemon.configStore.CPURealtimeRuntime != 0 {
  1217  		if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
  1218  			return err
  1219  		}
  1220  		if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_runtime_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimeRuntime, 10)), 0700); err != nil {
  1221  			return err
  1222  		}
  1223  	}
  1224  	return nil
  1225  }
  1226  
  1227  func (daemon *Daemon) setupSeccompProfile() error {
  1228  	if daemon.configStore.SeccompProfile != "" {
  1229  		daemon.seccompProfilePath = daemon.configStore.SeccompProfile
  1230  		b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile)
  1231  		if err != nil {
  1232  			return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err)
  1233  		}
  1234  		daemon.seccompProfile = b
  1235  	}
  1236  	return nil
  1237  }