github.com/kim0/docker@v0.6.2-0.20161130212042-4addda3f07e7/daemon/daemon_unix.go (about)

     1  // +build linux freebsd
     2  
     3  package daemon
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"net"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"runtime/debug"
    14  	"strconv"
    15  	"strings"
    16  	"syscall"
    17  
    18  	"github.com/Sirupsen/logrus"
    19  	"github.com/docker/docker/api/types"
    20  	"github.com/docker/docker/api/types/blkiodev"
    21  	pblkiodev "github.com/docker/docker/api/types/blkiodev"
    22  	containertypes "github.com/docker/docker/api/types/container"
    23  	"github.com/docker/docker/container"
    24  	"github.com/docker/docker/image"
    25  	"github.com/docker/docker/pkg/idtools"
    26  	"github.com/docker/docker/pkg/parsers"
    27  	"github.com/docker/docker/pkg/parsers/kernel"
    28  	"github.com/docker/docker/pkg/sysinfo"
    29  	"github.com/docker/docker/runconfig"
    30  	runconfigopts "github.com/docker/docker/runconfig/opts"
    31  	"github.com/docker/libnetwork"
    32  	nwconfig "github.com/docker/libnetwork/config"
    33  	"github.com/docker/libnetwork/drivers/bridge"
    34  	"github.com/docker/libnetwork/netlabel"
    35  	"github.com/docker/libnetwork/netutils"
    36  	"github.com/docker/libnetwork/options"
    37  	lntypes "github.com/docker/libnetwork/types"
    38  	"github.com/golang/protobuf/ptypes"
    39  	"github.com/opencontainers/runc/libcontainer/cgroups"
    40  	"github.com/opencontainers/runc/libcontainer/label"
    41  	rsystem "github.com/opencontainers/runc/libcontainer/system"
    42  	"github.com/opencontainers/runc/libcontainer/user"
    43  	specs "github.com/opencontainers/runtime-spec/specs-go"
    44  	"github.com/pkg/errors"
    45  	"github.com/vishvananda/netlink"
    46  )
    47  
const (
	// linuxMinCPUShares and linuxMaxCPUShares bound the CPU shares value;
	// adaptContainerSettings clamps requested shares into this range.
	// See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
	linuxMinCPUShares = 2
	linuxMaxCPUShares = 262144
	// platformSupported is not read anywhere in this part of the file;
	// presumably it is consulted by platform-independent daemon code — TODO confirm.
	platformSupported = true
	// linuxMinMemory (4 MiB, in bytes) is the smallest memory, memory
	// reservation, or kernel memory limit that verifyContainerResources accepts.
	// It's not a kernel limit; we want this 4M limit to supply a reasonably
	// functional container.
	linuxMinMemory = 4194304
	// Constants for remapped root settings.
	// NOTE(review): their users are outside this part of the file.
	defaultIDSpecifier string = "default"
	defaultRemappedID  string = "dockremap"

	// Constants for the cgroup drivers accepted by the native.cgroupdriver
	// exec option (see VerifyCgroupDriver).
	cgroupFsDriver      = "cgroupfs"
	cgroupSystemdDriver = "systemd"
)
    63  
    64  func getMemoryResources(config containertypes.Resources) *specs.Memory {
    65  	memory := specs.Memory{}
    66  
    67  	if config.Memory > 0 {
    68  		limit := uint64(config.Memory)
    69  		memory.Limit = &limit
    70  	}
    71  
    72  	if config.MemoryReservation > 0 {
    73  		reservation := uint64(config.MemoryReservation)
    74  		memory.Reservation = &reservation
    75  	}
    76  
    77  	if config.MemorySwap != 0 {
    78  		swap := uint64(config.MemorySwap)
    79  		memory.Swap = &swap
    80  	}
    81  
    82  	if config.MemorySwappiness != nil {
    83  		swappiness := uint64(*config.MemorySwappiness)
    84  		memory.Swappiness = &swappiness
    85  	}
    86  
    87  	if config.KernelMemory != 0 {
    88  		kernelMemory := uint64(config.KernelMemory)
    89  		memory.Kernel = &kernelMemory
    90  	}
    91  
    92  	return &memory
    93  }
    94  
    95  func getCPUResources(config containertypes.Resources) *specs.CPU {
    96  	cpu := specs.CPU{}
    97  
    98  	if config.CPUShares != 0 {
    99  		shares := uint64(config.CPUShares)
   100  		cpu.Shares = &shares
   101  	}
   102  
   103  	if config.CpusetCpus != "" {
   104  		cpuset := config.CpusetCpus
   105  		cpu.Cpus = &cpuset
   106  	}
   107  
   108  	if config.CpusetMems != "" {
   109  		cpuset := config.CpusetMems
   110  		cpu.Mems = &cpuset
   111  	}
   112  
   113  	if config.CPUPeriod != 0 {
   114  		period := uint64(config.CPUPeriod)
   115  		cpu.Period = &period
   116  	}
   117  
   118  	if config.CPUQuota != 0 {
   119  		quota := uint64(config.CPUQuota)
   120  		cpu.Quota = &quota
   121  	}
   122  
   123  	if config.CPURealtimePeriod != 0 {
   124  		period := uint64(config.CPURealtimePeriod)
   125  		cpu.RealtimePeriod = &period
   126  	}
   127  
   128  	if config.CPURealtimeRuntime != 0 {
   129  		runtime := uint64(config.CPURealtimeRuntime)
   130  		cpu.RealtimeRuntime = &runtime
   131  	}
   132  
   133  	return &cpu
   134  }
   135  
   136  func getBlkioWeightDevices(config containertypes.Resources) ([]specs.WeightDevice, error) {
   137  	var stat syscall.Stat_t
   138  	var blkioWeightDevices []specs.WeightDevice
   139  
   140  	for _, weightDevice := range config.BlkioWeightDevice {
   141  		if err := syscall.Stat(weightDevice.Path, &stat); err != nil {
   142  			return nil, err
   143  		}
   144  		weight := weightDevice.Weight
   145  		d := specs.WeightDevice{Weight: &weight}
   146  		d.Major = int64(stat.Rdev / 256)
   147  		d.Minor = int64(stat.Rdev % 256)
   148  		blkioWeightDevices = append(blkioWeightDevices, d)
   149  	}
   150  
   151  	return blkioWeightDevices, nil
   152  }
   153  
   154  func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error {
   155  	var (
   156  		labelOpts []string
   157  		err       error
   158  	)
   159  
   160  	for _, opt := range config.SecurityOpt {
   161  		if opt == "no-new-privileges" {
   162  			container.NoNewPrivileges = true
   163  		} else {
   164  			var con []string
   165  			if strings.Contains(opt, "=") {
   166  				con = strings.SplitN(opt, "=", 2)
   167  			} else if strings.Contains(opt, ":") {
   168  				con = strings.SplitN(opt, ":", 2)
   169  				logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 1.13, use `=` instead.")
   170  			}
   171  
   172  			if len(con) != 2 {
   173  				return fmt.Errorf("Invalid --security-opt 1: %q", opt)
   174  			}
   175  
   176  			switch con[0] {
   177  			case "label":
   178  				labelOpts = append(labelOpts, con[1])
   179  			case "apparmor":
   180  				container.AppArmorProfile = con[1]
   181  			case "seccomp":
   182  				container.SeccompProfile = con[1]
   183  			default:
   184  				return fmt.Errorf("Invalid --security-opt 2: %q", opt)
   185  			}
   186  		}
   187  	}
   188  
   189  	container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts)
   190  	return err
   191  }
   192  
   193  func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.ThrottleDevice, error) {
   194  	var throttleDevices []specs.ThrottleDevice
   195  	var stat syscall.Stat_t
   196  
   197  	for _, d := range devs {
   198  		if err := syscall.Stat(d.Path, &stat); err != nil {
   199  			return nil, err
   200  		}
   201  		rate := d.Rate
   202  		d := specs.ThrottleDevice{Rate: &rate}
   203  		d.Major = int64(stat.Rdev / 256)
   204  		d.Minor = int64(stat.Rdev % 256)
   205  		throttleDevices = append(throttleDevices, d)
   206  	}
   207  
   208  	return throttleDevices, nil
   209  }
   210  
   211  func checkKernel() error {
   212  	// Check for unsupported kernel versions
   213  	// FIXME: it would be cleaner to not test for specific versions, but rather
   214  	// test for specific functionalities.
   215  	// Unfortunately we can't test for the feature "does not cause a kernel panic"
   216  	// without actually causing a kernel panic, so we need this workaround until
   217  	// the circumstances of pre-3.10 crashes are clearer.
   218  	// For details see https://github.com/docker/docker/issues/407
   219  	// Docker 1.11 and above doesn't actually run on kernels older than 3.4,
   220  	// due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4).
   221  	if !kernel.CheckKernelVersion(3, 10, 0) {
   222  		v, _ := kernel.GetKernelVersion()
   223  		if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" {
   224  			logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String())
   225  		}
   226  	}
   227  	return nil
   228  }
   229  
   230  // adaptContainerSettings is called during container creation to modify any
   231  // settings necessary in the HostConfig structure.
   232  func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error {
   233  	if adjustCPUShares && hostConfig.CPUShares > 0 {
   234  		// Handle unsupported CPUShares
   235  		if hostConfig.CPUShares < linuxMinCPUShares {
   236  			logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares)
   237  			hostConfig.CPUShares = linuxMinCPUShares
   238  		} else if hostConfig.CPUShares > linuxMaxCPUShares {
   239  			logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares)
   240  			hostConfig.CPUShares = linuxMaxCPUShares
   241  		}
   242  	}
   243  	if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 {
   244  		// By default, MemorySwap is set to twice the size of Memory.
   245  		hostConfig.MemorySwap = hostConfig.Memory * 2
   246  	}
   247  	if hostConfig.ShmSize == 0 {
   248  		hostConfig.ShmSize = container.DefaultSHMSize
   249  	}
   250  	var err error
   251  	if hostConfig.SecurityOpt == nil {
   252  		hostConfig.SecurityOpt, err = daemon.generateSecurityOpt(hostConfig.IpcMode, hostConfig.PidMode, hostConfig.Privileged)
   253  		if err != nil {
   254  			return err
   255  		}
   256  	}
   257  	if hostConfig.MemorySwappiness == nil {
   258  		defaultSwappiness := int64(-1)
   259  		hostConfig.MemorySwappiness = &defaultSwappiness
   260  	}
   261  	if hostConfig.OomKillDisable == nil {
   262  		defaultOomKillDisable := false
   263  		hostConfig.OomKillDisable = &defaultOomKillDisable
   264  	}
   265  
   266  	return nil
   267  }
   268  
   269  func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) {
   270  	warnings := []string{}
   271  
   272  	// memory subsystem checks and adjustments
   273  	if resources.Memory != 0 && resources.Memory < linuxMinMemory {
   274  		return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB")
   275  	}
   276  	if resources.Memory > 0 && !sysInfo.MemoryLimit {
   277  		warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   278  		logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   279  		resources.Memory = 0
   280  		resources.MemorySwap = -1
   281  	}
   282  	if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit {
   283  		warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.")
   284  		logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.")
   285  		resources.MemorySwap = -1
   286  	}
   287  	if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory {
   288  		return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage")
   289  	}
   290  	if resources.Memory == 0 && resources.MemorySwap > 0 && !update {
   291  		return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage")
   292  	}
   293  	if resources.MemorySwappiness != nil && *resources.MemorySwappiness != -1 && !sysInfo.MemorySwappiness {
   294  		warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.")
   295  		logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.")
   296  		resources.MemorySwappiness = nil
   297  	}
   298  	if resources.MemorySwappiness != nil {
   299  		swappiness := *resources.MemorySwappiness
   300  		if swappiness < -1 || swappiness > 100 {
   301  			return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness)
   302  		}
   303  	}
   304  	if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation {
   305  		warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
   306  		logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
   307  		resources.MemoryReservation = 0
   308  	}
   309  	if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory {
   310  		return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB")
   311  	}
   312  	if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation {
   313  		return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage")
   314  	}
   315  	if resources.KernelMemory > 0 && !sysInfo.KernelMemory {
   316  		warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   317  		logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   318  		resources.KernelMemory = 0
   319  	}
   320  	if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory {
   321  		return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB")
   322  	}
   323  	if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) {
   324  		warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
   325  		logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
   326  	}
   327  	if resources.OomKillDisable != nil && !sysInfo.OomKillDisable {
   328  		// only produce warnings if the setting wasn't to *disable* the OOM Kill; no point
   329  		// warning the caller if they already wanted the feature to be off
   330  		if *resources.OomKillDisable {
   331  			warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.")
   332  			logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.")
   333  		}
   334  		resources.OomKillDisable = nil
   335  	}
   336  
   337  	if resources.PidsLimit != 0 && !sysInfo.PidsLimit {
   338  		warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
   339  		logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
   340  		resources.PidsLimit = 0
   341  	}
   342  
   343  	// cpu subsystem checks and adjustments
   344  	if resources.CPUShares > 0 && !sysInfo.CPUShares {
   345  		warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
   346  		logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
   347  		resources.CPUShares = 0
   348  	}
   349  	if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod {
   350  		warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
   351  		logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
   352  		resources.CPUPeriod = 0
   353  	}
   354  	if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) {
   355  		return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)")
   356  	}
   357  	if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota {
   358  		warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
   359  		logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
   360  		resources.CPUQuota = 0
   361  	}
   362  	if resources.CPUQuota > 0 && resources.CPUQuota < 1000 {
   363  		return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)")
   364  	}
   365  	if resources.CPUPercent > 0 {
   366  		warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS))
   367  		logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)
   368  		resources.CPUPercent = 0
   369  	}
   370  
   371  	// cpuset subsystem checks and adjustments
   372  	if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset {
   373  		warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
   374  		logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
   375  		resources.CpusetCpus = ""
   376  		resources.CpusetMems = ""
   377  	}
   378  	cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus)
   379  	if err != nil {
   380  		return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus)
   381  	}
   382  	if !cpusAvailable {
   383  		return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus)
   384  	}
   385  	memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems)
   386  	if err != nil {
   387  		return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems)
   388  	}
   389  	if !memsAvailable {
   390  		return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems)
   391  	}
   392  
   393  	// blkio subsystem checks and adjustments
   394  	if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight {
   395  		warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
   396  		logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
   397  		resources.BlkioWeight = 0
   398  	}
   399  	if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) {
   400  		return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000")
   401  	}
   402  	if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 {
   403  		return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS)
   404  	}
   405  	if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice {
   406  		warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
   407  		logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
   408  		resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{}
   409  	}
   410  	if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice {
   411  		warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.")
   412  		logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded")
   413  		resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{}
   414  	}
   415  	if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice {
   416  		warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
   417  		logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
   418  		resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{}
   419  	}
   420  	if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice {
   421  		warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
   422  		logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
   423  		resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{}
   424  	}
   425  	if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice {
   426  		warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
   427  		logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
   428  		resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{}
   429  	}
   430  
   431  	return warnings, nil
   432  }
   433  
   434  func (daemon *Daemon) getCgroupDriver() string {
   435  	cgroupDriver := cgroupFsDriver
   436  
   437  	if UsingSystemd(daemon.configStore) {
   438  		cgroupDriver = cgroupSystemdDriver
   439  	}
   440  	return cgroupDriver
   441  }
   442  
   443  // getCD gets the raw value of the native.cgroupdriver option, if set.
   444  func getCD(config *Config) string {
   445  	for _, option := range config.ExecOptions {
   446  		key, val, err := parsers.ParseKeyValueOpt(option)
   447  		if err != nil || !strings.EqualFold(key, "native.cgroupdriver") {
   448  			continue
   449  		}
   450  		return val
   451  	}
   452  	return ""
   453  }
   454  
   455  // VerifyCgroupDriver validates native.cgroupdriver
   456  func VerifyCgroupDriver(config *Config) error {
   457  	cd := getCD(config)
   458  	if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver {
   459  		return nil
   460  	}
   461  	return fmt.Errorf("native.cgroupdriver option %s not supported", cd)
   462  }
   463  
   464  // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd
   465  func UsingSystemd(config *Config) bool {
   466  	return getCD(config) == cgroupSystemdDriver
   467  }
   468  
   469  // verifyPlatformContainerSettings performs platform-specific validation of the
   470  // hostconfig and config structures.
   471  func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) {
   472  	warnings := []string{}
   473  	sysInfo := sysinfo.New(true)
   474  
   475  	warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config)
   476  	if err != nil {
   477  		return warnings, err
   478  	}
   479  
   480  	w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update)
   481  
   482  	// no matter err is nil or not, w could have data in itself.
   483  	warnings = append(warnings, w...)
   484  
   485  	if err != nil {
   486  		return warnings, err
   487  	}
   488  
   489  	if hostConfig.ShmSize < 0 {
   490  		return warnings, fmt.Errorf("SHM size can not be less than 0")
   491  	}
   492  
   493  	if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 {
   494  		return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj)
   495  	}
   496  
   497  	// ip-forwarding does not affect container with '--net=host' (or '--net=none')
   498  	if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) {
   499  		warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
   500  		logrus.Warn("IPv4 forwarding is disabled. Networking will not work")
   501  	}
   502  	// check for various conflicting options with user namespaces
   503  	if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
   504  		if hostConfig.Privileged {
   505  			return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
   506  		}
   507  		if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   508  			return warnings, fmt.Errorf("Cannot share the host's network namespace when user namespaces are enabled")
   509  		}
   510  		if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   511  			return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled")
   512  		}
   513  	}
   514  	if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) {
   515  		// CgroupParent for systemd cgroup should be named as "xxx.slice"
   516  		if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") {
   517  			return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   518  		}
   519  	}
   520  	if hostConfig.Runtime == "" {
   521  		hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
   522  	}
   523  
   524  	if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil {
   525  		return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime)
   526  	}
   527  
   528  	return warnings, nil
   529  }
   530  
   531  // platformReload update configuration with platform specific options
   532  func (daemon *Daemon) platformReload(config *Config) map[string]string {
   533  	if config.IsValueSet("runtimes") {
   534  		daemon.configStore.Runtimes = config.Runtimes
   535  		// Always set the default one
   536  		daemon.configStore.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   537  	}
   538  
   539  	if config.DefaultRuntime != "" {
   540  		daemon.configStore.DefaultRuntime = config.DefaultRuntime
   541  	}
   542  
   543  	// Update attributes
   544  	var runtimeList bytes.Buffer
   545  	for name, rt := range daemon.configStore.Runtimes {
   546  		if runtimeList.Len() > 0 {
   547  			runtimeList.WriteRune(' ')
   548  		}
   549  		runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt))
   550  	}
   551  
   552  	return map[string]string{
   553  		"runtimes":        runtimeList.String(),
   554  		"default-runtime": daemon.configStore.DefaultRuntime,
   555  	}
   556  }
   557  
   558  // verifyDaemonSettings performs validation of daemon config struct
   559  func verifyDaemonSettings(config *Config) error {
   560  	// Check for mutually incompatible config options
   561  	if config.bridgeConfig.Iface != "" && config.bridgeConfig.IP != "" {
   562  		return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one")
   563  	}
   564  	if !config.bridgeConfig.EnableIPTables && !config.bridgeConfig.InterContainerCommunication {
   565  		return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true")
   566  	}
   567  	if !config.bridgeConfig.EnableIPTables && config.bridgeConfig.EnableIPMasq {
   568  		config.bridgeConfig.EnableIPMasq = false
   569  	}
   570  	if err := VerifyCgroupDriver(config); err != nil {
   571  		return err
   572  	}
   573  	if config.CgroupParent != "" && UsingSystemd(config) {
   574  		if len(config.CgroupParent) <= 6 || !strings.HasSuffix(config.CgroupParent, ".slice") {
   575  			return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   576  		}
   577  	}
   578  
   579  	if config.DefaultRuntime == "" {
   580  		config.DefaultRuntime = stockRuntimeName
   581  	}
   582  	if config.Runtimes == nil {
   583  		config.Runtimes = make(map[string]types.Runtime)
   584  	}
   585  	config.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   586  
   587  	return nil
   588  }
   589  
   590  // checkSystem validates platform-specific requirements
   591  func checkSystem() error {
   592  	if os.Geteuid() != 0 {
   593  		return fmt.Errorf("The Docker daemon needs to be run as root")
   594  	}
   595  	return checkKernel()
   596  }
   597  
   598  // configureMaxThreads sets the Go runtime max threads threshold
   599  // which is 90% of the kernel setting from /proc/sys/kernel/threads-max
   600  func configureMaxThreads(config *Config) error {
   601  	mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
   602  	if err != nil {
   603  		return err
   604  	}
   605  	mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
   606  	if err != nil {
   607  		return err
   608  	}
   609  	maxThreads := (mtint / 100) * 90
   610  	debug.SetMaxThreads(maxThreads)
   611  	logrus.Debugf("Golang's threads limit set to %d", maxThreads)
   612  	return nil
   613  }
   614  
   615  // configureKernelSecuritySupport configures and validates security support for the kernel
   616  func configureKernelSecuritySupport(config *Config, driverName string) error {
   617  	if config.EnableSelinuxSupport {
   618  		if !selinuxEnabled() {
   619  			logrus.Warn("Docker could not enable SELinux on the host system")
   620  		}
   621  	} else {
   622  		selinuxSetDisabled()
   623  	}
   624  	return nil
   625  }
   626  
   627  func (daemon *Daemon) initNetworkController(config *Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
   628  	netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
   629  	if err != nil {
   630  		return nil, err
   631  	}
   632  
   633  	controller, err := libnetwork.New(netOptions...)
   634  	if err != nil {
   635  		return nil, fmt.Errorf("error obtaining controller instance: %v", err)
   636  	}
   637  
   638  	if len(activeSandboxes) > 0 {
   639  		logrus.Infof("There are old running containers, the network config will not take affect")
   640  		return controller, nil
   641  	}
   642  
   643  	// Initialize default network on "null"
   644  	if n, _ := controller.NetworkByName("none"); n == nil {
   645  		if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   646  			return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
   647  		}
   648  	}
   649  
   650  	// Initialize default network on "host"
   651  	if n, _ := controller.NetworkByName("host"); n == nil {
   652  		if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   653  			return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
   654  		}
   655  	}
   656  
   657  	// Clear stale bridge network
   658  	if n, err := controller.NetworkByName("bridge"); err == nil {
   659  		if err = n.Delete(); err != nil {
   660  			return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
   661  		}
   662  	}
   663  
   664  	if !config.DisableBridge {
   665  		// Initialize default driver "bridge"
   666  		if err := initBridgeDriver(controller, config); err != nil {
   667  			return nil, err
   668  		}
   669  	} else {
   670  		removeDefaultBridgeInterface()
   671  	}
   672  
   673  	return controller, nil
   674  }
   675  
   676  func driverOptions(config *Config) []nwconfig.Option {
   677  	bridgeConfig := options.Generic{
   678  		"EnableIPForwarding":  config.bridgeConfig.EnableIPForward,
   679  		"EnableIPTables":      config.bridgeConfig.EnableIPTables,
   680  		"EnableUserlandProxy": config.bridgeConfig.EnableUserlandProxy,
   681  		"UserlandProxyPath":   config.bridgeConfig.UserlandProxyPath}
   682  	bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}
   683  
   684  	dOptions := []nwconfig.Option{}
   685  	dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption))
   686  	return dOptions
   687  }
   688  
   689  func initBridgeDriver(controller libnetwork.NetworkController, config *Config) error {
   690  	bridgeName := bridge.DefaultBridgeName
   691  	if config.bridgeConfig.Iface != "" {
   692  		bridgeName = config.bridgeConfig.Iface
   693  	}
   694  	netOption := map[string]string{
   695  		bridge.BridgeName:         bridgeName,
   696  		bridge.DefaultBridge:      strconv.FormatBool(true),
   697  		netlabel.DriverMTU:        strconv.Itoa(config.Mtu),
   698  		bridge.EnableIPMasquerade: strconv.FormatBool(config.bridgeConfig.EnableIPMasq),
   699  		bridge.EnableICC:          strconv.FormatBool(config.bridgeConfig.InterContainerCommunication),
   700  	}
   701  
   702  	// --ip processing
   703  	if config.bridgeConfig.DefaultIP != nil {
   704  		netOption[bridge.DefaultBindingIP] = config.bridgeConfig.DefaultIP.String()
   705  	}
   706  
   707  	var (
   708  		ipamV4Conf *libnetwork.IpamConf
   709  		ipamV6Conf *libnetwork.IpamConf
   710  	)
   711  
   712  	ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   713  
   714  	nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
   715  	if err != nil {
   716  		return errors.Wrap(err, "list bridge addresses failed")
   717  	}
   718  
   719  	nw := nwList[0]
   720  	if len(nwList) > 1 && config.bridgeConfig.FixedCIDR != "" {
   721  		_, fCIDR, err := net.ParseCIDR(config.bridgeConfig.FixedCIDR)
   722  		if err != nil {
   723  			return errors.Wrap(err, "parse CIDR failed")
   724  		}
   725  		// Iterate through in case there are multiple addresses for the bridge
   726  		for _, entry := range nwList {
   727  			if fCIDR.Contains(entry.IP) {
   728  				nw = entry
   729  				break
   730  			}
   731  		}
   732  	}
   733  
   734  	ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
   735  	hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
   736  	if hip.IsGlobalUnicast() {
   737  		ipamV4Conf.Gateway = nw.IP.String()
   738  	}
   739  
   740  	if config.bridgeConfig.IP != "" {
   741  		ipamV4Conf.PreferredPool = config.bridgeConfig.IP
   742  		ip, _, err := net.ParseCIDR(config.bridgeConfig.IP)
   743  		if err != nil {
   744  			return err
   745  		}
   746  		ipamV4Conf.Gateway = ip.String()
   747  	} else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
   748  		logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
   749  	}
   750  
   751  	if config.bridgeConfig.FixedCIDR != "" {
   752  		_, fCIDR, err := net.ParseCIDR(config.bridgeConfig.FixedCIDR)
   753  		if err != nil {
   754  			return err
   755  		}
   756  
   757  		ipamV4Conf.SubPool = fCIDR.String()
   758  	}
   759  
   760  	if config.bridgeConfig.DefaultGatewayIPv4 != nil {
   761  		ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.bridgeConfig.DefaultGatewayIPv4.String()
   762  	}
   763  
   764  	var deferIPv6Alloc bool
   765  	if config.bridgeConfig.FixedCIDRv6 != "" {
   766  		_, fCIDRv6, err := net.ParseCIDR(config.bridgeConfig.FixedCIDRv6)
   767  		if err != nil {
   768  			return err
   769  		}
   770  
   771  		// In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
   772  		// at least 48 host bits, we need to guarantee the current behavior where the containers'
   773  		// IPv6 addresses will be constructed based on the containers' interface MAC address.
   774  		// We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
   775  		// on this network until after the driver has created the endpoint and returned the
   776  		// constructed address. Libnetwork will then reserve this address with the ipam driver.
   777  		ones, _ := fCIDRv6.Mask.Size()
   778  		deferIPv6Alloc = ones <= 80
   779  
   780  		if ipamV6Conf == nil {
   781  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   782  		}
   783  		ipamV6Conf.PreferredPool = fCIDRv6.String()
   784  
   785  		// In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
   786  		// address belongs to the same network, we need to inform libnetwork about it, so
   787  		// that it can be reserved with IPAM and it will not be given away to somebody else
   788  		for _, nw6 := range nw6List {
   789  			if fCIDRv6.Contains(nw6.IP) {
   790  				ipamV6Conf.Gateway = nw6.IP.String()
   791  				break
   792  			}
   793  		}
   794  	}
   795  
   796  	if config.bridgeConfig.DefaultGatewayIPv6 != nil {
   797  		if ipamV6Conf == nil {
   798  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   799  		}
   800  		ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.bridgeConfig.DefaultGatewayIPv6.String()
   801  	}
   802  
   803  	v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
   804  	v6Conf := []*libnetwork.IpamConf{}
   805  	if ipamV6Conf != nil {
   806  		v6Conf = append(v6Conf, ipamV6Conf)
   807  	}
   808  	// Initialize default network on "bridge" with the same name
   809  	_, err = controller.NewNetwork("bridge", "bridge", "",
   810  		libnetwork.NetworkOptionEnableIPv6(config.bridgeConfig.EnableIPv6),
   811  		libnetwork.NetworkOptionDriverOpts(netOption),
   812  		libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
   813  		libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
   814  	if err != nil {
   815  		return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
   816  	}
   817  	return nil
   818  }
   819  
   820  // Remove default bridge interface if present (--bridge=none use case)
   821  func removeDefaultBridgeInterface() {
   822  	if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
   823  		if err := netlink.LinkDel(lnk); err != nil {
   824  			logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
   825  		}
   826  	}
   827  }
   828  
   829  func (daemon *Daemon) getLayerInit() func(string) error {
   830  	return daemon.setupInitLayer
   831  }
   832  
   833  // setupInitLayer populates a directory with mountpoints suitable
   834  // for bind-mounting things into the container.
   835  //
   836  // This extra layer is used by all containers as the top-most ro layer. It protects
   837  // the container from unwanted side-effects on the rw layer.
   838  func setupInitLayer(initLayer string, rootUID, rootGID int) error {
   839  	for pth, typ := range map[string]string{
   840  		"/dev/pts":         "dir",
   841  		"/dev/shm":         "dir",
   842  		"/proc":            "dir",
   843  		"/sys":             "dir",
   844  		"/.dockerenv":      "file",
   845  		"/etc/resolv.conf": "file",
   846  		"/etc/hosts":       "file",
   847  		"/etc/hostname":    "file",
   848  		"/dev/console":     "file",
   849  		"/etc/mtab":        "/proc/mounts",
   850  	} {
   851  		parts := strings.Split(pth, "/")
   852  		prev := "/"
   853  		for _, p := range parts[1:] {
   854  			prev = filepath.Join(prev, p)
   855  			syscall.Unlink(filepath.Join(initLayer, prev))
   856  		}
   857  
   858  		if _, err := os.Stat(filepath.Join(initLayer, pth)); err != nil {
   859  			if os.IsNotExist(err) {
   860  				if err := idtools.MkdirAllNewAs(filepath.Join(initLayer, filepath.Dir(pth)), 0755, rootUID, rootGID); err != nil {
   861  					return err
   862  				}
   863  				switch typ {
   864  				case "dir":
   865  					if err := idtools.MkdirAllNewAs(filepath.Join(initLayer, pth), 0755, rootUID, rootGID); err != nil {
   866  						return err
   867  					}
   868  				case "file":
   869  					f, err := os.OpenFile(filepath.Join(initLayer, pth), os.O_CREATE, 0755)
   870  					if err != nil {
   871  						return err
   872  					}
   873  					f.Chown(rootUID, rootGID)
   874  					f.Close()
   875  				default:
   876  					if err := os.Symlink(typ, filepath.Join(initLayer, pth)); err != nil {
   877  						return err
   878  					}
   879  				}
   880  			} else {
   881  				return err
   882  			}
   883  		}
   884  	}
   885  
   886  	// Layer is ready to use, if it wasn't before.
   887  	return nil
   888  }
   889  
   890  // Parse the remapped root (user namespace) option, which can be one of:
   891  //   username            - valid username from /etc/passwd
   892  //   username:groupname  - valid username; valid groupname from /etc/group
   893  //   uid                 - 32-bit unsigned int valid Linux UID value
   894  //   uid:gid             - uid value; 32-bit unsigned int Linux GID value
   895  //
   896  //  If no groupname is specified, and a username is specified, an attempt
   897  //  will be made to lookup a gid for that username as a groupname
   898  //
   899  //  If names are used, they are verified to exist in passwd/group
   900  func parseRemappedRoot(usergrp string) (string, string, error) {
   901  
   902  	var (
   903  		userID, groupID     int
   904  		username, groupname string
   905  	)
   906  
   907  	idparts := strings.Split(usergrp, ":")
   908  	if len(idparts) > 2 {
   909  		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
   910  	}
   911  
   912  	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
   913  		// must be a uid; take it as valid
   914  		userID = int(uid)
   915  		luser, err := user.LookupUid(userID)
   916  		if err != nil {
   917  			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
   918  		}
   919  		username = luser.Name
   920  		if len(idparts) == 1 {
   921  			// if the uid was numeric and no gid was specified, take the uid as the gid
   922  			groupID = userID
   923  			lgrp, err := user.LookupGid(groupID)
   924  			if err != nil {
   925  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
   926  			}
   927  			groupname = lgrp.Name
   928  		}
   929  	} else {
   930  		lookupName := idparts[0]
   931  		// special case: if the user specified "default", they want Docker to create or
   932  		// use (after creation) the "dockremap" user/group for root remapping
   933  		if lookupName == defaultIDSpecifier {
   934  			lookupName = defaultRemappedID
   935  		}
   936  		luser, err := user.LookupUser(lookupName)
   937  		if err != nil && idparts[0] != defaultIDSpecifier {
   938  			// error if the name requested isn't the special "dockremap" ID
   939  			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
   940  		} else if err != nil {
   941  			// special case-- if the username == "default", then we have been asked
   942  			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
   943  			// ranges will be used for the user and group mappings in user namespaced containers
   944  			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
   945  			if err == nil {
   946  				return defaultRemappedID, defaultRemappedID, nil
   947  			}
   948  			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
   949  		}
   950  		username = luser.Name
   951  		if len(idparts) == 1 {
   952  			// we only have a string username, and no group specified; look up gid from username as group
   953  			group, err := user.LookupGroup(lookupName)
   954  			if err != nil {
   955  				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
   956  			}
   957  			groupID = group.Gid
   958  			groupname = group.Name
   959  		}
   960  	}
   961  
   962  	if len(idparts) == 2 {
   963  		// groupname or gid is separately specified and must be resolved
   964  		// to an unsigned 32-bit gid
   965  		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
   966  			// must be a gid, take it as valid
   967  			groupID = int(gid)
   968  			lgrp, err := user.LookupGid(groupID)
   969  			if err != nil {
   970  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
   971  			}
   972  			groupname = lgrp.Name
   973  		} else {
   974  			// not a number; attempt a lookup
   975  			if _, err := user.LookupGroup(idparts[1]); err != nil {
   976  				return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
   977  			}
   978  			groupname = idparts[1]
   979  		}
   980  	}
   981  	return username, groupname, nil
   982  }
   983  
   984  func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
   985  	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
   986  		return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
   987  	}
   988  
   989  	// if the daemon was started with remapped root option, parse
   990  	// the config option to the int uid,gid values
   991  	var (
   992  		uidMaps, gidMaps []idtools.IDMap
   993  	)
   994  	if config.RemappedRoot != "" {
   995  		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
   996  		if err != nil {
   997  			return nil, nil, err
   998  		}
   999  		if username == "root" {
  1000  			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
  1001  			// effectively
  1002  			logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
  1003  			return uidMaps, gidMaps, nil
  1004  		}
  1005  		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
  1006  		// update remapped root setting now that we have resolved them to actual names
  1007  		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
  1008  
  1009  		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
  1010  		if err != nil {
  1011  			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
  1012  		}
  1013  	}
  1014  	return uidMaps, gidMaps, nil
  1015  }
  1016  
  1017  func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
  1018  	config.Root = rootDir
  1019  	// the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
  1020  	// so that syscalls executing as non-root, operating on subdirectories of the graph root
  1021  	// (e.g. mounted layers of a container) can traverse this path.
  1022  	// The user namespace support will create subdirectories for the remapped root host uid:gid
  1023  	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
  1024  	// layer content subtrees.
  1025  	if _, err := os.Stat(rootDir); err == nil {
  1026  		// root current exists; verify the access bits are correct by setting them
  1027  		if err = os.Chmod(rootDir, 0711); err != nil {
  1028  			return err
  1029  		}
  1030  	} else if os.IsNotExist(err) {
  1031  		// no root exists yet, create it 0711 with root:root ownership
  1032  		if err := os.MkdirAll(rootDir, 0711); err != nil {
  1033  			return err
  1034  		}
  1035  	}
  1036  
  1037  	// if user namespaces are enabled we will create a subtree underneath the specified root
  1038  	// with any/all specified remapped root uid/gid options on the daemon creating
  1039  	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
  1040  	// `chdir()` to work for containers namespaced to that uid/gid)
  1041  	if config.RemappedRoot != "" {
  1042  		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
  1043  		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
  1044  		// Create the root directory if it doesn't exist
  1045  		if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
  1046  			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
  1047  		}
  1048  		// we also need to verify that any pre-existing directories in the path to
  1049  		// the graphroot won't block access to remapped root--if any pre-existing directory
  1050  		// has strict permissions that don't allow "x", container start will fail, so
  1051  		// better to warn and fail now
  1052  		dirPath := config.Root
  1053  		for {
  1054  			dirPath = filepath.Dir(dirPath)
  1055  			if dirPath == "/" {
  1056  				break
  1057  			}
  1058  			if !idtools.CanAccess(dirPath, rootUID, rootGID) {
  1059  				return fmt.Errorf("A subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories.", config.Root)
  1060  			}
  1061  		}
  1062  	}
  1063  	return nil
  1064  }
  1065  
  1066  // registerLinks writes the links to a file.
  1067  func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
  1068  	if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() {
  1069  		return nil
  1070  	}
  1071  
  1072  	for _, l := range hostConfig.Links {
  1073  		name, alias, err := runconfigopts.ParseLink(l)
  1074  		if err != nil {
  1075  			return err
  1076  		}
  1077  		child, err := daemon.GetContainer(name)
  1078  		if err != nil {
  1079  			return fmt.Errorf("Could not get container for %s", name)
  1080  		}
  1081  		for child.HostConfig.NetworkMode.IsContainer() {
  1082  			parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2)
  1083  			child, err = daemon.GetContainer(parts[1])
  1084  			if err != nil {
  1085  				return fmt.Errorf("Could not get container for %s", parts[1])
  1086  			}
  1087  		}
  1088  		if child.HostConfig.NetworkMode.IsHost() {
  1089  			return runconfig.ErrConflictHostNetworkAndLinks
  1090  		}
  1091  		if err := daemon.registerLink(container, child, alias); err != nil {
  1092  			return err
  1093  		}
  1094  	}
  1095  
  1096  	// After we load all the links into the daemon
  1097  	// set them to nil on the hostconfig
  1098  	return container.WriteHostConfig()
  1099  }
  1100  
  1101  // conditionalMountOnStart is a platform specific helper function during the
  1102  // container start to call mount.
  1103  func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
  1104  	return daemon.Mount(container)
  1105  }
  1106  
  1107  // conditionalUnmountOnCleanup is a platform specific helper function called
  1108  // during the cleanup of a container to unmount.
  1109  func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
  1110  	return daemon.Unmount(container)
  1111  }
  1112  
  1113  func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
  1114  	if !c.IsRunning() {
  1115  		return nil, errNotRunning{c.ID}
  1116  	}
  1117  	stats, err := daemon.containerd.Stats(c.ID)
  1118  	if err != nil {
  1119  		return nil, err
  1120  	}
  1121  	s := &types.StatsJSON{}
  1122  	cgs := stats.CgroupStats
  1123  	if cgs != nil {
  1124  		s.BlkioStats = types.BlkioStats{
  1125  			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
  1126  			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
  1127  			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
  1128  			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
  1129  			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
  1130  			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
  1131  			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
  1132  			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
  1133  		}
  1134  		cpu := cgs.CpuStats
  1135  		s.CPUStats = types.CPUStats{
  1136  			CPUUsage: types.CPUUsage{
  1137  				TotalUsage:        cpu.CpuUsage.TotalUsage,
  1138  				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
  1139  				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
  1140  				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
  1141  			},
  1142  			ThrottlingData: types.ThrottlingData{
  1143  				Periods:          cpu.ThrottlingData.Periods,
  1144  				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
  1145  				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
  1146  			},
  1147  		}
  1148  		mem := cgs.MemoryStats.Usage
  1149  		s.MemoryStats = types.MemoryStats{
  1150  			Usage:    mem.Usage,
  1151  			MaxUsage: mem.MaxUsage,
  1152  			Stats:    cgs.MemoryStats.Stats,
  1153  			Failcnt:  mem.Failcnt,
  1154  			Limit:    mem.Limit,
  1155  		}
  1156  		// if the container does not set memory limit, use the machineMemory
  1157  		if mem.Limit > daemon.statsCollector.machineMemory && daemon.statsCollector.machineMemory > 0 {
  1158  			s.MemoryStats.Limit = daemon.statsCollector.machineMemory
  1159  		}
  1160  		if cgs.PidsStats != nil {
  1161  			s.PidsStats = types.PidsStats{
  1162  				Current: cgs.PidsStats.Current,
  1163  			}
  1164  		}
  1165  	}
  1166  	s.Read, err = ptypes.Timestamp(stats.Timestamp)
  1167  	if err != nil {
  1168  		return nil, err
  1169  	}
  1170  	return s, nil
  1171  }
  1172  
  1173  // setDefaultIsolation determines the default isolation mode for the
  1174  // daemon to run in. This is only applicable on Windows
  1175  func (daemon *Daemon) setDefaultIsolation() error {
  1176  	return nil
  1177  }
  1178  
  1179  func rootFSToAPIType(rootfs *image.RootFS) types.RootFS {
  1180  	var layers []string
  1181  	for _, l := range rootfs.DiffIDs {
  1182  		layers = append(layers, l.String())
  1183  	}
  1184  	return types.RootFS{
  1185  		Type:   rootfs.Type,
  1186  		Layers: layers,
  1187  	}
  1188  }
  1189  
  1190  // setupDaemonProcess sets various settings for the daemon's process
  1191  func setupDaemonProcess(config *Config) error {
  1192  	// setup the daemons oom_score_adj
  1193  	return setupOOMScoreAdj(config.OOMScoreAdjust)
  1194  }
  1195  
  1196  func setupOOMScoreAdj(score int) error {
  1197  	f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0)
  1198  	if err != nil {
  1199  		return err
  1200  	}
  1201  
  1202  	stringScore := strconv.Itoa(score)
  1203  	_, err = f.WriteString(stringScore)
  1204  	if os.IsPermission(err) {
  1205  		// Setting oom_score_adj does not work in an
  1206  		// unprivileged container. Ignore the error, but log
  1207  		// it if we appear not to be in that situation.
  1208  		if !rsystem.RunningInUserNS() {
  1209  			logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore)
  1210  		}
  1211  		return nil
  1212  	}
  1213  	f.Close()
  1214  	return err
  1215  }
  1216  
  1217  func (daemon *Daemon) initCgroupsPath(path string) error {
  1218  	if path == "/" || path == "." {
  1219  		return nil
  1220  	}
  1221  
  1222  	daemon.initCgroupsPath(filepath.Dir(path))
  1223  
  1224  	_, root, err := cgroups.FindCgroupMountpointAndRoot("cpu")
  1225  	if err != nil {
  1226  		return err
  1227  	}
  1228  
  1229  	path = filepath.Join(root, path)
  1230  	sysinfo := sysinfo.New(false)
  1231  	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
  1232  		return err
  1233  	}
  1234  	if sysinfo.CPURealtimePeriod && daemon.configStore.CPURealtimePeriod != 0 {
  1235  		if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_period_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimePeriod, 10)), 0700); err != nil {
  1236  			return err
  1237  		}
  1238  	}
  1239  	if sysinfo.CPURealtimeRuntime && daemon.configStore.CPURealtimeRuntime != 0 {
  1240  		if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_runtime_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimeRuntime, 10)), 0700); err != nil {
  1241  			return err
  1242  		}
  1243  	}
  1244  
  1245  	return nil
  1246  }