github.com/endophage/docker@v1.4.2-0.20161027011718-242853499895/daemon/daemon_unix.go (about)

     1  // +build linux freebsd
     2  
     3  package daemon
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"net"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"runtime/debug"
    14  	"strconv"
    15  	"strings"
    16  	"syscall"
    17  
    18  	"github.com/Sirupsen/logrus"
    19  	"github.com/docker/docker/api/types"
    20  	"github.com/docker/docker/api/types/blkiodev"
    21  	pblkiodev "github.com/docker/docker/api/types/blkiodev"
    22  	containertypes "github.com/docker/docker/api/types/container"
    23  	"github.com/docker/docker/container"
    24  	"github.com/docker/docker/image"
    25  	"github.com/docker/docker/pkg/idtools"
    26  	"github.com/docker/docker/pkg/parsers"
    27  	"github.com/docker/docker/pkg/parsers/kernel"
    28  	"github.com/docker/docker/pkg/sysinfo"
    29  	"github.com/docker/docker/runconfig"
    30  	runconfigopts "github.com/docker/docker/runconfig/opts"
    31  	"github.com/docker/libnetwork"
    32  	nwconfig "github.com/docker/libnetwork/config"
    33  	"github.com/docker/libnetwork/drivers/bridge"
    34  	"github.com/docker/libnetwork/netlabel"
    35  	"github.com/docker/libnetwork/netutils"
    36  	"github.com/docker/libnetwork/options"
    37  	lntypes "github.com/docker/libnetwork/types"
    38  	"github.com/golang/protobuf/ptypes"
    39  	"github.com/opencontainers/runc/libcontainer/cgroups"
    40  	"github.com/opencontainers/runc/libcontainer/label"
    41  	rsystem "github.com/opencontainers/runc/libcontainer/system"
    42  	"github.com/opencontainers/runc/libcontainer/user"
    43  	specs "github.com/opencontainers/runtime-spec/specs-go"
    44  	"github.com/vishvananda/netlink"
    45  )
    46  
const (
	// linuxMinCPUShares and linuxMaxCPUShares bound the CPU shares value;
	// adaptContainerSettings clamps out-of-range requests to these limits.
	// See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
	linuxMinCPUShares = 2
	linuxMaxCPUShares = 262144
	platformSupported = true
	// linuxMinMemory is not a kernel limit: the 4M (4194304 bytes) floor is
	// enforced so that a container gets enough memory to be reasonably
	// functional.
	linuxMinMemory = 4194304
	// constants for remapped root settings
	defaultIDSpecifier string = "default"
	defaultRemappedID  string = "dockremap"

	// constants for the supported cgroup drivers (see VerifyCgroupDriver)
	cgroupFsDriver      = "cgroupfs"
	cgroupSystemdDriver = "systemd"
)
    62  
    63  func getMemoryResources(config containertypes.Resources) *specs.Memory {
    64  	memory := specs.Memory{}
    65  
    66  	if config.Memory > 0 {
    67  		limit := uint64(config.Memory)
    68  		memory.Limit = &limit
    69  	}
    70  
    71  	if config.MemoryReservation > 0 {
    72  		reservation := uint64(config.MemoryReservation)
    73  		memory.Reservation = &reservation
    74  	}
    75  
    76  	if config.MemorySwap != 0 {
    77  		swap := uint64(config.MemorySwap)
    78  		memory.Swap = &swap
    79  	}
    80  
    81  	if config.MemorySwappiness != nil {
    82  		swappiness := uint64(*config.MemorySwappiness)
    83  		memory.Swappiness = &swappiness
    84  	}
    85  
    86  	if config.KernelMemory != 0 {
    87  		kernelMemory := uint64(config.KernelMemory)
    88  		memory.Kernel = &kernelMemory
    89  	}
    90  
    91  	return &memory
    92  }
    93  
    94  func getCPUResources(config containertypes.Resources) *specs.CPU {
    95  	cpu := specs.CPU{}
    96  
    97  	if config.CPUShares != 0 {
    98  		shares := uint64(config.CPUShares)
    99  		cpu.Shares = &shares
   100  	}
   101  
   102  	if config.CpusetCpus != "" {
   103  		cpuset := config.CpusetCpus
   104  		cpu.Cpus = &cpuset
   105  	}
   106  
   107  	if config.CpusetMems != "" {
   108  		cpuset := config.CpusetMems
   109  		cpu.Mems = &cpuset
   110  	}
   111  
   112  	if config.CPUPeriod != 0 {
   113  		period := uint64(config.CPUPeriod)
   114  		cpu.Period = &period
   115  	}
   116  
   117  	if config.CPUQuota != 0 {
   118  		quota := uint64(config.CPUQuota)
   119  		cpu.Quota = &quota
   120  	}
   121  
   122  	if config.CPURealtimePeriod != 0 {
   123  		period := uint64(config.CPURealtimePeriod)
   124  		cpu.RealtimePeriod = &period
   125  	}
   126  
   127  	if config.CPURealtimeRuntime != 0 {
   128  		runtime := uint64(config.CPURealtimeRuntime)
   129  		cpu.RealtimeRuntime = &runtime
   130  	}
   131  
   132  	return &cpu
   133  }
   134  
   135  func getBlkioWeightDevices(config containertypes.Resources) ([]specs.WeightDevice, error) {
   136  	var stat syscall.Stat_t
   137  	var blkioWeightDevices []specs.WeightDevice
   138  
   139  	for _, weightDevice := range config.BlkioWeightDevice {
   140  		if err := syscall.Stat(weightDevice.Path, &stat); err != nil {
   141  			return nil, err
   142  		}
   143  		weight := weightDevice.Weight
   144  		d := specs.WeightDevice{Weight: &weight}
   145  		d.Major = int64(stat.Rdev / 256)
   146  		d.Minor = int64(stat.Rdev % 256)
   147  		blkioWeightDevices = append(blkioWeightDevices, d)
   148  	}
   149  
   150  	return blkioWeightDevices, nil
   151  }
   152  
   153  func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error {
   154  	var (
   155  		labelOpts []string
   156  		err       error
   157  	)
   158  
   159  	for _, opt := range config.SecurityOpt {
   160  		if opt == "no-new-privileges" {
   161  			container.NoNewPrivileges = true
   162  		} else {
   163  			var con []string
   164  			if strings.Contains(opt, "=") {
   165  				con = strings.SplitN(opt, "=", 2)
   166  			} else if strings.Contains(opt, ":") {
   167  				con = strings.SplitN(opt, ":", 2)
   168  				logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 1.13, use `=` instead.")
   169  			}
   170  
   171  			if len(con) != 2 {
   172  				return fmt.Errorf("Invalid --security-opt 1: %q", opt)
   173  			}
   174  
   175  			switch con[0] {
   176  			case "label":
   177  				labelOpts = append(labelOpts, con[1])
   178  			case "apparmor":
   179  				container.AppArmorProfile = con[1]
   180  			case "seccomp":
   181  				container.SeccompProfile = con[1]
   182  			default:
   183  				return fmt.Errorf("Invalid --security-opt 2: %q", opt)
   184  			}
   185  		}
   186  	}
   187  
   188  	container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts)
   189  	return err
   190  }
   191  
   192  func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.ThrottleDevice, error) {
   193  	var throttleDevices []specs.ThrottleDevice
   194  	var stat syscall.Stat_t
   195  
   196  	for _, d := range devs {
   197  		if err := syscall.Stat(d.Path, &stat); err != nil {
   198  			return nil, err
   199  		}
   200  		rate := d.Rate
   201  		d := specs.ThrottleDevice{Rate: &rate}
   202  		d.Major = int64(stat.Rdev / 256)
   203  		d.Minor = int64(stat.Rdev % 256)
   204  		throttleDevices = append(throttleDevices, d)
   205  	}
   206  
   207  	return throttleDevices, nil
   208  }
   209  
   210  func checkKernel() error {
   211  	// Check for unsupported kernel versions
   212  	// FIXME: it would be cleaner to not test for specific versions, but rather
   213  	// test for specific functionalities.
   214  	// Unfortunately we can't test for the feature "does not cause a kernel panic"
   215  	// without actually causing a kernel panic, so we need this workaround until
   216  	// the circumstances of pre-3.10 crashes are clearer.
   217  	// For details see https://github.com/docker/docker/issues/407
   218  	// Docker 1.11 and above doesn't actually run on kernels older than 3.4,
   219  	// due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4).
   220  	if !kernel.CheckKernelVersion(3, 10, 0) {
   221  		v, _ := kernel.GetKernelVersion()
   222  		if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" {
   223  			logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String())
   224  		}
   225  	}
   226  	return nil
   227  }
   228  
   229  // adaptContainerSettings is called during container creation to modify any
   230  // settings necessary in the HostConfig structure.
   231  func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error {
   232  	if adjustCPUShares && hostConfig.CPUShares > 0 {
   233  		// Handle unsupported CPUShares
   234  		if hostConfig.CPUShares < linuxMinCPUShares {
   235  			logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares)
   236  			hostConfig.CPUShares = linuxMinCPUShares
   237  		} else if hostConfig.CPUShares > linuxMaxCPUShares {
   238  			logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares)
   239  			hostConfig.CPUShares = linuxMaxCPUShares
   240  		}
   241  	}
   242  	if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 {
   243  		// By default, MemorySwap is set to twice the size of Memory.
   244  		hostConfig.MemorySwap = hostConfig.Memory * 2
   245  	}
   246  	if hostConfig.ShmSize == 0 {
   247  		hostConfig.ShmSize = container.DefaultSHMSize
   248  	}
   249  	var err error
   250  	if hostConfig.SecurityOpt == nil {
   251  		hostConfig.SecurityOpt, err = daemon.generateSecurityOpt(hostConfig.IpcMode, hostConfig.PidMode, hostConfig.Privileged)
   252  		if err != nil {
   253  			return err
   254  		}
   255  	}
   256  	if hostConfig.MemorySwappiness == nil {
   257  		defaultSwappiness := int64(-1)
   258  		hostConfig.MemorySwappiness = &defaultSwappiness
   259  	}
   260  	if hostConfig.OomKillDisable == nil {
   261  		defaultOomKillDisable := false
   262  		hostConfig.OomKillDisable = &defaultOomKillDisable
   263  	}
   264  
   265  	return nil
   266  }
   267  
   268  func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) {
   269  	warnings := []string{}
   270  
   271  	// memory subsystem checks and adjustments
   272  	if resources.Memory != 0 && resources.Memory < linuxMinMemory {
   273  		return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB")
   274  	}
   275  	if resources.Memory > 0 && !sysInfo.MemoryLimit {
   276  		warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   277  		logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   278  		resources.Memory = 0
   279  		resources.MemorySwap = -1
   280  	}
   281  	if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit {
   282  		warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.")
   283  		logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.")
   284  		resources.MemorySwap = -1
   285  	}
   286  	if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory {
   287  		return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage")
   288  	}
   289  	if resources.Memory == 0 && resources.MemorySwap > 0 && !update {
   290  		return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage")
   291  	}
   292  	if resources.MemorySwappiness != nil && *resources.MemorySwappiness != -1 && !sysInfo.MemorySwappiness {
   293  		warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.")
   294  		logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.")
   295  		resources.MemorySwappiness = nil
   296  	}
   297  	if resources.MemorySwappiness != nil {
   298  		swappiness := *resources.MemorySwappiness
   299  		if swappiness < -1 || swappiness > 100 {
   300  			return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness)
   301  		}
   302  	}
   303  	if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation {
   304  		warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
   305  		logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
   306  		resources.MemoryReservation = 0
   307  	}
   308  	if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory {
   309  		return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB")
   310  	}
   311  	if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation {
   312  		return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage")
   313  	}
   314  	if resources.KernelMemory > 0 && !sysInfo.KernelMemory {
   315  		warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   316  		logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
   317  		resources.KernelMemory = 0
   318  	}
   319  	if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory {
   320  		return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB")
   321  	}
   322  	if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) {
   323  		warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
   324  		logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
   325  	}
   326  	if resources.OomKillDisable != nil && !sysInfo.OomKillDisable {
   327  		// only produce warnings if the setting wasn't to *disable* the OOM Kill; no point
   328  		// warning the caller if they already wanted the feature to be off
   329  		if *resources.OomKillDisable {
   330  			warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.")
   331  			logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.")
   332  		}
   333  		resources.OomKillDisable = nil
   334  	}
   335  
   336  	if resources.PidsLimit != 0 && !sysInfo.PidsLimit {
   337  		warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
   338  		logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
   339  		resources.PidsLimit = 0
   340  	}
   341  
   342  	// cpu subsystem checks and adjustments
   343  	if resources.CPUShares > 0 && !sysInfo.CPUShares {
   344  		warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
   345  		logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
   346  		resources.CPUShares = 0
   347  	}
   348  	if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod {
   349  		warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
   350  		logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
   351  		resources.CPUPeriod = 0
   352  	}
   353  	if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) {
   354  		return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)")
   355  	}
   356  	if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota {
   357  		warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
   358  		logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
   359  		resources.CPUQuota = 0
   360  	}
   361  	if resources.CPUQuota > 0 && resources.CPUQuota < 1000 {
   362  		return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)")
   363  	}
   364  	if resources.CPUPercent > 0 {
   365  		warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS))
   366  		logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)
   367  		resources.CPUPercent = 0
   368  	}
   369  
   370  	// cpuset subsystem checks and adjustments
   371  	if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset {
   372  		warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
   373  		logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
   374  		resources.CpusetCpus = ""
   375  		resources.CpusetMems = ""
   376  	}
   377  	cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus)
   378  	if err != nil {
   379  		return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus)
   380  	}
   381  	if !cpusAvailable {
   382  		return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus)
   383  	}
   384  	memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems)
   385  	if err != nil {
   386  		return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems)
   387  	}
   388  	if !memsAvailable {
   389  		return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems)
   390  	}
   391  
   392  	// blkio subsystem checks and adjustments
   393  	if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight {
   394  		warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
   395  		logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
   396  		resources.BlkioWeight = 0
   397  	}
   398  	if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) {
   399  		return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000")
   400  	}
   401  	if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 {
   402  		return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS)
   403  	}
   404  	if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice {
   405  		warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
   406  		logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
   407  		resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{}
   408  	}
   409  	if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice {
   410  		warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.")
   411  		logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded")
   412  		resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{}
   413  	}
   414  	if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice {
   415  		warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
   416  		logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
   417  		resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{}
   418  	}
   419  	if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice {
   420  		warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
   421  		logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
   422  		resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{}
   423  	}
   424  	if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice {
   425  		warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
   426  		logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
   427  		resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{}
   428  	}
   429  
   430  	return warnings, nil
   431  }
   432  
   433  func (daemon *Daemon) getCgroupDriver() string {
   434  	cgroupDriver := cgroupFsDriver
   435  
   436  	if UsingSystemd(daemon.configStore) {
   437  		cgroupDriver = cgroupSystemdDriver
   438  	}
   439  	return cgroupDriver
   440  }
   441  
   442  // getCD gets the raw value of the native.cgroupdriver option, if set.
   443  func getCD(config *Config) string {
   444  	for _, option := range config.ExecOptions {
   445  		key, val, err := parsers.ParseKeyValueOpt(option)
   446  		if err != nil || !strings.EqualFold(key, "native.cgroupdriver") {
   447  			continue
   448  		}
   449  		return val
   450  	}
   451  	return ""
   452  }
   453  
   454  // VerifyCgroupDriver validates native.cgroupdriver
   455  func VerifyCgroupDriver(config *Config) error {
   456  	cd := getCD(config)
   457  	if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver {
   458  		return nil
   459  	}
   460  	return fmt.Errorf("native.cgroupdriver option %s not supported", cd)
   461  }
   462  
   463  // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd
   464  func UsingSystemd(config *Config) bool {
   465  	return getCD(config) == cgroupSystemdDriver
   466  }
   467  
   468  // verifyPlatformContainerSettings performs platform-specific validation of the
   469  // hostconfig and config structures.
   470  func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) {
   471  	warnings := []string{}
   472  	sysInfo := sysinfo.New(true)
   473  
   474  	warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config)
   475  	if err != nil {
   476  		return warnings, err
   477  	}
   478  
   479  	w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update)
   480  
   481  	// no matter err is nil or not, w could have data in itself.
   482  	warnings = append(warnings, w...)
   483  
   484  	if err != nil {
   485  		return warnings, err
   486  	}
   487  
   488  	if hostConfig.ShmSize < 0 {
   489  		return warnings, fmt.Errorf("SHM size can not be less than 0")
   490  	}
   491  
   492  	if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 {
   493  		return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj)
   494  	}
   495  
   496  	// ip-forwarding does not affect container with '--net=host' (or '--net=none')
   497  	if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) {
   498  		warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
   499  		logrus.Warn("IPv4 forwarding is disabled. Networking will not work")
   500  	}
   501  	// check for various conflicting options with user namespaces
   502  	if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
   503  		if hostConfig.Privileged {
   504  			return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
   505  		}
   506  		if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   507  			return warnings, fmt.Errorf("Cannot share the host's network namespace when user namespaces are enabled")
   508  		}
   509  		if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   510  			return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled")
   511  		}
   512  	}
   513  	if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) {
   514  		// CgroupParent for systemd cgroup should be named as "xxx.slice"
   515  		if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") {
   516  			return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   517  		}
   518  	}
   519  	if hostConfig.Runtime == "" {
   520  		hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
   521  	}
   522  
   523  	if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil {
   524  		return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime)
   525  	}
   526  
   527  	return warnings, nil
   528  }
   529  
   530  // platformReload update configuration with platform specific options
   531  func (daemon *Daemon) platformReload(config *Config) map[string]string {
   532  	if config.IsValueSet("runtimes") {
   533  		daemon.configStore.Runtimes = config.Runtimes
   534  		// Always set the default one
   535  		daemon.configStore.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   536  	}
   537  
   538  	if config.DefaultRuntime != "" {
   539  		daemon.configStore.DefaultRuntime = config.DefaultRuntime
   540  	}
   541  
   542  	// Update attributes
   543  	var runtimeList bytes.Buffer
   544  	for name, rt := range daemon.configStore.Runtimes {
   545  		if runtimeList.Len() > 0 {
   546  			runtimeList.WriteRune(' ')
   547  		}
   548  		runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt))
   549  	}
   550  
   551  	return map[string]string{
   552  		"runtimes":        runtimeList.String(),
   553  		"default-runtime": daemon.configStore.DefaultRuntime,
   554  	}
   555  }
   556  
   557  // verifyDaemonSettings performs validation of daemon config struct
   558  func verifyDaemonSettings(config *Config) error {
   559  	// Check for mutually incompatible config options
   560  	if config.bridgeConfig.Iface != "" && config.bridgeConfig.IP != "" {
   561  		return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one")
   562  	}
   563  	if !config.bridgeConfig.EnableIPTables && !config.bridgeConfig.InterContainerCommunication {
   564  		return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true")
   565  	}
   566  	if !config.bridgeConfig.EnableIPTables && config.bridgeConfig.EnableIPMasq {
   567  		config.bridgeConfig.EnableIPMasq = false
   568  	}
   569  	if err := VerifyCgroupDriver(config); err != nil {
   570  		return err
   571  	}
   572  	if config.CgroupParent != "" && UsingSystemd(config) {
   573  		if len(config.CgroupParent) <= 6 || !strings.HasSuffix(config.CgroupParent, ".slice") {
   574  			return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   575  		}
   576  	}
   577  
   578  	if config.DefaultRuntime == "" {
   579  		config.DefaultRuntime = stockRuntimeName
   580  	}
   581  	if config.Runtimes == nil {
   582  		config.Runtimes = make(map[string]types.Runtime)
   583  	}
   584  	config.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   585  
   586  	return nil
   587  }
   588  
   589  // checkSystem validates platform-specific requirements
   590  func checkSystem() error {
   591  	if os.Geteuid() != 0 {
   592  		return fmt.Errorf("The Docker daemon needs to be run as root")
   593  	}
   594  	return checkKernel()
   595  }
   596  
   597  // configureMaxThreads sets the Go runtime max threads threshold
   598  // which is 90% of the kernel setting from /proc/sys/kernel/threads-max
   599  func configureMaxThreads(config *Config) error {
   600  	mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
   601  	if err != nil {
   602  		return err
   603  	}
   604  	mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
   605  	if err != nil {
   606  		return err
   607  	}
   608  	maxThreads := (mtint / 100) * 90
   609  	debug.SetMaxThreads(maxThreads)
   610  	logrus.Debugf("Golang's threads limit set to %d", maxThreads)
   611  	return nil
   612  }
   613  
   614  // configureKernelSecuritySupport configures and validates security support for the kernel
   615  func configureKernelSecuritySupport(config *Config, driverName string) error {
   616  	if config.EnableSelinuxSupport {
   617  		if !selinuxEnabled() {
   618  			logrus.Warn("Docker could not enable SELinux on the host system")
   619  		}
   620  	} else {
   621  		selinuxSetDisabled()
   622  	}
   623  	return nil
   624  }
   625  
   626  func (daemon *Daemon) initNetworkController(config *Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
   627  	netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
   628  	if err != nil {
   629  		return nil, err
   630  	}
   631  
   632  	controller, err := libnetwork.New(netOptions...)
   633  	if err != nil {
   634  		return nil, fmt.Errorf("error obtaining controller instance: %v", err)
   635  	}
   636  
   637  	if len(activeSandboxes) > 0 {
   638  		logrus.Infof("There are old running containers, the network config will not take affect")
   639  		return controller, nil
   640  	}
   641  
   642  	// Initialize default network on "null"
   643  	if n, _ := controller.NetworkByName("none"); n == nil {
   644  		if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   645  			return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
   646  		}
   647  	}
   648  
   649  	// Initialize default network on "host"
   650  	if n, _ := controller.NetworkByName("host"); n == nil {
   651  		if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   652  			return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
   653  		}
   654  	}
   655  
   656  	// Clear stale bridge network
   657  	if n, err := controller.NetworkByName("bridge"); err == nil {
   658  		if err = n.Delete(); err != nil {
   659  			return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
   660  		}
   661  	}
   662  
   663  	if !config.DisableBridge {
   664  		// Initialize default driver "bridge"
   665  		if err := initBridgeDriver(controller, config); err != nil {
   666  			return nil, err
   667  		}
   668  	} else {
   669  		removeDefaultBridgeInterface()
   670  	}
   671  
   672  	return controller, nil
   673  }
   674  
   675  func driverOptions(config *Config) []nwconfig.Option {
   676  	bridgeConfig := options.Generic{
   677  		"EnableIPForwarding":  config.bridgeConfig.EnableIPForward,
   678  		"EnableIPTables":      config.bridgeConfig.EnableIPTables,
   679  		"EnableUserlandProxy": config.bridgeConfig.EnableUserlandProxy,
   680  		"UserlandProxyPath":   config.bridgeConfig.UserlandProxyPath}
   681  	bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}
   682  
   683  	dOptions := []nwconfig.Option{}
   684  	dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption))
   685  	return dOptions
   686  }
   687  
   688  func initBridgeDriver(controller libnetwork.NetworkController, config *Config) error {
   689  	bridgeName := bridge.DefaultBridgeName
   690  	if config.bridgeConfig.Iface != "" {
   691  		bridgeName = config.bridgeConfig.Iface
   692  	}
   693  	netOption := map[string]string{
   694  		bridge.BridgeName:         bridgeName,
   695  		bridge.DefaultBridge:      strconv.FormatBool(true),
   696  		netlabel.DriverMTU:        strconv.Itoa(config.Mtu),
   697  		bridge.EnableIPMasquerade: strconv.FormatBool(config.bridgeConfig.EnableIPMasq),
   698  		bridge.EnableICC:          strconv.FormatBool(config.bridgeConfig.InterContainerCommunication),
   699  	}
   700  
   701  	// --ip processing
   702  	if config.bridgeConfig.DefaultIP != nil {
   703  		netOption[bridge.DefaultBindingIP] = config.bridgeConfig.DefaultIP.String()
   704  	}
   705  
   706  	var (
   707  		ipamV4Conf *libnetwork.IpamConf
   708  		ipamV6Conf *libnetwork.IpamConf
   709  	)
   710  
   711  	ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   712  
   713  	nw, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
   714  	if err == nil {
   715  		ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
   716  		hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
   717  		if hip.IsGlobalUnicast() {
   718  			ipamV4Conf.Gateway = nw.IP.String()
   719  		}
   720  	}
   721  
   722  	if config.bridgeConfig.IP != "" {
   723  		ipamV4Conf.PreferredPool = config.bridgeConfig.IP
   724  		ip, _, err := net.ParseCIDR(config.bridgeConfig.IP)
   725  		if err != nil {
   726  			return err
   727  		}
   728  		ipamV4Conf.Gateway = ip.String()
   729  	} else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
   730  		logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
   731  	}
   732  
   733  	if config.bridgeConfig.FixedCIDR != "" {
   734  		_, fCIDR, err := net.ParseCIDR(config.bridgeConfig.FixedCIDR)
   735  		if err != nil {
   736  			return err
   737  		}
   738  
   739  		ipamV4Conf.SubPool = fCIDR.String()
   740  	}
   741  
   742  	if config.bridgeConfig.DefaultGatewayIPv4 != nil {
   743  		ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.bridgeConfig.DefaultGatewayIPv4.String()
   744  	}
   745  
   746  	var deferIPv6Alloc bool
   747  	if config.bridgeConfig.FixedCIDRv6 != "" {
   748  		_, fCIDRv6, err := net.ParseCIDR(config.bridgeConfig.FixedCIDRv6)
   749  		if err != nil {
   750  			return err
   751  		}
   752  
   753  		// In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
   754  		// at least 48 host bits, we need to guarantee the current behavior where the containers'
   755  		// IPv6 addresses will be constructed based on the containers' interface MAC address.
   756  		// We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
   757  		// on this network until after the driver has created the endpoint and returned the
   758  		// constructed address. Libnetwork will then reserve this address with the ipam driver.
   759  		ones, _ := fCIDRv6.Mask.Size()
   760  		deferIPv6Alloc = ones <= 80
   761  
   762  		if ipamV6Conf == nil {
   763  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   764  		}
   765  		ipamV6Conf.PreferredPool = fCIDRv6.String()
   766  
   767  		// In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
   768  		// address belongs to the same network, we need to inform libnetwork about it, so
   769  		// that it can be reserved with IPAM and it will not be given away to somebody else
   770  		for _, nw6 := range nw6List {
   771  			if fCIDRv6.Contains(nw6.IP) {
   772  				ipamV6Conf.Gateway = nw6.IP.String()
   773  				break
   774  			}
   775  		}
   776  	}
   777  
   778  	if config.bridgeConfig.DefaultGatewayIPv6 != nil {
   779  		if ipamV6Conf == nil {
   780  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   781  		}
   782  		ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.bridgeConfig.DefaultGatewayIPv6.String()
   783  	}
   784  
   785  	v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
   786  	v6Conf := []*libnetwork.IpamConf{}
   787  	if ipamV6Conf != nil {
   788  		v6Conf = append(v6Conf, ipamV6Conf)
   789  	}
   790  	// Initialize default network on "bridge" with the same name
   791  	_, err = controller.NewNetwork("bridge", "bridge", "",
   792  		libnetwork.NetworkOptionEnableIPv6(config.bridgeConfig.EnableIPv6),
   793  		libnetwork.NetworkOptionDriverOpts(netOption),
   794  		libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
   795  		libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
   796  	if err != nil {
   797  		return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
   798  	}
   799  	return nil
   800  }
   801  
   802  // Remove default bridge interface if present (--bridge=none use case)
   803  func removeDefaultBridgeInterface() {
   804  	if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
   805  		if err := netlink.LinkDel(lnk); err != nil {
   806  			logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
   807  		}
   808  	}
   809  }
   810  
// getLayerInit returns the daemon's setupInitLayer method value as a
// func(string) error.
// NOTE(review): presumably the string argument is the init layer path and the
// callback is handed to the layer store; confirm against the caller.
func (daemon *Daemon) getLayerInit() func(string) error {
	return daemon.setupInitLayer
}
   814  
   815  // setupInitLayer populates a directory with mountpoints suitable
   816  // for bind-mounting things into the container.
   817  //
   818  // This extra layer is used by all containers as the top-most ro layer. It protects
   819  // the container from unwanted side-effects on the rw layer.
   820  func setupInitLayer(initLayer string, rootUID, rootGID int) error {
   821  	for pth, typ := range map[string]string{
   822  		"/dev/pts":         "dir",
   823  		"/dev/shm":         "dir",
   824  		"/proc":            "dir",
   825  		"/sys":             "dir",
   826  		"/.dockerenv":      "file",
   827  		"/etc/resolv.conf": "file",
   828  		"/etc/hosts":       "file",
   829  		"/etc/hostname":    "file",
   830  		"/dev/console":     "file",
   831  		"/etc/mtab":        "/proc/mounts",
   832  	} {
   833  		parts := strings.Split(pth, "/")
   834  		prev := "/"
   835  		for _, p := range parts[1:] {
   836  			prev = filepath.Join(prev, p)
   837  			syscall.Unlink(filepath.Join(initLayer, prev))
   838  		}
   839  
   840  		if _, err := os.Stat(filepath.Join(initLayer, pth)); err != nil {
   841  			if os.IsNotExist(err) {
   842  				if err := idtools.MkdirAllNewAs(filepath.Join(initLayer, filepath.Dir(pth)), 0755, rootUID, rootGID); err != nil {
   843  					return err
   844  				}
   845  				switch typ {
   846  				case "dir":
   847  					if err := idtools.MkdirAllNewAs(filepath.Join(initLayer, pth), 0755, rootUID, rootGID); err != nil {
   848  						return err
   849  					}
   850  				case "file":
   851  					f, err := os.OpenFile(filepath.Join(initLayer, pth), os.O_CREATE, 0755)
   852  					if err != nil {
   853  						return err
   854  					}
   855  					f.Chown(rootUID, rootGID)
   856  					f.Close()
   857  				default:
   858  					if err := os.Symlink(typ, filepath.Join(initLayer, pth)); err != nil {
   859  						return err
   860  					}
   861  				}
   862  			} else {
   863  				return err
   864  			}
   865  		}
   866  	}
   867  
   868  	// Layer is ready to use, if it wasn't before.
   869  	return nil
   870  }
   871  
   872  // Parse the remapped root (user namespace) option, which can be one of:
   873  //   username            - valid username from /etc/passwd
   874  //   username:groupname  - valid username; valid groupname from /etc/group
   875  //   uid                 - 32-bit unsigned int valid Linux UID value
   876  //   uid:gid             - uid value; 32-bit unsigned int Linux GID value
   877  //
   878  //  If no groupname is specified, and a username is specified, an attempt
   879  //  will be made to lookup a gid for that username as a groupname
   880  //
   881  //  If names are used, they are verified to exist in passwd/group
   882  func parseRemappedRoot(usergrp string) (string, string, error) {
   883  
   884  	var (
   885  		userID, groupID     int
   886  		username, groupname string
   887  	)
   888  
   889  	idparts := strings.Split(usergrp, ":")
   890  	if len(idparts) > 2 {
   891  		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
   892  	}
   893  
   894  	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
   895  		// must be a uid; take it as valid
   896  		userID = int(uid)
   897  		luser, err := user.LookupUid(userID)
   898  		if err != nil {
   899  			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
   900  		}
   901  		username = luser.Name
   902  		if len(idparts) == 1 {
   903  			// if the uid was numeric and no gid was specified, take the uid as the gid
   904  			groupID = userID
   905  			lgrp, err := user.LookupGid(groupID)
   906  			if err != nil {
   907  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
   908  			}
   909  			groupname = lgrp.Name
   910  		}
   911  	} else {
   912  		lookupName := idparts[0]
   913  		// special case: if the user specified "default", they want Docker to create or
   914  		// use (after creation) the "dockremap" user/group for root remapping
   915  		if lookupName == defaultIDSpecifier {
   916  			lookupName = defaultRemappedID
   917  		}
   918  		luser, err := user.LookupUser(lookupName)
   919  		if err != nil && idparts[0] != defaultIDSpecifier {
   920  			// error if the name requested isn't the special "dockremap" ID
   921  			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
   922  		} else if err != nil {
   923  			// special case-- if the username == "default", then we have been asked
   924  			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
   925  			// ranges will be used for the user and group mappings in user namespaced containers
   926  			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
   927  			if err == nil {
   928  				return defaultRemappedID, defaultRemappedID, nil
   929  			}
   930  			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
   931  		}
   932  		username = luser.Name
   933  		if len(idparts) == 1 {
   934  			// we only have a string username, and no group specified; look up gid from username as group
   935  			group, err := user.LookupGroup(lookupName)
   936  			if err != nil {
   937  				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
   938  			}
   939  			groupID = group.Gid
   940  			groupname = group.Name
   941  		}
   942  	}
   943  
   944  	if len(idparts) == 2 {
   945  		// groupname or gid is separately specified and must be resolved
   946  		// to an unsigned 32-bit gid
   947  		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
   948  			// must be a gid, take it as valid
   949  			groupID = int(gid)
   950  			lgrp, err := user.LookupGid(groupID)
   951  			if err != nil {
   952  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
   953  			}
   954  			groupname = lgrp.Name
   955  		} else {
   956  			// not a number; attempt a lookup
   957  			if _, err := user.LookupGroup(idparts[1]); err != nil {
   958  				return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
   959  			}
   960  			groupname = idparts[1]
   961  		}
   962  	}
   963  	return username, groupname, nil
   964  }
   965  
   966  func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
   967  	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
   968  		return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
   969  	}
   970  
   971  	// if the daemon was started with remapped root option, parse
   972  	// the config option to the int uid,gid values
   973  	var (
   974  		uidMaps, gidMaps []idtools.IDMap
   975  	)
   976  	if config.RemappedRoot != "" {
   977  		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
   978  		if err != nil {
   979  			return nil, nil, err
   980  		}
   981  		if username == "root" {
   982  			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
   983  			// effectively
   984  			logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
   985  			return uidMaps, gidMaps, nil
   986  		}
   987  		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
   988  		// update remapped root setting now that we have resolved them to actual names
   989  		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
   990  
   991  		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
   992  		if err != nil {
   993  			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
   994  		}
   995  	}
   996  	return uidMaps, gidMaps, nil
   997  }
   998  
   999  func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
  1000  	config.Root = rootDir
  1001  	// the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
  1002  	// so that syscalls executing as non-root, operating on subdirectories of the graph root
  1003  	// (e.g. mounted layers of a container) can traverse this path.
  1004  	// The user namespace support will create subdirectories for the remapped root host uid:gid
  1005  	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
  1006  	// layer content subtrees.
  1007  	if _, err := os.Stat(rootDir); err == nil {
  1008  		// root current exists; verify the access bits are correct by setting them
  1009  		if err = os.Chmod(rootDir, 0711); err != nil {
  1010  			return err
  1011  		}
  1012  	} else if os.IsNotExist(err) {
  1013  		// no root exists yet, create it 0711 with root:root ownership
  1014  		if err := os.MkdirAll(rootDir, 0711); err != nil {
  1015  			return err
  1016  		}
  1017  	}
  1018  
  1019  	// if user namespaces are enabled we will create a subtree underneath the specified root
  1020  	// with any/all specified remapped root uid/gid options on the daemon creating
  1021  	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
  1022  	// `chdir()` to work for containers namespaced to that uid/gid)
  1023  	if config.RemappedRoot != "" {
  1024  		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
  1025  		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
  1026  		// Create the root directory if it doesn't exist
  1027  		if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
  1028  			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
  1029  		}
  1030  		// we also need to verify that any pre-existing directories in the path to
  1031  		// the graphroot won't block access to remapped root--if any pre-existing directory
  1032  		// has strict permissions that don't allow "x", container start will fail, so
  1033  		// better to warn and fail now
  1034  		dirPath := config.Root
  1035  		for {
  1036  			dirPath = filepath.Dir(dirPath)
  1037  			if dirPath == "/" {
  1038  				break
  1039  			}
  1040  			if !idtools.CanAccess(dirPath, rootUID, rootGID) {
  1041  				return fmt.Errorf("A subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories.", config.Root)
  1042  			}
  1043  		}
  1044  	}
  1045  	return nil
  1046  }
  1047  
  1048  // registerLinks writes the links to a file.
  1049  func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
  1050  	if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() {
  1051  		return nil
  1052  	}
  1053  
  1054  	for _, l := range hostConfig.Links {
  1055  		name, alias, err := runconfigopts.ParseLink(l)
  1056  		if err != nil {
  1057  			return err
  1058  		}
  1059  		child, err := daemon.GetContainer(name)
  1060  		if err != nil {
  1061  			return fmt.Errorf("Could not get container for %s", name)
  1062  		}
  1063  		for child.HostConfig.NetworkMode.IsContainer() {
  1064  			parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2)
  1065  			child, err = daemon.GetContainer(parts[1])
  1066  			if err != nil {
  1067  				return fmt.Errorf("Could not get container for %s", parts[1])
  1068  			}
  1069  		}
  1070  		if child.HostConfig.NetworkMode.IsHost() {
  1071  			return runconfig.ErrConflictHostNetworkAndLinks
  1072  		}
  1073  		if err := daemon.registerLink(container, child, alias); err != nil {
  1074  			return err
  1075  		}
  1076  	}
  1077  
  1078  	// After we load all the links into the daemon
  1079  	// set them to nil on the hostconfig
  1080  	return container.WriteHostConfig()
  1081  }
  1082  
// conditionalMountOnStart is a platform specific helper function during the
// container start to call mount. In this file it unconditionally delegates to
// daemon.Mount and returns its result.
func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
	return daemon.Mount(container)
}
  1088  
// conditionalUnmountOnCleanup is a platform specific helper function called
// during the cleanup of a container to unmount. In this file it
// unconditionally delegates to daemon.Unmount and returns its result.
func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
	return daemon.Unmount(container)
}
  1094  
  1095  func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
  1096  	if !c.IsRunning() {
  1097  		return nil, errNotRunning{c.ID}
  1098  	}
  1099  	stats, err := daemon.containerd.Stats(c.ID)
  1100  	if err != nil {
  1101  		return nil, err
  1102  	}
  1103  	s := &types.StatsJSON{}
  1104  	cgs := stats.CgroupStats
  1105  	if cgs != nil {
  1106  		s.BlkioStats = types.BlkioStats{
  1107  			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
  1108  			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
  1109  			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
  1110  			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
  1111  			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
  1112  			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
  1113  			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
  1114  			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
  1115  		}
  1116  		cpu := cgs.CpuStats
  1117  		s.CPUStats = types.CPUStats{
  1118  			CPUUsage: types.CPUUsage{
  1119  				TotalUsage:        cpu.CpuUsage.TotalUsage,
  1120  				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
  1121  				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
  1122  				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
  1123  			},
  1124  			ThrottlingData: types.ThrottlingData{
  1125  				Periods:          cpu.ThrottlingData.Periods,
  1126  				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
  1127  				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
  1128  			},
  1129  		}
  1130  		mem := cgs.MemoryStats.Usage
  1131  		s.MemoryStats = types.MemoryStats{
  1132  			Usage:    mem.Usage,
  1133  			MaxUsage: mem.MaxUsage,
  1134  			Stats:    cgs.MemoryStats.Stats,
  1135  			Failcnt:  mem.Failcnt,
  1136  			Limit:    mem.Limit,
  1137  		}
  1138  		// if the container does not set memory limit, use the machineMemory
  1139  		if mem.Limit > daemon.statsCollector.machineMemory && daemon.statsCollector.machineMemory > 0 {
  1140  			s.MemoryStats.Limit = daemon.statsCollector.machineMemory
  1141  		}
  1142  		if cgs.PidsStats != nil {
  1143  			s.PidsStats = types.PidsStats{
  1144  				Current: cgs.PidsStats.Current,
  1145  			}
  1146  		}
  1147  	}
  1148  	s.Read, err = ptypes.Timestamp(stats.Timestamp)
  1149  	if err != nil {
  1150  		return nil, err
  1151  	}
  1152  	return s, nil
  1153  }
  1154  
// setDefaultIsolation determines the default isolation mode for the
// daemon to run in. This is only applicable on Windows, so this
// implementation does nothing and always returns nil.
func (daemon *Daemon) setDefaultIsolation() error {
	return nil
}
  1160  
  1161  func rootFSToAPIType(rootfs *image.RootFS) types.RootFS {
  1162  	var layers []string
  1163  	for _, l := range rootfs.DiffIDs {
  1164  		layers = append(layers, l.String())
  1165  	}
  1166  	return types.RootFS{
  1167  		Type:   rootfs.Type,
  1168  		Layers: layers,
  1169  	}
  1170  }
  1171  
// setupDaemonProcess sets various settings for the daemon's process.
// At present the only setting applied is config.OOMScoreAdjust, via
// setupOOMScoreAdj, whose error is returned unchanged.
func setupDaemonProcess(config *Config) error {
	// setup the daemons oom_score_adj
	return setupOOMScoreAdj(config.OOMScoreAdjust)
}
  1177  
  1178  func setupOOMScoreAdj(score int) error {
  1179  	f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0)
  1180  	if err != nil {
  1181  		return err
  1182  	}
  1183  
  1184  	stringScore := strconv.Itoa(score)
  1185  	_, err = f.WriteString(stringScore)
  1186  	if os.IsPermission(err) {
  1187  		// Setting oom_score_adj does not work in an
  1188  		// unprivileged container. Ignore the error, but log
  1189  		// it if we appear not to be in that situation.
  1190  		if !rsystem.RunningInUserNS() {
  1191  			logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore)
  1192  		}
  1193  		return nil
  1194  	}
  1195  	f.Close()
  1196  	return err
  1197  }
  1198  
  1199  func (daemon *Daemon) initCgroupsPath(path string) error {
  1200  	if path == "/" || path == "." {
  1201  		return nil
  1202  	}
  1203  
  1204  	daemon.initCgroupsPath(filepath.Dir(path))
  1205  
  1206  	_, root, err := cgroups.FindCgroupMountpointAndRoot("cpu")
  1207  	if err != nil {
  1208  		return err
  1209  	}
  1210  
  1211  	path = filepath.Join(root, path)
  1212  	sysinfo := sysinfo.New(false)
  1213  	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
  1214  		return err
  1215  	}
  1216  	if sysinfo.CPURealtimePeriod && daemon.configStore.CPURealtimePeriod != 0 {
  1217  		if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_period_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimePeriod, 10)), 0700); err != nil {
  1218  			return err
  1219  		}
  1220  	}
  1221  	if sysinfo.CPURealtimeRuntime && daemon.configStore.CPURealtimeRuntime != 0 {
  1222  		if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_runtime_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimeRuntime, 10)), 0700); err != nil {
  1223  			return err
  1224  		}
  1225  	}
  1226  
  1227  	return nil
  1228  }