github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/daemon/daemon_unix.go (about)

     1  // +build linux freebsd
     2  
     3  package daemon
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"net"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"runtime/debug"
    14  	"strconv"
    15  	"strings"
    16  	"syscall"
    17  	"time"
    18  
    19  	"github.com/Sirupsen/logrus"
    20  	"github.com/docker/docker/api/types"
    21  	"github.com/docker/docker/api/types/blkiodev"
    22  	pblkiodev "github.com/docker/docker/api/types/blkiodev"
    23  	containertypes "github.com/docker/docker/api/types/container"
    24  	"github.com/docker/docker/container"
    25  	"github.com/docker/docker/image"
    26  	"github.com/docker/docker/pkg/idtools"
    27  	"github.com/docker/docker/pkg/parsers"
    28  	"github.com/docker/docker/pkg/parsers/kernel"
    29  	"github.com/docker/docker/pkg/sysinfo"
    30  	"github.com/docker/docker/runconfig"
    31  	runconfigopts "github.com/docker/docker/runconfig/opts"
    32  	"github.com/docker/libnetwork"
    33  	nwconfig "github.com/docker/libnetwork/config"
    34  	"github.com/docker/libnetwork/drivers/bridge"
    35  	"github.com/docker/libnetwork/netlabel"
    36  	"github.com/docker/libnetwork/netutils"
    37  	"github.com/docker/libnetwork/options"
    38  	lntypes "github.com/docker/libnetwork/types"
    39  	"github.com/golang/protobuf/ptypes"
    40  	"github.com/opencontainers/runc/libcontainer/cgroups"
    41  	"github.com/opencontainers/runc/libcontainer/label"
    42  	rsystem "github.com/opencontainers/runc/libcontainer/system"
    43  	specs "github.com/opencontainers/runtime-spec/specs-go"
    44  	"github.com/pkg/errors"
    45  	"github.com/vishvananda/netlink"
    46  )
    47  
const (
	// Bounds that adaptContainerSettings clamps a requested CPUShares value into.
	// See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
	linuxMinCPUShares = 2
	linuxMaxCPUShares = 262144
	platformSupported = true
	// It's not kernel limit, we want this 4M limit to supply a reasonable functional container
	// (4194304 bytes = 4 * 1024 * 1024). verifyContainerResources uses it as the
	// lower bound for the memory limit, memory reservation and kernel memory.
	linuxMinMemory = 4194304
	// constants for remapped root settings
	defaultIDSpecifier string = "default"
	defaultRemappedID  string = "dockremap"

	// constant for cgroup drivers; these are the only non-empty values
	// VerifyCgroupDriver accepts for the native.cgroupdriver exec option.
	cgroupFsDriver      = "cgroupfs"
	cgroupSystemdDriver = "systemd"
)
    63  
    64  func getMemoryResources(config containertypes.Resources) *specs.Memory {
    65  	memory := specs.Memory{}
    66  
    67  	if config.Memory > 0 {
    68  		limit := uint64(config.Memory)
    69  		memory.Limit = &limit
    70  	}
    71  
    72  	if config.MemoryReservation > 0 {
    73  		reservation := uint64(config.MemoryReservation)
    74  		memory.Reservation = &reservation
    75  	}
    76  
    77  	if config.MemorySwap != 0 {
    78  		swap := uint64(config.MemorySwap)
    79  		memory.Swap = &swap
    80  	}
    81  
    82  	if config.MemorySwappiness != nil {
    83  		swappiness := uint64(*config.MemorySwappiness)
    84  		memory.Swappiness = &swappiness
    85  	}
    86  
    87  	if config.KernelMemory != 0 {
    88  		kernelMemory := uint64(config.KernelMemory)
    89  		memory.Kernel = &kernelMemory
    90  	}
    91  
    92  	return &memory
    93  }
    94  
    95  func getCPUResources(config containertypes.Resources) *specs.CPU {
    96  	cpu := specs.CPU{}
    97  
    98  	if config.CPUShares != 0 {
    99  		shares := uint64(config.CPUShares)
   100  		cpu.Shares = &shares
   101  	}
   102  
   103  	if config.CpusetCpus != "" {
   104  		cpuset := config.CpusetCpus
   105  		cpu.Cpus = &cpuset
   106  	}
   107  
   108  	if config.CpusetMems != "" {
   109  		cpuset := config.CpusetMems
   110  		cpu.Mems = &cpuset
   111  	}
   112  
   113  	if config.NanoCPUs > 0 {
   114  		// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
   115  		period := uint64(100 * time.Millisecond / time.Microsecond)
   116  		quota := uint64(config.NanoCPUs) * period / 1e9
   117  		cpu.Period = &period
   118  		cpu.Quota = &quota
   119  	}
   120  
   121  	if config.CPUPeriod != 0 {
   122  		period := uint64(config.CPUPeriod)
   123  		cpu.Period = &period
   124  	}
   125  
   126  	if config.CPUQuota != 0 {
   127  		quota := uint64(config.CPUQuota)
   128  		cpu.Quota = &quota
   129  	}
   130  
   131  	if config.CPURealtimePeriod != 0 {
   132  		period := uint64(config.CPURealtimePeriod)
   133  		cpu.RealtimePeriod = &period
   134  	}
   135  
   136  	if config.CPURealtimeRuntime != 0 {
   137  		runtime := uint64(config.CPURealtimeRuntime)
   138  		cpu.RealtimeRuntime = &runtime
   139  	}
   140  
   141  	return &cpu
   142  }
   143  
   144  func getBlkioWeightDevices(config containertypes.Resources) ([]specs.WeightDevice, error) {
   145  	var stat syscall.Stat_t
   146  	var blkioWeightDevices []specs.WeightDevice
   147  
   148  	for _, weightDevice := range config.BlkioWeightDevice {
   149  		if err := syscall.Stat(weightDevice.Path, &stat); err != nil {
   150  			return nil, err
   151  		}
   152  		weight := weightDevice.Weight
   153  		d := specs.WeightDevice{Weight: &weight}
   154  		d.Major = int64(stat.Rdev / 256)
   155  		d.Minor = int64(stat.Rdev % 256)
   156  		blkioWeightDevices = append(blkioWeightDevices, d)
   157  	}
   158  
   159  	return blkioWeightDevices, nil
   160  }
   161  
   162  func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error {
   163  	var (
   164  		labelOpts []string
   165  		err       error
   166  	)
   167  
   168  	for _, opt := range config.SecurityOpt {
   169  		if opt == "no-new-privileges" {
   170  			container.NoNewPrivileges = true
   171  			continue
   172  		}
   173  
   174  		var con []string
   175  		if strings.Contains(opt, "=") {
   176  			con = strings.SplitN(opt, "=", 2)
   177  		} else if strings.Contains(opt, ":") {
   178  			con = strings.SplitN(opt, ":", 2)
   179  			logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 1.14, use `=` instead.")
   180  		}
   181  
   182  		if len(con) != 2 {
   183  			return fmt.Errorf("invalid --security-opt 1: %q", opt)
   184  		}
   185  
   186  		switch con[0] {
   187  		case "label":
   188  			labelOpts = append(labelOpts, con[1])
   189  		case "apparmor":
   190  			container.AppArmorProfile = con[1]
   191  		case "seccomp":
   192  			container.SeccompProfile = con[1]
   193  		default:
   194  			return fmt.Errorf("invalid --security-opt 2: %q", opt)
   195  		}
   196  	}
   197  
   198  	container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts)
   199  	return err
   200  }
   201  
   202  func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.ThrottleDevice, error) {
   203  	var throttleDevices []specs.ThrottleDevice
   204  	var stat syscall.Stat_t
   205  
   206  	for _, d := range devs {
   207  		if err := syscall.Stat(d.Path, &stat); err != nil {
   208  			return nil, err
   209  		}
   210  		rate := d.Rate
   211  		d := specs.ThrottleDevice{Rate: &rate}
   212  		d.Major = int64(stat.Rdev / 256)
   213  		d.Minor = int64(stat.Rdev % 256)
   214  		throttleDevices = append(throttleDevices, d)
   215  	}
   216  
   217  	return throttleDevices, nil
   218  }
   219  
   220  func checkKernel() error {
   221  	// Check for unsupported kernel versions
   222  	// FIXME: it would be cleaner to not test for specific versions, but rather
   223  	// test for specific functionalities.
   224  	// Unfortunately we can't test for the feature "does not cause a kernel panic"
   225  	// without actually causing a kernel panic, so we need this workaround until
   226  	// the circumstances of pre-3.10 crashes are clearer.
   227  	// For details see https://github.com/docker/docker/issues/407
   228  	// Docker 1.11 and above doesn't actually run on kernels older than 3.4,
   229  	// due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4).
   230  	if !kernel.CheckKernelVersion(3, 10, 0) {
   231  		v, _ := kernel.GetKernelVersion()
   232  		if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" {
   233  			logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String())
   234  		}
   235  	}
   236  	return nil
   237  }
   238  
   239  // adaptContainerSettings is called during container creation to modify any
   240  // settings necessary in the HostConfig structure.
   241  func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error {
   242  	if adjustCPUShares && hostConfig.CPUShares > 0 {
   243  		// Handle unsupported CPUShares
   244  		if hostConfig.CPUShares < linuxMinCPUShares {
   245  			logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares)
   246  			hostConfig.CPUShares = linuxMinCPUShares
   247  		} else if hostConfig.CPUShares > linuxMaxCPUShares {
   248  			logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares)
   249  			hostConfig.CPUShares = linuxMaxCPUShares
   250  		}
   251  	}
   252  	if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 {
   253  		// By default, MemorySwap is set to twice the size of Memory.
   254  		hostConfig.MemorySwap = hostConfig.Memory * 2
   255  	}
   256  	if hostConfig.ShmSize == 0 {
   257  		hostConfig.ShmSize = container.DefaultSHMSize
   258  	}
   259  	var err error
   260  	opts, err := daemon.generateSecurityOpt(hostConfig.IpcMode, hostConfig.PidMode, hostConfig.Privileged)
   261  	if err != nil {
   262  		return err
   263  	}
   264  	hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...)
   265  	if hostConfig.MemorySwappiness == nil {
   266  		defaultSwappiness := int64(-1)
   267  		hostConfig.MemorySwappiness = &defaultSwappiness
   268  	}
   269  	if hostConfig.OomKillDisable == nil {
   270  		defaultOomKillDisable := false
   271  		hostConfig.OomKillDisable = &defaultOomKillDisable
   272  	}
   273  
   274  	return nil
   275  }
   276  
// verifyContainerResources validates the requested cgroup resources against
// what sysInfo reports as supported and against fixed value ranges.
//
// resources is modified in place: a setting whose capability flag in sysInfo
// is false is reset (zero value, nil, empty slice, or -1 for MemorySwap) and a
// human-readable warning is both logged and appended to the returned slice.
// A setting that is out of range or conflicts with another one aborts
// validation: the warnings gathered so far are returned together with an
// error. The checks run in a fixed order (memory, pids, cpu, cpuset, blkio),
// and each "discard" step runs before the range check of the same setting, so
// a discarded value is never range-checked.
//
// update is only consulted by the "MemorySwap without Memory" check, which is
// skipped when update is true.
func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) {
	warnings := []string{}

	// memory subsystem checks and adjustments
	if resources.Memory != 0 && resources.Memory < linuxMinMemory {
		return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB")
	}
	if resources.Memory > 0 && !sysInfo.MemoryLimit {
		warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		resources.Memory = 0
		resources.MemorySwap = -1
	}
	if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit {
		warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.")
		logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.")
		resources.MemorySwap = -1
	}
	if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory {
		return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage")
	}
	// A swap limit without a memory limit is rejected, except on update.
	if resources.Memory == 0 && resources.MemorySwap > 0 && !update {
		return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage")
	}
	// -1 is the "not set" swappiness value and never triggers the warning.
	if resources.MemorySwappiness != nil && *resources.MemorySwappiness != -1 && !sysInfo.MemorySwappiness {
		warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.")
		logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.")
		resources.MemorySwappiness = nil
	}
	if resources.MemorySwappiness != nil {
		swappiness := *resources.MemorySwappiness
		// -1 is accepted in addition to the documented 0-100 range.
		if swappiness < -1 || swappiness > 100 {
			return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness)
		}
	}
	if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation {
		warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
		logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
		resources.MemoryReservation = 0
	}
	if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory {
		return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB")
	}
	if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation {
		return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage")
	}
	if resources.KernelMemory > 0 && !sysInfo.KernelMemory {
		warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		resources.KernelMemory = 0
	}
	if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory {
		return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB")
	}
	// Warn only: the kernel memory limit is kept on kernels older than 4.0.
	if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) {
		warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
		logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
	}
	if resources.OomKillDisable != nil && !sysInfo.OomKillDisable {
		// only produce warnings if the setting wasn't to *disable* the OOM Kill; no point
		// warning the caller if they already wanted the feature to be off
		if *resources.OomKillDisable {
			warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.")
			logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.")
		}
		resources.OomKillDisable = nil
	}

	if resources.PidsLimit != 0 && !sysInfo.PidsLimit {
		warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
		logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
		resources.PidsLimit = 0
	}

	// cpu subsystem checks and adjustments
	// NanoCPUs cannot be combined with an explicit period or quota, and
	// unlike the other CPU settings it is an error (not a warning) when the
	// kernel lacks CFS period/quota support.
	if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 {
		return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set")
	}
	if resources.NanoCPUs > 0 && resources.CPUQuota > 0 {
		return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set")
	}
	if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) {
		return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted")
	}
	// The highest precision we could get on Linux is 0.001, by setting
	//   cpu.cfs_period_us=1000ms
	//   cpu.cfs_quota=1ms
	// See the following link for details:
	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
	// Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error.
	// The error message is 0.01 so that this is consistent with Windows
	if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 {
		return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU())
	}

	if resources.CPUShares > 0 && !sysInfo.CPUShares {
		warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
		logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
		resources.CPUShares = 0
	}
	if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod {
		warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
		logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
		resources.CPUPeriod = 0
	}
	if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) {
		return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)")
	}
	if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota {
		warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
		logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
		resources.CPUQuota = 0
	}
	if resources.CPUQuota > 0 && resources.CPUQuota < 1000 {
		return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)")
	}
	// CPUPercent is always discarded here, independent of sysInfo.
	if resources.CPUPercent > 0 {
		warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS))
		logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)
		resources.CPUPercent = 0
	}

	// cpuset subsystem checks and adjustments
	if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset {
		warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
		logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
		resources.CpusetCpus = ""
		resources.CpusetMems = ""
	}
	// The availability checks below run unconditionally, i.e. also with the
	// (possibly just cleared) empty cpuset strings.
	cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus)
	if err != nil {
		return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus)
	}
	if !cpusAvailable {
		return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus)
	}
	memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems)
	if err != nil {
		return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems)
	}
	if !memsAvailable {
		return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems)
	}

	// blkio subsystem checks and adjustments
	if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight {
		warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
		logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
		resources.BlkioWeight = 0
	}
	if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) {
		return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000")
	}
	// These two settings are rejected outright on this platform.
	if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 {
		return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS)
	}
	if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice {
		warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
		logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
		resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{}
	}
	if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice {
		warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.")
		logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded")
		resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{}
	}
	if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice {
		warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
		logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
		resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{}
	}
	if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice {
		warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
		logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
		resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{}
	}
	if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice {
		warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
		logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
		resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{}
	}

	return warnings, nil
}
   461  
   462  func (daemon *Daemon) getCgroupDriver() string {
   463  	cgroupDriver := cgroupFsDriver
   464  
   465  	if UsingSystemd(daemon.configStore) {
   466  		cgroupDriver = cgroupSystemdDriver
   467  	}
   468  	return cgroupDriver
   469  }
   470  
   471  // getCD gets the raw value of the native.cgroupdriver option, if set.
   472  func getCD(config *Config) string {
   473  	for _, option := range config.ExecOptions {
   474  		key, val, err := parsers.ParseKeyValueOpt(option)
   475  		if err != nil || !strings.EqualFold(key, "native.cgroupdriver") {
   476  			continue
   477  		}
   478  		return val
   479  	}
   480  	return ""
   481  }
   482  
   483  // VerifyCgroupDriver validates native.cgroupdriver
   484  func VerifyCgroupDriver(config *Config) error {
   485  	cd := getCD(config)
   486  	if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver {
   487  		return nil
   488  	}
   489  	return fmt.Errorf("native.cgroupdriver option %s not supported", cd)
   490  }
   491  
   492  // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd
   493  func UsingSystemd(config *Config) bool {
   494  	return getCD(config) == cgroupSystemdDriver
   495  }
   496  
   497  // verifyPlatformContainerSettings performs platform-specific validation of the
   498  // hostconfig and config structures.
   499  func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) {
   500  	warnings := []string{}
   501  	sysInfo := sysinfo.New(true)
   502  
   503  	warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config)
   504  	if err != nil {
   505  		return warnings, err
   506  	}
   507  
   508  	w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update)
   509  
   510  	// no matter err is nil or not, w could have data in itself.
   511  	warnings = append(warnings, w...)
   512  
   513  	if err != nil {
   514  		return warnings, err
   515  	}
   516  
   517  	if hostConfig.ShmSize < 0 {
   518  		return warnings, fmt.Errorf("SHM size can not be less than 0")
   519  	}
   520  
   521  	if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 {
   522  		return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj)
   523  	}
   524  
   525  	// ip-forwarding does not affect container with '--net=host' (or '--net=none')
   526  	if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) {
   527  		warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
   528  		logrus.Warn("IPv4 forwarding is disabled. Networking will not work")
   529  	}
   530  	// check for various conflicting options with user namespaces
   531  	if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
   532  		if hostConfig.Privileged {
   533  			return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
   534  		}
   535  		if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   536  			return warnings, fmt.Errorf("Cannot share the host's network namespace when user namespaces are enabled")
   537  		}
   538  		if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   539  			return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled")
   540  		}
   541  	}
   542  	if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) {
   543  		// CgroupParent for systemd cgroup should be named as "xxx.slice"
   544  		if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") {
   545  			return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   546  		}
   547  	}
   548  	if hostConfig.Runtime == "" {
   549  		hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
   550  	}
   551  
   552  	if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil {
   553  		return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime)
   554  	}
   555  
   556  	return warnings, nil
   557  }
   558  
   559  // platformReload update configuration with platform specific options
   560  func (daemon *Daemon) platformReload(config *Config) map[string]string {
   561  	if config.IsValueSet("runtimes") {
   562  		daemon.configStore.Runtimes = config.Runtimes
   563  		// Always set the default one
   564  		daemon.configStore.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   565  	}
   566  
   567  	if config.DefaultRuntime != "" {
   568  		daemon.configStore.DefaultRuntime = config.DefaultRuntime
   569  	}
   570  
   571  	// Update attributes
   572  	var runtimeList bytes.Buffer
   573  	for name, rt := range daemon.configStore.Runtimes {
   574  		if runtimeList.Len() > 0 {
   575  			runtimeList.WriteRune(' ')
   576  		}
   577  		runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt))
   578  	}
   579  
   580  	return map[string]string{
   581  		"runtimes":        runtimeList.String(),
   582  		"default-runtime": daemon.configStore.DefaultRuntime,
   583  	}
   584  }
   585  
   586  // verifyDaemonSettings performs validation of daemon config struct
   587  func verifyDaemonSettings(config *Config) error {
   588  	// Check for mutually incompatible config options
   589  	if config.bridgeConfig.Iface != "" && config.bridgeConfig.IP != "" {
   590  		return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one")
   591  	}
   592  	if !config.bridgeConfig.EnableIPTables && !config.bridgeConfig.InterContainerCommunication {
   593  		return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true")
   594  	}
   595  	if !config.bridgeConfig.EnableIPTables && config.bridgeConfig.EnableIPMasq {
   596  		config.bridgeConfig.EnableIPMasq = false
   597  	}
   598  	if err := VerifyCgroupDriver(config); err != nil {
   599  		return err
   600  	}
   601  	if config.CgroupParent != "" && UsingSystemd(config) {
   602  		if len(config.CgroupParent) <= 6 || !strings.HasSuffix(config.CgroupParent, ".slice") {
   603  			return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   604  		}
   605  	}
   606  
   607  	if config.DefaultRuntime == "" {
   608  		config.DefaultRuntime = stockRuntimeName
   609  	}
   610  	if config.Runtimes == nil {
   611  		config.Runtimes = make(map[string]types.Runtime)
   612  	}
   613  	config.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   614  
   615  	return nil
   616  }
   617  
   618  // checkSystem validates platform-specific requirements
   619  func checkSystem() error {
   620  	if os.Geteuid() != 0 {
   621  		return fmt.Errorf("The Docker daemon needs to be run as root")
   622  	}
   623  	return checkKernel()
   624  }
   625  
   626  // configureMaxThreads sets the Go runtime max threads threshold
   627  // which is 90% of the kernel setting from /proc/sys/kernel/threads-max
   628  func configureMaxThreads(config *Config) error {
   629  	mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
   630  	if err != nil {
   631  		return err
   632  	}
   633  	mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
   634  	if err != nil {
   635  		return err
   636  	}
   637  	maxThreads := (mtint / 100) * 90
   638  	debug.SetMaxThreads(maxThreads)
   639  	logrus.Debugf("Golang's threads limit set to %d", maxThreads)
   640  	return nil
   641  }
   642  
   643  // configureKernelSecuritySupport configures and validates security support for the kernel
   644  func configureKernelSecuritySupport(config *Config, driverName string) error {
   645  	if config.EnableSelinuxSupport {
   646  		if !selinuxEnabled() {
   647  			logrus.Warn("Docker could not enable SELinux on the host system")
   648  		}
   649  	} else {
   650  		selinuxSetDisabled()
   651  	}
   652  	return nil
   653  }
   654  
   655  func (daemon *Daemon) initNetworkController(config *Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
   656  	netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
   657  	if err != nil {
   658  		return nil, err
   659  	}
   660  
   661  	controller, err := libnetwork.New(netOptions...)
   662  	if err != nil {
   663  		return nil, fmt.Errorf("error obtaining controller instance: %v", err)
   664  	}
   665  
   666  	if len(activeSandboxes) > 0 {
   667  		logrus.Info("There are old running containers, the network config will not take affect")
   668  		return controller, nil
   669  	}
   670  
   671  	// Initialize default network on "null"
   672  	if n, _ := controller.NetworkByName("none"); n == nil {
   673  		if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   674  			return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
   675  		}
   676  	}
   677  
   678  	// Initialize default network on "host"
   679  	if n, _ := controller.NetworkByName("host"); n == nil {
   680  		if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   681  			return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
   682  		}
   683  	}
   684  
   685  	// Clear stale bridge network
   686  	if n, err := controller.NetworkByName("bridge"); err == nil {
   687  		if err = n.Delete(); err != nil {
   688  			return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
   689  		}
   690  	}
   691  
   692  	if !config.DisableBridge {
   693  		// Initialize default driver "bridge"
   694  		if err := initBridgeDriver(controller, config); err != nil {
   695  			return nil, err
   696  		}
   697  	} else {
   698  		removeDefaultBridgeInterface()
   699  	}
   700  
   701  	return controller, nil
   702  }
   703  
   704  func driverOptions(config *Config) []nwconfig.Option {
   705  	bridgeConfig := options.Generic{
   706  		"EnableIPForwarding":  config.bridgeConfig.EnableIPForward,
   707  		"EnableIPTables":      config.bridgeConfig.EnableIPTables,
   708  		"EnableUserlandProxy": config.bridgeConfig.EnableUserlandProxy,
   709  		"UserlandProxyPath":   config.bridgeConfig.UserlandProxyPath}
   710  	bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}
   711  
   712  	dOptions := []nwconfig.Option{}
   713  	dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption))
   714  	return dOptions
   715  }
   716  
   717  func initBridgeDriver(controller libnetwork.NetworkController, config *Config) error {
   718  	bridgeName := bridge.DefaultBridgeName
   719  	if config.bridgeConfig.Iface != "" {
   720  		bridgeName = config.bridgeConfig.Iface
   721  	}
   722  	netOption := map[string]string{
   723  		bridge.BridgeName:         bridgeName,
   724  		bridge.DefaultBridge:      strconv.FormatBool(true),
   725  		netlabel.DriverMTU:        strconv.Itoa(config.Mtu),
   726  		bridge.EnableIPMasquerade: strconv.FormatBool(config.bridgeConfig.EnableIPMasq),
   727  		bridge.EnableICC:          strconv.FormatBool(config.bridgeConfig.InterContainerCommunication),
   728  	}
   729  
   730  	// --ip processing
   731  	if config.bridgeConfig.DefaultIP != nil {
   732  		netOption[bridge.DefaultBindingIP] = config.bridgeConfig.DefaultIP.String()
   733  	}
   734  
   735  	var (
   736  		ipamV4Conf *libnetwork.IpamConf
   737  		ipamV6Conf *libnetwork.IpamConf
   738  	)
   739  
   740  	ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   741  
   742  	nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
   743  	if err != nil {
   744  		return errors.Wrap(err, "list bridge addresses failed")
   745  	}
   746  
   747  	nw := nwList[0]
   748  	if len(nwList) > 1 && config.bridgeConfig.FixedCIDR != "" {
   749  		_, fCIDR, err := net.ParseCIDR(config.bridgeConfig.FixedCIDR)
   750  		if err != nil {
   751  			return errors.Wrap(err, "parse CIDR failed")
   752  		}
   753  		// Iterate through in case there are multiple addresses for the bridge
   754  		for _, entry := range nwList {
   755  			if fCIDR.Contains(entry.IP) {
   756  				nw = entry
   757  				break
   758  			}
   759  		}
   760  	}
   761  
   762  	ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
   763  	hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
   764  	if hip.IsGlobalUnicast() {
   765  		ipamV4Conf.Gateway = nw.IP.String()
   766  	}
   767  
   768  	if config.bridgeConfig.IP != "" {
   769  		ipamV4Conf.PreferredPool = config.bridgeConfig.IP
   770  		ip, _, err := net.ParseCIDR(config.bridgeConfig.IP)
   771  		if err != nil {
   772  			return err
   773  		}
   774  		ipamV4Conf.Gateway = ip.String()
   775  	} else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
   776  		logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
   777  	}
   778  
   779  	if config.bridgeConfig.FixedCIDR != "" {
   780  		_, fCIDR, err := net.ParseCIDR(config.bridgeConfig.FixedCIDR)
   781  		if err != nil {
   782  			return err
   783  		}
   784  
   785  		ipamV4Conf.SubPool = fCIDR.String()
   786  	}
   787  
   788  	if config.bridgeConfig.DefaultGatewayIPv4 != nil {
   789  		ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.bridgeConfig.DefaultGatewayIPv4.String()
   790  	}
   791  
   792  	var deferIPv6Alloc bool
   793  	if config.bridgeConfig.FixedCIDRv6 != "" {
   794  		_, fCIDRv6, err := net.ParseCIDR(config.bridgeConfig.FixedCIDRv6)
   795  		if err != nil {
   796  			return err
   797  		}
   798  
   799  		// In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
   800  		// at least 48 host bits, we need to guarantee the current behavior where the containers'
   801  		// IPv6 addresses will be constructed based on the containers' interface MAC address.
   802  		// We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
   803  		// on this network until after the driver has created the endpoint and returned the
   804  		// constructed address. Libnetwork will then reserve this address with the ipam driver.
   805  		ones, _ := fCIDRv6.Mask.Size()
   806  		deferIPv6Alloc = ones <= 80
   807  
   808  		if ipamV6Conf == nil {
   809  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   810  		}
   811  		ipamV6Conf.PreferredPool = fCIDRv6.String()
   812  
   813  		// In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
   814  		// address belongs to the same network, we need to inform libnetwork about it, so
   815  		// that it can be reserved with IPAM and it will not be given away to somebody else
   816  		for _, nw6 := range nw6List {
   817  			if fCIDRv6.Contains(nw6.IP) {
   818  				ipamV6Conf.Gateway = nw6.IP.String()
   819  				break
   820  			}
   821  		}
   822  	}
   823  
   824  	if config.bridgeConfig.DefaultGatewayIPv6 != nil {
   825  		if ipamV6Conf == nil {
   826  			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
   827  		}
   828  		ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.bridgeConfig.DefaultGatewayIPv6.String()
   829  	}
   830  
   831  	v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
   832  	v6Conf := []*libnetwork.IpamConf{}
   833  	if ipamV6Conf != nil {
   834  		v6Conf = append(v6Conf, ipamV6Conf)
   835  	}
   836  	// Initialize default network on "bridge" with the same name
   837  	_, err = controller.NewNetwork("bridge", "bridge", "",
   838  		libnetwork.NetworkOptionEnableIPv6(config.bridgeConfig.EnableIPv6),
   839  		libnetwork.NetworkOptionDriverOpts(netOption),
   840  		libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
   841  		libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
   842  	if err != nil {
   843  		return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
   844  	}
   845  	return nil
   846  }
   847  
   848  // Remove default bridge interface if present (--bridge=none use case)
   849  func removeDefaultBridgeInterface() {
   850  	if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
   851  		if err := netlink.LinkDel(lnk); err != nil {
   852  			logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
   853  		}
   854  	}
   855  }
   856  
   857  func (daemon *Daemon) getLayerInit() func(string) error {
   858  	return daemon.setupInitLayer
   859  }
   860  
   861  // setupInitLayer populates a directory with mountpoints suitable
   862  // for bind-mounting things into the container.
   863  //
   864  // This extra layer is used by all containers as the top-most ro layer. It protects
   865  // the container from unwanted side-effects on the rw layer.
   866  func setupInitLayer(initLayer string, rootUID, rootGID int) error {
   867  	for pth, typ := range map[string]string{
   868  		"/dev/pts":         "dir",
   869  		"/dev/shm":         "dir",
   870  		"/proc":            "dir",
   871  		"/sys":             "dir",
   872  		"/.dockerenv":      "file",
   873  		"/etc/resolv.conf": "file",
   874  		"/etc/hosts":       "file",
   875  		"/etc/hostname":    "file",
   876  		"/dev/console":     "file",
   877  		"/etc/mtab":        "/proc/mounts",
   878  	} {
   879  		parts := strings.Split(pth, "/")
   880  		prev := "/"
   881  		for _, p := range parts[1:] {
   882  			prev = filepath.Join(prev, p)
   883  			syscall.Unlink(filepath.Join(initLayer, prev))
   884  		}
   885  
   886  		if _, err := os.Stat(filepath.Join(initLayer, pth)); err != nil {
   887  			if os.IsNotExist(err) {
   888  				if err := idtools.MkdirAllNewAs(filepath.Join(initLayer, filepath.Dir(pth)), 0755, rootUID, rootGID); err != nil {
   889  					return err
   890  				}
   891  				switch typ {
   892  				case "dir":
   893  					if err := idtools.MkdirAllNewAs(filepath.Join(initLayer, pth), 0755, rootUID, rootGID); err != nil {
   894  						return err
   895  					}
   896  				case "file":
   897  					f, err := os.OpenFile(filepath.Join(initLayer, pth), os.O_CREATE, 0755)
   898  					if err != nil {
   899  						return err
   900  					}
   901  					f.Chown(rootUID, rootGID)
   902  					f.Close()
   903  				default:
   904  					if err := os.Symlink(typ, filepath.Join(initLayer, pth)); err != nil {
   905  						return err
   906  					}
   907  				}
   908  			} else {
   909  				return err
   910  			}
   911  		}
   912  	}
   913  
   914  	// Layer is ready to use, if it wasn't before.
   915  	return nil
   916  }
   917  
   918  // Parse the remapped root (user namespace) option, which can be one of:
   919  //   username            - valid username from /etc/passwd
   920  //   username:groupname  - valid username; valid groupname from /etc/group
   921  //   uid                 - 32-bit unsigned int valid Linux UID value
   922  //   uid:gid             - uid value; 32-bit unsigned int Linux GID value
   923  //
   924  //  If no groupname is specified, and a username is specified, an attempt
   925  //  will be made to lookup a gid for that username as a groupname
   926  //
   927  //  If names are used, they are verified to exist in passwd/group
   928  func parseRemappedRoot(usergrp string) (string, string, error) {
   929  
   930  	var (
   931  		userID, groupID     int
   932  		username, groupname string
   933  	)
   934  
   935  	idparts := strings.Split(usergrp, ":")
   936  	if len(idparts) > 2 {
   937  		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
   938  	}
   939  
   940  	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
   941  		// must be a uid; take it as valid
   942  		userID = int(uid)
   943  		luser, err := idtools.LookupUID(userID)
   944  		if err != nil {
   945  			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
   946  		}
   947  		username = luser.Name
   948  		if len(idparts) == 1 {
   949  			// if the uid was numeric and no gid was specified, take the uid as the gid
   950  			groupID = userID
   951  			lgrp, err := idtools.LookupGID(groupID)
   952  			if err != nil {
   953  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
   954  			}
   955  			groupname = lgrp.Name
   956  		}
   957  	} else {
   958  		lookupName := idparts[0]
   959  		// special case: if the user specified "default", they want Docker to create or
   960  		// use (after creation) the "dockremap" user/group for root remapping
   961  		if lookupName == defaultIDSpecifier {
   962  			lookupName = defaultRemappedID
   963  		}
   964  		luser, err := idtools.LookupUser(lookupName)
   965  		if err != nil && idparts[0] != defaultIDSpecifier {
   966  			// error if the name requested isn't the special "dockremap" ID
   967  			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
   968  		} else if err != nil {
   969  			// special case-- if the username == "default", then we have been asked
   970  			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
   971  			// ranges will be used for the user and group mappings in user namespaced containers
   972  			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
   973  			if err == nil {
   974  				return defaultRemappedID, defaultRemappedID, nil
   975  			}
   976  			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
   977  		}
   978  		username = luser.Name
   979  		if len(idparts) == 1 {
   980  			// we only have a string username, and no group specified; look up gid from username as group
   981  			group, err := idtools.LookupGroup(lookupName)
   982  			if err != nil {
   983  				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
   984  			}
   985  			groupID = group.Gid
   986  			groupname = group.Name
   987  		}
   988  	}
   989  
   990  	if len(idparts) == 2 {
   991  		// groupname or gid is separately specified and must be resolved
   992  		// to an unsigned 32-bit gid
   993  		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
   994  			// must be a gid, take it as valid
   995  			groupID = int(gid)
   996  			lgrp, err := idtools.LookupGID(groupID)
   997  			if err != nil {
   998  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
   999  			}
  1000  			groupname = lgrp.Name
  1001  		} else {
  1002  			// not a number; attempt a lookup
  1003  			if _, err := idtools.LookupGroup(idparts[1]); err != nil {
  1004  				return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
  1005  			}
  1006  			groupname = idparts[1]
  1007  		}
  1008  	}
  1009  	return username, groupname, nil
  1010  }
  1011  
  1012  func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
  1013  	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
  1014  		return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
  1015  	}
  1016  
  1017  	// if the daemon was started with remapped root option, parse
  1018  	// the config option to the int uid,gid values
  1019  	var (
  1020  		uidMaps, gidMaps []idtools.IDMap
  1021  	)
  1022  	if config.RemappedRoot != "" {
  1023  		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
  1024  		if err != nil {
  1025  			return nil, nil, err
  1026  		}
  1027  		if username == "root" {
  1028  			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
  1029  			// effectively
  1030  			logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
  1031  			return uidMaps, gidMaps, nil
  1032  		}
  1033  		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
  1034  		// update remapped root setting now that we have resolved them to actual names
  1035  		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
  1036  
  1037  		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
  1038  		if err != nil {
  1039  			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
  1040  		}
  1041  	}
  1042  	return uidMaps, gidMaps, nil
  1043  }
  1044  
  1045  func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
  1046  	config.Root = rootDir
  1047  	// the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
  1048  	// so that syscalls executing as non-root, operating on subdirectories of the graph root
  1049  	// (e.g. mounted layers of a container) can traverse this path.
  1050  	// The user namespace support will create subdirectories for the remapped root host uid:gid
  1051  	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
  1052  	// layer content subtrees.
  1053  	if _, err := os.Stat(rootDir); err == nil {
  1054  		// root current exists; verify the access bits are correct by setting them
  1055  		if err = os.Chmod(rootDir, 0711); err != nil {
  1056  			return err
  1057  		}
  1058  	} else if os.IsNotExist(err) {
  1059  		// no root exists yet, create it 0711 with root:root ownership
  1060  		if err := os.MkdirAll(rootDir, 0711); err != nil {
  1061  			return err
  1062  		}
  1063  	}
  1064  
  1065  	// if user namespaces are enabled we will create a subtree underneath the specified root
  1066  	// with any/all specified remapped root uid/gid options on the daemon creating
  1067  	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
  1068  	// `chdir()` to work for containers namespaced to that uid/gid)
  1069  	if config.RemappedRoot != "" {
  1070  		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
  1071  		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
  1072  		// Create the root directory if it doesn't exist
  1073  		if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
  1074  			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
  1075  		}
  1076  		// we also need to verify that any pre-existing directories in the path to
  1077  		// the graphroot won't block access to remapped root--if any pre-existing directory
  1078  		// has strict permissions that don't allow "x", container start will fail, so
  1079  		// better to warn and fail now
  1080  		dirPath := config.Root
  1081  		for {
  1082  			dirPath = filepath.Dir(dirPath)
  1083  			if dirPath == "/" {
  1084  				break
  1085  			}
  1086  			if !idtools.CanAccess(dirPath, rootUID, rootGID) {
  1087  				return fmt.Errorf("A subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories.", config.Root)
  1088  			}
  1089  		}
  1090  	}
  1091  	return nil
  1092  }
  1093  
  1094  // registerLinks writes the links to a file.
  1095  func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
  1096  	if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() {
  1097  		return nil
  1098  	}
  1099  
  1100  	for _, l := range hostConfig.Links {
  1101  		name, alias, err := runconfigopts.ParseLink(l)
  1102  		if err != nil {
  1103  			return err
  1104  		}
  1105  		child, err := daemon.GetContainer(name)
  1106  		if err != nil {
  1107  			return fmt.Errorf("Could not get container for %s", name)
  1108  		}
  1109  		for child.HostConfig.NetworkMode.IsContainer() {
  1110  			parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2)
  1111  			child, err = daemon.GetContainer(parts[1])
  1112  			if err != nil {
  1113  				return fmt.Errorf("Could not get container for %s", parts[1])
  1114  			}
  1115  		}
  1116  		if child.HostConfig.NetworkMode.IsHost() {
  1117  			return runconfig.ErrConflictHostNetworkAndLinks
  1118  		}
  1119  		if err := daemon.registerLink(container, child, alias); err != nil {
  1120  			return err
  1121  		}
  1122  	}
  1123  
  1124  	// After we load all the links into the daemon
  1125  	// set them to nil on the hostconfig
  1126  	return container.WriteHostConfig()
  1127  }
  1128  
  1129  // conditionalMountOnStart is a platform specific helper function during the
  1130  // container start to call mount.
  1131  func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
  1132  	return daemon.Mount(container)
  1133  }
  1134  
  1135  // conditionalUnmountOnCleanup is a platform specific helper function called
  1136  // during the cleanup of a container to unmount.
  1137  func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
  1138  	return daemon.Unmount(container)
  1139  }
  1140  
  1141  func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
  1142  	if !c.IsRunning() {
  1143  		return nil, errNotRunning{c.ID}
  1144  	}
  1145  	stats, err := daemon.containerd.Stats(c.ID)
  1146  	if err != nil {
  1147  		return nil, err
  1148  	}
  1149  	s := &types.StatsJSON{}
  1150  	cgs := stats.CgroupStats
  1151  	if cgs != nil {
  1152  		s.BlkioStats = types.BlkioStats{
  1153  			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
  1154  			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
  1155  			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
  1156  			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
  1157  			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
  1158  			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
  1159  			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
  1160  			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
  1161  		}
  1162  		cpu := cgs.CpuStats
  1163  		s.CPUStats = types.CPUStats{
  1164  			CPUUsage: types.CPUUsage{
  1165  				TotalUsage:        cpu.CpuUsage.TotalUsage,
  1166  				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
  1167  				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
  1168  				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
  1169  			},
  1170  			ThrottlingData: types.ThrottlingData{
  1171  				Periods:          cpu.ThrottlingData.Periods,
  1172  				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
  1173  				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
  1174  			},
  1175  		}
  1176  		mem := cgs.MemoryStats.Usage
  1177  		s.MemoryStats = types.MemoryStats{
  1178  			Usage:    mem.Usage,
  1179  			MaxUsage: mem.MaxUsage,
  1180  			Stats:    cgs.MemoryStats.Stats,
  1181  			Failcnt:  mem.Failcnt,
  1182  			Limit:    mem.Limit,
  1183  		}
  1184  		// if the container does not set memory limit, use the machineMemory
  1185  		if mem.Limit > daemon.statsCollector.machineMemory && daemon.statsCollector.machineMemory > 0 {
  1186  			s.MemoryStats.Limit = daemon.statsCollector.machineMemory
  1187  		}
  1188  		if cgs.PidsStats != nil {
  1189  			s.PidsStats = types.PidsStats{
  1190  				Current: cgs.PidsStats.Current,
  1191  			}
  1192  		}
  1193  	}
  1194  	s.Read, err = ptypes.Timestamp(stats.Timestamp)
  1195  	if err != nil {
  1196  		return nil, err
  1197  	}
  1198  	return s, nil
  1199  }
  1200  
  1201  // setDefaultIsolation determines the default isolation mode for the
  1202  // daemon to run in. This is only applicable on Windows
  1203  func (daemon *Daemon) setDefaultIsolation() error {
  1204  	return nil
  1205  }
  1206  
  1207  func rootFSToAPIType(rootfs *image.RootFS) types.RootFS {
  1208  	var layers []string
  1209  	for _, l := range rootfs.DiffIDs {
  1210  		layers = append(layers, l.String())
  1211  	}
  1212  	return types.RootFS{
  1213  		Type:   rootfs.Type,
  1214  		Layers: layers,
  1215  	}
  1216  }
  1217  
  1218  // setupDaemonProcess sets various settings for the daemon's process
  1219  func setupDaemonProcess(config *Config) error {
  1220  	// setup the daemons oom_score_adj
  1221  	return setupOOMScoreAdj(config.OOMScoreAdjust)
  1222  }
  1223  
  1224  func setupOOMScoreAdj(score int) error {
  1225  	f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0)
  1226  	if err != nil {
  1227  		return err
  1228  	}
  1229  	defer f.Close()
  1230  	stringScore := strconv.Itoa(score)
  1231  	_, err = f.WriteString(stringScore)
  1232  	if os.IsPermission(err) {
  1233  		// Setting oom_score_adj does not work in an
  1234  		// unprivileged container. Ignore the error, but log
  1235  		// it if we appear not to be in that situation.
  1236  		if !rsystem.RunningInUserNS() {
  1237  			logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore)
  1238  		}
  1239  		return nil
  1240  	}
  1241  
  1242  	return err
  1243  }
  1244  
  1245  func (daemon *Daemon) initCgroupsPath(path string) error {
  1246  	if path == "/" || path == "." {
  1247  		return nil
  1248  	}
  1249  
  1250  	daemon.initCgroupsPath(filepath.Dir(path))
  1251  
  1252  	_, root, err := cgroups.FindCgroupMountpointAndRoot("cpu")
  1253  	if err != nil {
  1254  		return err
  1255  	}
  1256  
  1257  	path = filepath.Join(root, path)
  1258  	sysinfo := sysinfo.New(true)
  1259  	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
  1260  		return err
  1261  	}
  1262  	if sysinfo.CPURealtimePeriod && daemon.configStore.CPURealtimePeriod != 0 {
  1263  		if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_period_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimePeriod, 10)), 0700); err != nil {
  1264  			return err
  1265  		}
  1266  	}
  1267  	if sysinfo.CPURealtimeRuntime && daemon.configStore.CPURealtimeRuntime != 0 {
  1268  		if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_runtime_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimeRuntime, 10)), 0700); err != nil {
  1269  			return err
  1270  		}
  1271  	}
  1272  	return nil
  1273  }
  1274  
  1275  func (daemon *Daemon) setupSeccompProfile() error {
  1276  	if daemon.configStore.SeccompProfile != "" {
  1277  		daemon.seccompProfilePath = daemon.configStore.SeccompProfile
  1278  		b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile)
  1279  		if err != nil {
  1280  			return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err)
  1281  		}
  1282  		daemon.seccompProfile = b
  1283  	}
  1284  	return nil
  1285  }