github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/daemon/daemon_unix.go (about)

     1  // +build linux freebsd
     2  
     3  package daemon
     4  
     5  import (
     6  	"bufio"
     7  	"bytes"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"net"
    11  	"os"
    12  	"path/filepath"
    13  	"runtime"
    14  	"runtime/debug"
    15  	"strconv"
    16  	"strings"
    17  	"syscall"
    18  	"time"
    19  
    20  	"github.com/Sirupsen/logrus"
    21  	"github.com/docker/docker/api/types"
    22  	"github.com/docker/docker/api/types/blkiodev"
    23  	pblkiodev "github.com/docker/docker/api/types/blkiodev"
    24  	containertypes "github.com/docker/docker/api/types/container"
    25  	"github.com/docker/docker/container"
    26  	"github.com/docker/docker/daemon/config"
    27  	"github.com/docker/docker/image"
    28  	"github.com/docker/docker/opts"
    29  	"github.com/docker/docker/pkg/idtools"
    30  	"github.com/docker/docker/pkg/parsers"
    31  	"github.com/docker/docker/pkg/parsers/kernel"
    32  	"github.com/docker/docker/pkg/sysinfo"
    33  	"github.com/docker/docker/runconfig"
    34  	"github.com/docker/docker/volume"
    35  	"github.com/docker/libnetwork"
    36  	nwconfig "github.com/docker/libnetwork/config"
    37  	"github.com/docker/libnetwork/drivers/bridge"
    38  	"github.com/docker/libnetwork/netlabel"
    39  	"github.com/docker/libnetwork/netutils"
    40  	"github.com/docker/libnetwork/options"
    41  	lntypes "github.com/docker/libnetwork/types"
    42  	"github.com/golang/protobuf/ptypes"
    43  	"github.com/opencontainers/runc/libcontainer/cgroups"
    44  	rsystem "github.com/opencontainers/runc/libcontainer/system"
    45  	specs "github.com/opencontainers/runtime-spec/specs-go"
    46  	"github.com/opencontainers/selinux/go-selinux/label"
    47  	"github.com/pkg/errors"
    48  	"github.com/vishvananda/netlink"
    49  )
    50  
const (
	// Lower and upper bounds applied to a container's CPU shares.
	// See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
	linuxMinCPUShares = 2
	linuxMaxCPUShares = 262144
	platformSupported = true
	// linuxMinMemory is the smallest memory limit accepted, in bytes (4MB).
	// This is not a kernel limit; it is a floor chosen so that a container
	// stays reasonably functional.
	linuxMinMemory = 4194304
	// constants for remapped root settings
	defaultIDSpecifier string = "default"
	defaultRemappedID  string = "dockremap"

	// constants for the supported cgroup drivers
	cgroupFsDriver      = "cgroupfs"
	cgroupSystemdDriver = "systemd"
)
    66  
    67  func getMemoryResources(config containertypes.Resources) *specs.LinuxMemory {
    68  	memory := specs.LinuxMemory{}
    69  
    70  	if config.Memory > 0 {
    71  		limit := uint64(config.Memory)
    72  		memory.Limit = &limit
    73  	}
    74  
    75  	if config.MemoryReservation > 0 {
    76  		reservation := uint64(config.MemoryReservation)
    77  		memory.Reservation = &reservation
    78  	}
    79  
    80  	if config.MemorySwap > 0 {
    81  		swap := uint64(config.MemorySwap)
    82  		memory.Swap = &swap
    83  	}
    84  
    85  	if config.MemorySwappiness != nil {
    86  		swappiness := uint64(*config.MemorySwappiness)
    87  		memory.Swappiness = &swappiness
    88  	}
    89  
    90  	if config.KernelMemory != 0 {
    91  		kernelMemory := uint64(config.KernelMemory)
    92  		memory.Kernel = &kernelMemory
    93  	}
    94  
    95  	return &memory
    96  }
    97  
    98  func getCPUResources(config containertypes.Resources) (*specs.LinuxCPU, error) {
    99  	cpu := specs.LinuxCPU{}
   100  
   101  	if config.CPUShares < 0 {
   102  		return nil, fmt.Errorf("shares: invalid argument")
   103  	}
   104  	if config.CPUShares >= 0 {
   105  		shares := uint64(config.CPUShares)
   106  		cpu.Shares = &shares
   107  	}
   108  
   109  	if config.CpusetCpus != "" {
   110  		cpu.Cpus = config.CpusetCpus
   111  	}
   112  
   113  	if config.CpusetMems != "" {
   114  		cpu.Mems = config.CpusetMems
   115  	}
   116  
   117  	if config.NanoCPUs > 0 {
   118  		// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
   119  		period := uint64(100 * time.Millisecond / time.Microsecond)
   120  		quota := config.NanoCPUs * int64(period) / 1e9
   121  		cpu.Period = &period
   122  		cpu.Quota = &quota
   123  	}
   124  
   125  	if config.CPUPeriod != 0 {
   126  		period := uint64(config.CPUPeriod)
   127  		cpu.Period = &period
   128  	}
   129  
   130  	if config.CPUQuota != 0 {
   131  		q := config.CPUQuota
   132  		cpu.Quota = &q
   133  	}
   134  
   135  	if config.CPURealtimePeriod != 0 {
   136  		period := uint64(config.CPURealtimePeriod)
   137  		cpu.RealtimePeriod = &period
   138  	}
   139  
   140  	if config.CPURealtimeRuntime != 0 {
   141  		c := config.CPURealtimeRuntime
   142  		cpu.RealtimeRuntime = &c
   143  	}
   144  
   145  	return &cpu, nil
   146  }
   147  
   148  func getBlkioWeightDevices(config containertypes.Resources) ([]specs.LinuxWeightDevice, error) {
   149  	var stat syscall.Stat_t
   150  	var blkioWeightDevices []specs.LinuxWeightDevice
   151  
   152  	for _, weightDevice := range config.BlkioWeightDevice {
   153  		if err := syscall.Stat(weightDevice.Path, &stat); err != nil {
   154  			return nil, err
   155  		}
   156  		weight := weightDevice.Weight
   157  		d := specs.LinuxWeightDevice{Weight: &weight}
   158  		d.Major = int64(stat.Rdev / 256)
   159  		d.Minor = int64(stat.Rdev % 256)
   160  		blkioWeightDevices = append(blkioWeightDevices, d)
   161  	}
   162  
   163  	return blkioWeightDevices, nil
   164  }
   165  
// parseSecurityOpt seeds the container's NoNewPrivileges flag from the daemon
// configuration and then applies the security options found in hostConfig to
// the container via the package-level parseSecurityOpt.
func (daemon *Daemon) parseSecurityOpt(container *container.Container, hostConfig *containertypes.HostConfig) error {
	container.NoNewPrivileges = daemon.configStore.NoNewPrivileges
	return parseSecurityOpt(container, hostConfig)
}
   170  
   171  func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error {
   172  	var (
   173  		labelOpts []string
   174  		err       error
   175  	)
   176  
   177  	for _, opt := range config.SecurityOpt {
   178  		if opt == "no-new-privileges" {
   179  			container.NoNewPrivileges = true
   180  			continue
   181  		}
   182  		if opt == "disable" {
   183  			labelOpts = append(labelOpts, "disable")
   184  			continue
   185  		}
   186  
   187  		var con []string
   188  		if strings.Contains(opt, "=") {
   189  			con = strings.SplitN(opt, "=", 2)
   190  		} else if strings.Contains(opt, ":") {
   191  			con = strings.SplitN(opt, ":", 2)
   192  			logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 17.04, use `=` instead.")
   193  		}
   194  		if len(con) != 2 {
   195  			return fmt.Errorf("invalid --security-opt 1: %q", opt)
   196  		}
   197  
   198  		switch con[0] {
   199  		case "label":
   200  			labelOpts = append(labelOpts, con[1])
   201  		case "apparmor":
   202  			container.AppArmorProfile = con[1]
   203  		case "seccomp":
   204  			container.SeccompProfile = con[1]
   205  		case "no-new-privileges":
   206  			noNewPrivileges, err := strconv.ParseBool(con[1])
   207  			if err != nil {
   208  				return fmt.Errorf("invalid --security-opt 2: %q", opt)
   209  			}
   210  			container.NoNewPrivileges = noNewPrivileges
   211  		default:
   212  			return fmt.Errorf("invalid --security-opt 2: %q", opt)
   213  		}
   214  	}
   215  
   216  	container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts)
   217  	return err
   218  }
   219  
   220  func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.LinuxThrottleDevice, error) {
   221  	var throttleDevices []specs.LinuxThrottleDevice
   222  	var stat syscall.Stat_t
   223  
   224  	for _, d := range devs {
   225  		if err := syscall.Stat(d.Path, &stat); err != nil {
   226  			return nil, err
   227  		}
   228  		d := specs.LinuxThrottleDevice{Rate: d.Rate}
   229  		d.Major = int64(stat.Rdev / 256)
   230  		d.Minor = int64(stat.Rdev % 256)
   231  		throttleDevices = append(throttleDevices, d)
   232  	}
   233  
   234  	return throttleDevices, nil
   235  }
   236  
   237  func checkKernel() error {
   238  	// Check for unsupported kernel versions
   239  	// FIXME: it would be cleaner to not test for specific versions, but rather
   240  	// test for specific functionalities.
   241  	// Unfortunately we can't test for the feature "does not cause a kernel panic"
   242  	// without actually causing a kernel panic, so we need this workaround until
   243  	// the circumstances of pre-3.10 crashes are clearer.
   244  	// For details see https://github.com/docker/docker/issues/407
   245  	// Docker 1.11 and above doesn't actually run on kernels older than 3.4,
   246  	// due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4).
   247  	if !kernel.CheckKernelVersion(3, 10, 0) {
   248  		v, _ := kernel.GetKernelVersion()
   249  		if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" {
   250  			logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String())
   251  		}
   252  	}
   253  	return nil
   254  }
   255  
   256  // adaptContainerSettings is called during container creation to modify any
   257  // settings necessary in the HostConfig structure.
   258  func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error {
   259  	if adjustCPUShares && hostConfig.CPUShares > 0 {
   260  		// Handle unsupported CPUShares
   261  		if hostConfig.CPUShares < linuxMinCPUShares {
   262  			logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares)
   263  			hostConfig.CPUShares = linuxMinCPUShares
   264  		} else if hostConfig.CPUShares > linuxMaxCPUShares {
   265  			logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares)
   266  			hostConfig.CPUShares = linuxMaxCPUShares
   267  		}
   268  	}
   269  	if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 {
   270  		// By default, MemorySwap is set to twice the size of Memory.
   271  		hostConfig.MemorySwap = hostConfig.Memory * 2
   272  	}
   273  	if hostConfig.ShmSize == 0 {
   274  		hostConfig.ShmSize = config.DefaultShmSize
   275  		if daemon.configStore != nil {
   276  			hostConfig.ShmSize = int64(daemon.configStore.ShmSize)
   277  		}
   278  	}
   279  	var err error
   280  	opts, err := daemon.generateSecurityOpt(hostConfig)
   281  	if err != nil {
   282  		return err
   283  	}
   284  	hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...)
   285  	if hostConfig.MemorySwappiness == nil {
   286  		defaultSwappiness := int64(-1)
   287  		hostConfig.MemorySwappiness = &defaultSwappiness
   288  	}
   289  	if hostConfig.OomKillDisable == nil {
   290  		defaultOomKillDisable := false
   291  		hostConfig.OomKillDisable = &defaultOomKillDisable
   292  	}
   293  
   294  	return nil
   295  }
   296  
// verifyContainerResources validates the resource constraints in resources
// against the capabilities reported by sysInfo.
//
// A setting the host cannot honour (according to the sysInfo flags) is reset
// on resources in place and reported as a warning, both in the returned slice
// and through logrus. A value that is out of range, or that conflicts with
// another setting, produces an error; the warnings gathered up to that point
// are still returned alongside it. When update is true, the check requiring a
// memory limit whenever a memory+swap limit is given is skipped.
func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) {
	warnings := []string{}

	// memory subsystem checks and adjustments
	if resources.Memory != 0 && resources.Memory < linuxMinMemory {
		return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB")
	}
	if resources.Memory > 0 && !sysInfo.MemoryLimit {
		warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		resources.Memory = 0
		resources.MemorySwap = -1
	}
	if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit {
		warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.")
		logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.")
		resources.MemorySwap = -1
	}
	if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory {
		return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage")
	}
	if resources.Memory == 0 && resources.MemorySwap > 0 && !update {
		return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage")
	}
	if resources.MemorySwappiness != nil && *resources.MemorySwappiness != -1 && !sysInfo.MemorySwappiness {
		warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.")
		logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.")
		resources.MemorySwappiness = nil
	}
	if resources.MemorySwappiness != nil {
		// -1 is accepted in addition to the documented 0-100 range.
		swappiness := *resources.MemorySwappiness
		if swappiness < -1 || swappiness > 100 {
			return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness)
		}
	}
	if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation {
		warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
		logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.")
		resources.MemoryReservation = 0
	}
	if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory {
		return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB")
	}
	if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation {
		return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage")
	}
	if resources.KernelMemory > 0 && !sysInfo.KernelMemory {
		warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.")
		resources.KernelMemory = 0
	}
	if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory {
		return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB")
	}
	if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) {
		warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
		logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
	}
	if resources.OomKillDisable != nil && !sysInfo.OomKillDisable {
		// only produce warnings if the setting wasn't to *disable* the OOM Kill; no point
		// warning the caller if they already wanted the feature to be off
		if *resources.OomKillDisable {
			warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.")
			logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.")
		}
		resources.OomKillDisable = nil
	}

	// pids subsystem checks and adjustments
	if resources.PidsLimit != 0 && !sysInfo.PidsLimit {
		warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
		logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.")
		resources.PidsLimit = 0
	}

	// cpu subsystem checks and adjustments
	if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 {
		return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set")
	}
	if resources.NanoCPUs > 0 && resources.CPUQuota > 0 {
		return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set")
	}
	if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) {
		return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted")
	}
	// The highest precision we could get on Linux is 0.001, by setting
	//   cpu.cfs_period_us=1000ms
	//   cpu.cfs_quota=1ms
	// See the following link for details:
	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
	// Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error.
	// The error message is 0.01 so that this is consistent with Windows
	if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 {
		return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU())
	}

	if resources.CPUShares > 0 && !sysInfo.CPUShares {
		warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
		logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.")
		resources.CPUShares = 0
	}
	if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod {
		warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
		logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.")
		resources.CPUPeriod = 0
	}
	if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) {
		return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)")
	}
	if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota {
		warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
		logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.")
		resources.CPUQuota = 0
	}
	if resources.CPUQuota > 0 && resources.CPUQuota < 1000 {
		return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)")
	}
	// CPU percent is never honoured by this platform file; it is always discarded.
	if resources.CPUPercent > 0 {
		warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS))
		logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)
		resources.CPUPercent = 0
	}

	// cpuset subsystem checks and adjustments
	if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset {
		warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
		logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.")
		resources.CpusetCpus = ""
		resources.CpusetMems = ""
	}
	cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus)
	if err != nil {
		return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus)
	}
	if !cpusAvailable {
		return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus)
	}
	memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems)
	if err != nil {
		return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems)
	}
	if !memsAvailable {
		return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems)
	}

	// blkio subsystem checks and adjustments
	if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight {
		warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
		logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.")
		resources.BlkioWeight = 0
	}
	if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) {
		return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000")
	}
	// Maximum IO bandwidth / IOps are rejected outright rather than discarded.
	if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 {
		return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS)
	}
	if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice {
		warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
		logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.")
		resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{}
	}
	if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice {
		warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.")
		logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded")
		resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{}
	}
	if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice {
		warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
		logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.")
		resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{}
	}
	if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice {
		warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
		logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.")
		resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{}
	}
	if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice {
		warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
		logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.")
		resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{}
	}

	return warnings, nil
}
   481  
   482  func (daemon *Daemon) getCgroupDriver() string {
   483  	cgroupDriver := cgroupFsDriver
   484  
   485  	if UsingSystemd(daemon.configStore) {
   486  		cgroupDriver = cgroupSystemdDriver
   487  	}
   488  	return cgroupDriver
   489  }
   490  
   491  // getCD gets the raw value of the native.cgroupdriver option, if set.
   492  func getCD(config *config.Config) string {
   493  	for _, option := range config.ExecOptions {
   494  		key, val, err := parsers.ParseKeyValueOpt(option)
   495  		if err != nil || !strings.EqualFold(key, "native.cgroupdriver") {
   496  			continue
   497  		}
   498  		return val
   499  	}
   500  	return ""
   501  }
   502  
   503  // VerifyCgroupDriver validates native.cgroupdriver
   504  func VerifyCgroupDriver(config *config.Config) error {
   505  	cd := getCD(config)
   506  	if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver {
   507  		return nil
   508  	}
   509  	return fmt.Errorf("native.cgroupdriver option %s not supported", cd)
   510  }
   511  
// UsingSystemd reports whether the native.cgroupdriver exec option in config
// is set to "systemd".
func UsingSystemd(config *config.Config) bool {
	return getCD(config) == cgroupSystemdDriver
}
   516  
   517  // verifyPlatformContainerSettings performs platform-specific validation of the
   518  // hostconfig and config structures.
   519  func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) {
   520  	var warnings []string
   521  	sysInfo := sysinfo.New(true)
   522  
   523  	warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config)
   524  	if err != nil {
   525  		return warnings, err
   526  	}
   527  
   528  	w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update)
   529  
   530  	// no matter err is nil or not, w could have data in itself.
   531  	warnings = append(warnings, w...)
   532  
   533  	if err != nil {
   534  		return warnings, err
   535  	}
   536  
   537  	if hostConfig.ShmSize < 0 {
   538  		return warnings, fmt.Errorf("SHM size can not be less than 0")
   539  	}
   540  
   541  	if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 {
   542  		return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj)
   543  	}
   544  
   545  	// ip-forwarding does not affect container with '--net=host' (or '--net=none')
   546  	if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) {
   547  		warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
   548  		logrus.Warn("IPv4 forwarding is disabled. Networking will not work")
   549  	}
   550  	// check for various conflicting options with user namespaces
   551  	if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
   552  		if hostConfig.Privileged {
   553  			return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
   554  		}
   555  		if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   556  			return warnings, fmt.Errorf("Cannot share the host's network namespace when user namespaces are enabled")
   557  		}
   558  		if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() {
   559  			return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled")
   560  		}
   561  	}
   562  	if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) {
   563  		// CgroupParent for systemd cgroup should be named as "xxx.slice"
   564  		if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") {
   565  			return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   566  		}
   567  	}
   568  	if hostConfig.Runtime == "" {
   569  		hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
   570  	}
   571  
   572  	if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil {
   573  		return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime)
   574  	}
   575  
   576  	for dest := range hostConfig.Tmpfs {
   577  		if err := volume.ValidateTmpfsMountDestination(dest); err != nil {
   578  			return warnings, err
   579  		}
   580  	}
   581  
   582  	return warnings, nil
   583  }
   584  
   585  // reloadPlatform updates configuration with platform specific options
   586  // and updates the passed attributes
   587  func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) {
   588  	if conf.IsValueSet("runtimes") {
   589  		daemon.configStore.Runtimes = conf.Runtimes
   590  		// Always set the default one
   591  		daemon.configStore.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   592  	}
   593  
   594  	if conf.DefaultRuntime != "" {
   595  		daemon.configStore.DefaultRuntime = conf.DefaultRuntime
   596  	}
   597  
   598  	if conf.IsValueSet("default-shm-size") {
   599  		daemon.configStore.ShmSize = conf.ShmSize
   600  	}
   601  
   602  	// Update attributes
   603  	var runtimeList bytes.Buffer
   604  	for name, rt := range daemon.configStore.Runtimes {
   605  		if runtimeList.Len() > 0 {
   606  			runtimeList.WriteRune(' ')
   607  		}
   608  		runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt))
   609  	}
   610  
   611  	attributes["runtimes"] = runtimeList.String()
   612  	attributes["default-runtime"] = daemon.configStore.DefaultRuntime
   613  	attributes["default-shm-size"] = fmt.Sprintf("%d", daemon.configStore.ShmSize)
   614  }
   615  
   616  // verifyDaemonSettings performs validation of daemon config struct
   617  func verifyDaemonSettings(conf *config.Config) error {
   618  	// Check for mutually incompatible config options
   619  	if conf.BridgeConfig.Iface != "" && conf.BridgeConfig.IP != "" {
   620  		return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one")
   621  	}
   622  	if !conf.BridgeConfig.EnableIPTables && !conf.BridgeConfig.InterContainerCommunication {
   623  		return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true")
   624  	}
   625  	if !conf.BridgeConfig.EnableIPTables && conf.BridgeConfig.EnableIPMasq {
   626  		conf.BridgeConfig.EnableIPMasq = false
   627  	}
   628  	if err := VerifyCgroupDriver(conf); err != nil {
   629  		return err
   630  	}
   631  	if conf.CgroupParent != "" && UsingSystemd(conf) {
   632  		if len(conf.CgroupParent) <= 6 || !strings.HasSuffix(conf.CgroupParent, ".slice") {
   633  			return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
   634  		}
   635  	}
   636  
   637  	if conf.DefaultRuntime == "" {
   638  		conf.DefaultRuntime = config.StockRuntimeName
   639  	}
   640  	if conf.Runtimes == nil {
   641  		conf.Runtimes = make(map[string]types.Runtime)
   642  	}
   643  	conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
   644  
   645  	return nil
   646  }
   647  
// checkSystem validates platform-specific requirements: the process must run
// with an effective UID of 0, after which the kernel check in checkKernel is
// applied.
func checkSystem() error {
	if os.Geteuid() != 0 {
		return fmt.Errorf("The Docker daemon needs to be run as root")
	}
	return checkKernel()
}
   655  
   656  // configureMaxThreads sets the Go runtime max threads threshold
   657  // which is 90% of the kernel setting from /proc/sys/kernel/threads-max
   658  func configureMaxThreads(config *config.Config) error {
   659  	mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
   660  	if err != nil {
   661  		return err
   662  	}
   663  	mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
   664  	if err != nil {
   665  		return err
   666  	}
   667  	maxThreads := (mtint / 100) * 90
   668  	debug.SetMaxThreads(maxThreads)
   669  	logrus.Debugf("Golang's threads limit set to %d", maxThreads)
   670  	return nil
   671  }
   672  
   673  func overlaySupportsSelinux() (bool, error) {
   674  	f, err := os.Open("/proc/kallsyms")
   675  	if err != nil {
   676  		if os.IsNotExist(err) {
   677  			return false, nil
   678  		}
   679  		return false, err
   680  	}
   681  	defer f.Close()
   682  
   683  	var symAddr, symType, symName, text string
   684  
   685  	s := bufio.NewScanner(f)
   686  	for s.Scan() {
   687  		if err := s.Err(); err != nil {
   688  			return false, err
   689  		}
   690  
   691  		text = s.Text()
   692  		if _, err := fmt.Sscanf(text, "%s %s %s", &symAddr, &symType, &symName); err != nil {
   693  			return false, fmt.Errorf("Scanning '%s' failed: %s", text, err)
   694  		}
   695  
   696  		// Check for presence of symbol security_inode_copy_up.
   697  		if symName == "security_inode_copy_up" {
   698  			return true, nil
   699  		}
   700  	}
   701  	return false, nil
   702  }
   703  
   704  // configureKernelSecuritySupport configures and validates security support for the kernel
   705  func configureKernelSecuritySupport(config *config.Config, driverName string) error {
   706  	if config.EnableSelinuxSupport {
   707  		if !selinuxEnabled() {
   708  			logrus.Warn("Docker could not enable SELinux on the host system")
   709  			return nil
   710  		}
   711  
   712  		if driverName == "overlay" || driverName == "overlay2" {
   713  			// If driver is overlay or overlay2, make sure kernel
   714  			// supports selinux with overlay.
   715  			supported, err := overlaySupportsSelinux()
   716  			if err != nil {
   717  				return err
   718  			}
   719  
   720  			if !supported {
   721  				logrus.Warnf("SELinux is not supported with the %s graph driver on this kernel", driverName)
   722  			}
   723  		}
   724  	} else {
   725  		selinuxSetDisabled()
   726  	}
   727  	return nil
   728  }
   729  
   730  func (daemon *Daemon) initNetworkController(config *config.Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
   731  	netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
   732  	if err != nil {
   733  		return nil, err
   734  	}
   735  
   736  	controller, err := libnetwork.New(netOptions...)
   737  	if err != nil {
   738  		return nil, fmt.Errorf("error obtaining controller instance: %v", err)
   739  	}
   740  
   741  	if len(activeSandboxes) > 0 {
   742  		logrus.Info("There are old running containers, the network config will not take affect")
   743  		return controller, nil
   744  	}
   745  
   746  	// Initialize default network on "null"
   747  	if n, _ := controller.NetworkByName("none"); n == nil {
   748  		if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   749  			return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
   750  		}
   751  	}
   752  
   753  	// Initialize default network on "host"
   754  	if n, _ := controller.NetworkByName("host"); n == nil {
   755  		if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
   756  			return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
   757  		}
   758  	}
   759  
   760  	// Clear stale bridge network
   761  	if n, err := controller.NetworkByName("bridge"); err == nil {
   762  		if err = n.Delete(); err != nil {
   763  			return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
   764  		}
   765  	}
   766  
   767  	if !config.DisableBridge {
   768  		// Initialize default driver "bridge"
   769  		if err := initBridgeDriver(controller, config); err != nil {
   770  			return nil, err
   771  		}
   772  	} else {
   773  		removeDefaultBridgeInterface()
   774  	}
   775  
   776  	return controller, nil
   777  }
   778  
   779  func driverOptions(config *config.Config) []nwconfig.Option {
   780  	bridgeConfig := options.Generic{
   781  		"EnableIPForwarding":  config.BridgeConfig.EnableIPForward,
   782  		"EnableIPTables":      config.BridgeConfig.EnableIPTables,
   783  		"EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy,
   784  		"UserlandProxyPath":   config.BridgeConfig.UserlandProxyPath}
   785  	bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}
   786  
   787  	dOptions := []nwconfig.Option{}
   788  	dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption))
   789  	return dOptions
   790  }
   791  
// initBridgeDriver creates the default "bridge" network on the controller.
//
// The bridge interface name is config.BridgeConfig.Iface when set, otherwise
// bridge.DefaultBridgeName. Driver options (MTU, masquerading, ICC, default
// binding IP) and the IPv4/IPv6 IPAM configuration are derived from config
// and from the addresses reported by netutils.ElectInterfaceAddresses for
// that interface. An error is returned when the addresses cannot be listed,
// when any configured CIDR fails to parse, or when network creation fails.
func initBridgeDriver(controller libnetwork.NetworkController, config *config.Config) error {
	bridgeName := bridge.DefaultBridgeName
	if config.BridgeConfig.Iface != "" {
		bridgeName = config.BridgeConfig.Iface
	}
	netOption := map[string]string{
		bridge.BridgeName:         bridgeName,
		bridge.DefaultBridge:      strconv.FormatBool(true),
		netlabel.DriverMTU:        strconv.Itoa(config.Mtu),
		bridge.EnableIPMasquerade: strconv.FormatBool(config.BridgeConfig.EnableIPMasq),
		bridge.EnableICC:          strconv.FormatBool(config.BridgeConfig.InterContainerCommunication),
	}

	// --ip processing
	if config.BridgeConfig.DefaultIP != nil {
		netOption[bridge.DefaultBindingIP] = config.BridgeConfig.DefaultIP.String()
	}

	var (
		ipamV4Conf *libnetwork.IpamConf
		ipamV6Conf *libnetwork.IpamConf
	)

	// The IPv4 config always exists; the IPv6 one is only allocated further
	// down when an IPv6 option is actually set.
	ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}

	nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
	if err != nil {
		return errors.Wrap(err, "list bridge addresses failed")
	}

	// NOTE(review): nwList is indexed without a length check; this assumes
	// ElectInterfaceAddresses returns at least one IPv4 network on success —
	// confirm.
	nw := nwList[0]
	if len(nwList) > 1 && config.BridgeConfig.FixedCIDR != "" {
		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
		if err != nil {
			return errors.Wrap(err, "parse CIDR failed")
		}
		// Iterate through in case there are multiple addresses for the bridge
		// and prefer the first one contained in the fixed CIDR.
		for _, entry := range nwList {
			if fCIDR.Contains(entry.IP) {
				nw = entry
				break
			}
		}
	}

	ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
	// The error from GetHostPartIP is discarded; only the host part is used
	// to decide whether the elected address is recorded as the gateway.
	hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
	if hip.IsGlobalUnicast() {
		ipamV4Conf.Gateway = nw.IP.String()
	}

	// An explicit bridge IP overrides both the preferred pool and the gateway
	// derived from the interface above.
	if config.BridgeConfig.IP != "" {
		ipamV4Conf.PreferredPool = config.BridgeConfig.IP
		ip, _, err := net.ParseCIDR(config.BridgeConfig.IP)
		if err != nil {
			return err
		}
		ipamV4Conf.Gateway = ip.String()
	} else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
		logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
	}

	if config.BridgeConfig.FixedCIDR != "" {
		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
		if err != nil {
			return err
		}

		ipamV4Conf.SubPool = fCIDR.String()
	}

	if config.BridgeConfig.DefaultGatewayIPv4 != nil {
		ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.BridgeConfig.DefaultGatewayIPv4.String()
	}

	var deferIPv6Alloc bool
	if config.BridgeConfig.FixedCIDRv6 != "" {
		_, fCIDRv6, err := net.ParseCIDR(config.BridgeConfig.FixedCIDRv6)
		if err != nil {
			return err
		}

		// In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
		// at least 48 host bits, we need to guarantee the current behavior where the containers'
		// IPv6 addresses will be constructed based on the containers' interface MAC address.
		// We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
		// on this network until after the driver has created the endpoint and returned the
		// constructed address. Libnetwork will then reserve this address with the ipam driver.
		// A prefix length of at most 80 leaves 128-80 = 48 or more host bits.
		ones, _ := fCIDRv6.Mask.Size()
		deferIPv6Alloc = ones <= 80

		if ipamV6Conf == nil {
			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
		}
		ipamV6Conf.PreferredPool = fCIDRv6.String()

		// In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
		// address belongs to the same network, we need to inform libnetwork about it, so
		// that it can be reserved with IPAM and it will not be given away to somebody else
		for _, nw6 := range nw6List {
			if fCIDRv6.Contains(nw6.IP) {
				ipamV6Conf.Gateway = nw6.IP.String()
				break
			}
		}
	}

	if config.BridgeConfig.DefaultGatewayIPv6 != nil {
		if ipamV6Conf == nil {
			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
		}
		ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.BridgeConfig.DefaultGatewayIPv6.String()
	}

	// The IPv6 list stays empty unless one of the IPv6 options above
	// allocated ipamV6Conf.
	v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
	v6Conf := []*libnetwork.IpamConf{}
	if ipamV6Conf != nil {
		v6Conf = append(v6Conf, ipamV6Conf)
	}
	// Initialize default network on "bridge" with the same name
	_, err = controller.NewNetwork("bridge", "bridge", "",
		libnetwork.NetworkOptionEnableIPv6(config.BridgeConfig.EnableIPv6),
		libnetwork.NetworkOptionDriverOpts(netOption),
		libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
		libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
	if err != nil {
		return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
	}
	return nil
}
   922  
   923  // Remove default bridge interface if present (--bridge=none use case)
   924  func removeDefaultBridgeInterface() {
   925  	if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
   926  		if err := netlink.LinkDel(lnk); err != nil {
   927  			logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
   928  		}
   929  	}
   930  }
   931  
// getLayerInit returns the daemon's setupInitLayer method as a function
// value taking a string and returning an error.
func (daemon *Daemon) getLayerInit() func(string) error {
	return daemon.setupInitLayer
}
   935  
   936  // Parse the remapped root (user namespace) option, which can be one of:
   937  //   username            - valid username from /etc/passwd
   938  //   username:groupname  - valid username; valid groupname from /etc/group
   939  //   uid                 - 32-bit unsigned int valid Linux UID value
   940  //   uid:gid             - uid value; 32-bit unsigned int Linux GID value
   941  //
   942  //  If no groupname is specified, and a username is specified, an attempt
   943  //  will be made to lookup a gid for that username as a groupname
   944  //
   945  //  If names are used, they are verified to exist in passwd/group
   946  func parseRemappedRoot(usergrp string) (string, string, error) {
   947  
   948  	var (
   949  		userID, groupID     int
   950  		username, groupname string
   951  	)
   952  
   953  	idparts := strings.Split(usergrp, ":")
   954  	if len(idparts) > 2 {
   955  		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
   956  	}
   957  
   958  	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
   959  		// must be a uid; take it as valid
   960  		userID = int(uid)
   961  		luser, err := idtools.LookupUID(userID)
   962  		if err != nil {
   963  			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
   964  		}
   965  		username = luser.Name
   966  		if len(idparts) == 1 {
   967  			// if the uid was numeric and no gid was specified, take the uid as the gid
   968  			groupID = userID
   969  			lgrp, err := idtools.LookupGID(groupID)
   970  			if err != nil {
   971  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
   972  			}
   973  			groupname = lgrp.Name
   974  		}
   975  	} else {
   976  		lookupName := idparts[0]
   977  		// special case: if the user specified "default", they want Docker to create or
   978  		// use (after creation) the "dockremap" user/group for root remapping
   979  		if lookupName == defaultIDSpecifier {
   980  			lookupName = defaultRemappedID
   981  		}
   982  		luser, err := idtools.LookupUser(lookupName)
   983  		if err != nil && idparts[0] != defaultIDSpecifier {
   984  			// error if the name requested isn't the special "dockremap" ID
   985  			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
   986  		} else if err != nil {
   987  			// special case-- if the username == "default", then we have been asked
   988  			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
   989  			// ranges will be used for the user and group mappings in user namespaced containers
   990  			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
   991  			if err == nil {
   992  				return defaultRemappedID, defaultRemappedID, nil
   993  			}
   994  			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
   995  		}
   996  		username = luser.Name
   997  		if len(idparts) == 1 {
   998  			// we only have a string username, and no group specified; look up gid from username as group
   999  			group, err := idtools.LookupGroup(lookupName)
  1000  			if err != nil {
  1001  				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
  1002  			}
  1003  			groupname = group.Name
  1004  		}
  1005  	}
  1006  
  1007  	if len(idparts) == 2 {
  1008  		// groupname or gid is separately specified and must be resolved
  1009  		// to an unsigned 32-bit gid
  1010  		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
  1011  			// must be a gid, take it as valid
  1012  			groupID = int(gid)
  1013  			lgrp, err := idtools.LookupGID(groupID)
  1014  			if err != nil {
  1015  				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
  1016  			}
  1017  			groupname = lgrp.Name
  1018  		} else {
  1019  			// not a number; attempt a lookup
  1020  			if _, err := idtools.LookupGroup(idparts[1]); err != nil {
  1021  				return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
  1022  			}
  1023  			groupname = idparts[1]
  1024  		}
  1025  	}
  1026  	return username, groupname, nil
  1027  }
  1028  
  1029  func setupRemappedRoot(config *config.Config) (*idtools.IDMappings, error) {
  1030  	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
  1031  		return nil, fmt.Errorf("User namespaces are only supported on Linux")
  1032  	}
  1033  
  1034  	// if the daemon was started with remapped root option, parse
  1035  	// the config option to the int uid,gid values
  1036  	if config.RemappedRoot != "" {
  1037  		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
  1038  		if err != nil {
  1039  			return nil, err
  1040  		}
  1041  		if username == "root" {
  1042  			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
  1043  			// effectively
  1044  			logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
  1045  			return &idtools.IDMappings{}, nil
  1046  		}
  1047  		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
  1048  		// update remapped root setting now that we have resolved them to actual names
  1049  		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
  1050  
  1051  		mappings, err := idtools.NewIDMappings(username, groupname)
  1052  		if err != nil {
  1053  			return nil, errors.Wrapf(err, "Can't create ID mappings: %v")
  1054  		}
  1055  		return mappings, nil
  1056  	}
  1057  	return &idtools.IDMappings{}, nil
  1058  }
  1059  
  1060  func setupDaemonRoot(config *config.Config, rootDir string, rootIDs idtools.IDPair) error {
  1061  	config.Root = rootDir
  1062  	// the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
  1063  	// so that syscalls executing as non-root, operating on subdirectories of the graph root
  1064  	// (e.g. mounted layers of a container) can traverse this path.
  1065  	// The user namespace support will create subdirectories for the remapped root host uid:gid
  1066  	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
  1067  	// layer content subtrees.
  1068  	if _, err := os.Stat(rootDir); err == nil {
  1069  		// root current exists; verify the access bits are correct by setting them
  1070  		if err = os.Chmod(rootDir, 0711); err != nil {
  1071  			return err
  1072  		}
  1073  	} else if os.IsNotExist(err) {
  1074  		// no root exists yet, create it 0711 with root:root ownership
  1075  		if err := os.MkdirAll(rootDir, 0711); err != nil {
  1076  			return err
  1077  		}
  1078  	}
  1079  
  1080  	// if user namespaces are enabled we will create a subtree underneath the specified root
  1081  	// with any/all specified remapped root uid/gid options on the daemon creating
  1082  	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
  1083  	// `chdir()` to work for containers namespaced to that uid/gid)
  1084  	if config.RemappedRoot != "" {
  1085  		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootIDs.UID, rootIDs.GID))
  1086  		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
  1087  		// Create the root directory if it doesn't exist
  1088  		if err := idtools.MkdirAllAndChown(config.Root, 0700, rootIDs); err != nil {
  1089  			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
  1090  		}
  1091  		// we also need to verify that any pre-existing directories in the path to
  1092  		// the graphroot won't block access to remapped root--if any pre-existing directory
  1093  		// has strict permissions that don't allow "x", container start will fail, so
  1094  		// better to warn and fail now
  1095  		dirPath := config.Root
  1096  		for {
  1097  			dirPath = filepath.Dir(dirPath)
  1098  			if dirPath == "/" {
  1099  				break
  1100  			}
  1101  			if !idtools.CanAccess(dirPath, rootIDs) {
  1102  				return fmt.Errorf("A subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories.", config.Root)
  1103  			}
  1104  		}
  1105  	}
  1106  	return nil
  1107  }
  1108  
  1109  // registerLinks writes the links to a file.
  1110  func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
  1111  	if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() {
  1112  		return nil
  1113  	}
  1114  
  1115  	for _, l := range hostConfig.Links {
  1116  		name, alias, err := opts.ParseLink(l)
  1117  		if err != nil {
  1118  			return err
  1119  		}
  1120  		child, err := daemon.GetContainer(name)
  1121  		if err != nil {
  1122  			return fmt.Errorf("Could not get container for %s", name)
  1123  		}
  1124  		for child.HostConfig.NetworkMode.IsContainer() {
  1125  			parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2)
  1126  			child, err = daemon.GetContainer(parts[1])
  1127  			if err != nil {
  1128  				return fmt.Errorf("Could not get container for %s", parts[1])
  1129  			}
  1130  		}
  1131  		if child.HostConfig.NetworkMode.IsHost() {
  1132  			return runconfig.ErrConflictHostNetworkAndLinks
  1133  		}
  1134  		if err := daemon.registerLink(container, child, alias); err != nil {
  1135  			return err
  1136  		}
  1137  	}
  1138  
  1139  	// After we load all the links into the daemon
  1140  	// set them to nil on the hostconfig
  1141  	return container.WriteHostConfig()
  1142  }
  1143  
// conditionalMountOnStart is a platform specific helper function during the
// container start to call mount. In this file it mounts unconditionally by
// delegating to daemon.Mount and returning its error.
func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
	return daemon.Mount(container)
}
  1149  
// conditionalUnmountOnCleanup is a platform specific helper function called
// during the cleanup of a container to unmount. In this file it unmounts
// unconditionally by delegating to daemon.Unmount and returning its error.
func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
	return daemon.Unmount(container)
}
  1155  
  1156  func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
  1157  	if !c.IsRunning() {
  1158  		return nil, errNotRunning{c.ID}
  1159  	}
  1160  	stats, err := daemon.containerd.Stats(c.ID)
  1161  	if err != nil {
  1162  		return nil, err
  1163  	}
  1164  	s := &types.StatsJSON{}
  1165  	cgs := stats.CgroupStats
  1166  	if cgs != nil {
  1167  		s.BlkioStats = types.BlkioStats{
  1168  			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
  1169  			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
  1170  			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
  1171  			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
  1172  			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
  1173  			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
  1174  			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
  1175  			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
  1176  		}
  1177  		cpu := cgs.CpuStats
  1178  		s.CPUStats = types.CPUStats{
  1179  			CPUUsage: types.CPUUsage{
  1180  				TotalUsage:        cpu.CpuUsage.TotalUsage,
  1181  				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
  1182  				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
  1183  				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
  1184  			},
  1185  			ThrottlingData: types.ThrottlingData{
  1186  				Periods:          cpu.ThrottlingData.Periods,
  1187  				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
  1188  				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
  1189  			},
  1190  		}
  1191  		mem := cgs.MemoryStats.Usage
  1192  		s.MemoryStats = types.MemoryStats{
  1193  			Usage:    mem.Usage,
  1194  			MaxUsage: mem.MaxUsage,
  1195  			Stats:    cgs.MemoryStats.Stats,
  1196  			Failcnt:  mem.Failcnt,
  1197  			Limit:    mem.Limit,
  1198  		}
  1199  		// if the container does not set memory limit, use the machineMemory
  1200  		if mem.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
  1201  			s.MemoryStats.Limit = daemon.machineMemory
  1202  		}
  1203  		if cgs.PidsStats != nil {
  1204  			s.PidsStats = types.PidsStats{
  1205  				Current: cgs.PidsStats.Current,
  1206  			}
  1207  		}
  1208  	}
  1209  	s.Read, err = ptypes.Timestamp(stats.Timestamp)
  1210  	if err != nil {
  1211  		return nil, err
  1212  	}
  1213  	return s, nil
  1214  }
  1215  
// setDefaultIsolation determines the default isolation mode for the
// daemon to run in. This is only applicable on Windows; in this file it is a
// no-op that always returns nil.
func (daemon *Daemon) setDefaultIsolation() error {
	return nil
}
  1221  
  1222  func rootFSToAPIType(rootfs *image.RootFS) types.RootFS {
  1223  	var layers []string
  1224  	for _, l := range rootfs.DiffIDs {
  1225  		layers = append(layers, l.String())
  1226  	}
  1227  	return types.RootFS{
  1228  		Type:   rootfs.Type,
  1229  		Layers: layers,
  1230  	}
  1231  }
  1232  
// setupDaemonProcess sets various settings for the daemon's process.
// Currently that is only the OOM score adjustment taken from
// config.OOMScoreAdjust; the error from setupOOMScoreAdj is returned as is.
func setupDaemonProcess(config *config.Config) error {
	// setup the daemon's oom_score_adj
	return setupOOMScoreAdj(config.OOMScoreAdjust)
}
  1238  
  1239  func setupOOMScoreAdj(score int) error {
  1240  	f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0)
  1241  	if err != nil {
  1242  		return err
  1243  	}
  1244  	defer f.Close()
  1245  	stringScore := strconv.Itoa(score)
  1246  	_, err = f.WriteString(stringScore)
  1247  	if os.IsPermission(err) {
  1248  		// Setting oom_score_adj does not work in an
  1249  		// unprivileged container. Ignore the error, but log
  1250  		// it if we appear not to be in that situation.
  1251  		if !rsystem.RunningInUserNS() {
  1252  			logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore)
  1253  		}
  1254  		return nil
  1255  	}
  1256  
  1257  	return err
  1258  }
  1259  
  1260  func (daemon *Daemon) initCgroupsPath(path string) error {
  1261  	if path == "/" || path == "." {
  1262  		return nil
  1263  	}
  1264  
  1265  	if daemon.configStore.CPURealtimePeriod == 0 && daemon.configStore.CPURealtimeRuntime == 0 {
  1266  		return nil
  1267  	}
  1268  
  1269  	// Recursively create cgroup to ensure that the system and all parent cgroups have values set
  1270  	// for the period and runtime as this limits what the children can be set to.
  1271  	daemon.initCgroupsPath(filepath.Dir(path))
  1272  
  1273  	mnt, root, err := cgroups.FindCgroupMountpointAndRoot("cpu")
  1274  	if err != nil {
  1275  		return err
  1276  	}
  1277  	// When docker is run inside docker, the root is based of the host cgroup.
  1278  	// Should this be handled in runc/libcontainer/cgroups ?
  1279  	if strings.HasPrefix(root, "/docker/") {
  1280  		root = "/"
  1281  	}
  1282  
  1283  	path = filepath.Join(mnt, root, path)
  1284  	sysinfo := sysinfo.New(true)
  1285  	if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil {
  1286  		return err
  1287  	}
  1288  	if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimeRuntime, daemon.configStore.CPURealtimeRuntime, "cpu.rt_runtime_us", path); err != nil {
  1289  		return err
  1290  	}
  1291  	return nil
  1292  }
  1293  
  1294  func maybeCreateCPURealTimeFile(sysinfoPresent bool, configValue int64, file string, path string) error {
  1295  	if sysinfoPresent && configValue != 0 {
  1296  		if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
  1297  			return err
  1298  		}
  1299  		if err := ioutil.WriteFile(filepath.Join(path, file), []byte(strconv.FormatInt(configValue, 10)), 0700); err != nil {
  1300  			return err
  1301  		}
  1302  	}
  1303  	return nil
  1304  }
  1305  
  1306  func (daemon *Daemon) setupSeccompProfile() error {
  1307  	if daemon.configStore.SeccompProfile != "" {
  1308  		daemon.seccompProfilePath = daemon.configStore.SeccompProfile
  1309  		b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile)
  1310  		if err != nil {
  1311  			return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err)
  1312  		}
  1313  		daemon.seccompProfile = b
  1314  	}
  1315  	return nil
  1316  }