github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/daemon/daemon_unix.go (about) 1 // +build linux freebsd 2 3 package daemon 4 5 import ( 6 "bufio" 7 "bytes" 8 "fmt" 9 "io/ioutil" 10 "net" 11 "os" 12 "path/filepath" 13 "runtime" 14 "runtime/debug" 15 "strconv" 16 "strings" 17 "syscall" 18 "time" 19 20 "github.com/Sirupsen/logrus" 21 "github.com/docker/docker/api/types" 22 "github.com/docker/docker/api/types/blkiodev" 23 pblkiodev "github.com/docker/docker/api/types/blkiodev" 24 containertypes "github.com/docker/docker/api/types/container" 25 "github.com/docker/docker/container" 26 "github.com/docker/docker/daemon/config" 27 "github.com/docker/docker/image" 28 "github.com/docker/docker/opts" 29 "github.com/docker/docker/pkg/idtools" 30 "github.com/docker/docker/pkg/parsers" 31 "github.com/docker/docker/pkg/parsers/kernel" 32 "github.com/docker/docker/pkg/sysinfo" 33 "github.com/docker/docker/runconfig" 34 "github.com/docker/docker/volume" 35 "github.com/docker/libnetwork" 36 nwconfig "github.com/docker/libnetwork/config" 37 "github.com/docker/libnetwork/drivers/bridge" 38 "github.com/docker/libnetwork/netlabel" 39 "github.com/docker/libnetwork/netutils" 40 "github.com/docker/libnetwork/options" 41 lntypes "github.com/docker/libnetwork/types" 42 "github.com/golang/protobuf/ptypes" 43 "github.com/opencontainers/runc/libcontainer/cgroups" 44 rsystem "github.com/opencontainers/runc/libcontainer/system" 45 specs "github.com/opencontainers/runtime-spec/specs-go" 46 "github.com/opencontainers/selinux/go-selinux/label" 47 "github.com/pkg/errors" 48 "github.com/vishvananda/netlink" 49 ) 50 51 const ( 52 // See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269 53 linuxMinCPUShares = 2 54 linuxMaxCPUShares = 262144 55 platformSupported = true 56 // It's not kernel limit, we want this 4M limit to supply a reasonable functional container 57 linuxMinMemory = 4194304 58 // constants for 
remapped root settings 59 defaultIDSpecifier string = "default" 60 defaultRemappedID string = "dockremap" 61 62 // constant for cgroup drivers 63 cgroupFsDriver = "cgroupfs" 64 cgroupSystemdDriver = "systemd" 65 ) 66 67 func getMemoryResources(config containertypes.Resources) *specs.LinuxMemory { 68 memory := specs.LinuxMemory{} 69 70 if config.Memory > 0 { 71 limit := uint64(config.Memory) 72 memory.Limit = &limit 73 } 74 75 if config.MemoryReservation > 0 { 76 reservation := uint64(config.MemoryReservation) 77 memory.Reservation = &reservation 78 } 79 80 if config.MemorySwap > 0 { 81 swap := uint64(config.MemorySwap) 82 memory.Swap = &swap 83 } 84 85 if config.MemorySwappiness != nil { 86 swappiness := uint64(*config.MemorySwappiness) 87 memory.Swappiness = &swappiness 88 } 89 90 if config.KernelMemory != 0 { 91 kernelMemory := uint64(config.KernelMemory) 92 memory.Kernel = &kernelMemory 93 } 94 95 return &memory 96 } 97 98 func getCPUResources(config containertypes.Resources) (*specs.LinuxCPU, error) { 99 cpu := specs.LinuxCPU{} 100 101 if config.CPUShares < 0 { 102 return nil, fmt.Errorf("shares: invalid argument") 103 } 104 if config.CPUShares >= 0 { 105 shares := uint64(config.CPUShares) 106 cpu.Shares = &shares 107 } 108 109 if config.CpusetCpus != "" { 110 cpu.Cpus = config.CpusetCpus 111 } 112 113 if config.CpusetMems != "" { 114 cpu.Mems = config.CpusetMems 115 } 116 117 if config.NanoCPUs > 0 { 118 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 119 period := uint64(100 * time.Millisecond / time.Microsecond) 120 quota := config.NanoCPUs * int64(period) / 1e9 121 cpu.Period = &period 122 cpu.Quota = "a 123 } 124 125 if config.CPUPeriod != 0 { 126 period := uint64(config.CPUPeriod) 127 cpu.Period = &period 128 } 129 130 if config.CPUQuota != 0 { 131 q := config.CPUQuota 132 cpu.Quota = &q 133 } 134 135 if config.CPURealtimePeriod != 0 { 136 period := uint64(config.CPURealtimePeriod) 137 cpu.RealtimePeriod = &period 138 } 139 140 if 
config.CPURealtimeRuntime != 0 { 141 c := config.CPURealtimeRuntime 142 cpu.RealtimeRuntime = &c 143 } 144 145 return &cpu, nil 146 } 147 148 func getBlkioWeightDevices(config containertypes.Resources) ([]specs.LinuxWeightDevice, error) { 149 var stat syscall.Stat_t 150 var blkioWeightDevices []specs.LinuxWeightDevice 151 152 for _, weightDevice := range config.BlkioWeightDevice { 153 if err := syscall.Stat(weightDevice.Path, &stat); err != nil { 154 return nil, err 155 } 156 weight := weightDevice.Weight 157 d := specs.LinuxWeightDevice{Weight: &weight} 158 d.Major = int64(stat.Rdev / 256) 159 d.Minor = int64(stat.Rdev % 256) 160 blkioWeightDevices = append(blkioWeightDevices, d) 161 } 162 163 return blkioWeightDevices, nil 164 } 165 166 func (daemon *Daemon) parseSecurityOpt(container *container.Container, hostConfig *containertypes.HostConfig) error { 167 container.NoNewPrivileges = daemon.configStore.NoNewPrivileges 168 return parseSecurityOpt(container, hostConfig) 169 } 170 171 func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error { 172 var ( 173 labelOpts []string 174 err error 175 ) 176 177 for _, opt := range config.SecurityOpt { 178 if opt == "no-new-privileges" { 179 container.NoNewPrivileges = true 180 continue 181 } 182 if opt == "disable" { 183 labelOpts = append(labelOpts, "disable") 184 continue 185 } 186 187 var con []string 188 if strings.Contains(opt, "=") { 189 con = strings.SplitN(opt, "=", 2) 190 } else if strings.Contains(opt, ":") { 191 con = strings.SplitN(opt, ":", 2) 192 logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 17.04, use `=` instead.") 193 } 194 if len(con) != 2 { 195 return fmt.Errorf("invalid --security-opt 1: %q", opt) 196 } 197 198 switch con[0] { 199 case "label": 200 labelOpts = append(labelOpts, con[1]) 201 case "apparmor": 202 container.AppArmorProfile = con[1] 203 case "seccomp": 204 container.SeccompProfile = con[1] 205 
case "no-new-privileges": 206 noNewPrivileges, err := strconv.ParseBool(con[1]) 207 if err != nil { 208 return fmt.Errorf("invalid --security-opt 2: %q", opt) 209 } 210 container.NoNewPrivileges = noNewPrivileges 211 default: 212 return fmt.Errorf("invalid --security-opt 2: %q", opt) 213 } 214 } 215 216 container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts) 217 return err 218 } 219 220 func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.LinuxThrottleDevice, error) { 221 var throttleDevices []specs.LinuxThrottleDevice 222 var stat syscall.Stat_t 223 224 for _, d := range devs { 225 if err := syscall.Stat(d.Path, &stat); err != nil { 226 return nil, err 227 } 228 d := specs.LinuxThrottleDevice{Rate: d.Rate} 229 d.Major = int64(stat.Rdev / 256) 230 d.Minor = int64(stat.Rdev % 256) 231 throttleDevices = append(throttleDevices, d) 232 } 233 234 return throttleDevices, nil 235 } 236 237 func checkKernel() error { 238 // Check for unsupported kernel versions 239 // FIXME: it would be cleaner to not test for specific versions, but rather 240 // test for specific functionalities. 241 // Unfortunately we can't test for the feature "does not cause a kernel panic" 242 // without actually causing a kernel panic, so we need this workaround until 243 // the circumstances of pre-3.10 crashes are clearer. 244 // For details see https://github.com/docker/docker/issues/407 245 // Docker 1.11 and above doesn't actually run on kernels older than 3.4, 246 // due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4). 247 if !kernel.CheckKernelVersion(3, 10, 0) { 248 v, _ := kernel.GetKernelVersion() 249 if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" { 250 logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. 
Please upgrade your kernel to 3.10.0 or newer.", v.String()) 251 } 252 } 253 return nil 254 } 255 256 // adaptContainerSettings is called during container creation to modify any 257 // settings necessary in the HostConfig structure. 258 func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error { 259 if adjustCPUShares && hostConfig.CPUShares > 0 { 260 // Handle unsupported CPUShares 261 if hostConfig.CPUShares < linuxMinCPUShares { 262 logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares) 263 hostConfig.CPUShares = linuxMinCPUShares 264 } else if hostConfig.CPUShares > linuxMaxCPUShares { 265 logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares) 266 hostConfig.CPUShares = linuxMaxCPUShares 267 } 268 } 269 if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 { 270 // By default, MemorySwap is set to twice the size of Memory. 271 hostConfig.MemorySwap = hostConfig.Memory * 2 272 } 273 if hostConfig.ShmSize == 0 { 274 hostConfig.ShmSize = config.DefaultShmSize 275 if daemon.configStore != nil { 276 hostConfig.ShmSize = int64(daemon.configStore.ShmSize) 277 } 278 } 279 var err error 280 opts, err := daemon.generateSecurityOpt(hostConfig) 281 if err != nil { 282 return err 283 } 284 hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...) 
285 if hostConfig.MemorySwappiness == nil { 286 defaultSwappiness := int64(-1) 287 hostConfig.MemorySwappiness = &defaultSwappiness 288 } 289 if hostConfig.OomKillDisable == nil { 290 defaultOomKillDisable := false 291 hostConfig.OomKillDisable = &defaultOomKillDisable 292 } 293 294 return nil 295 } 296 297 func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) { 298 warnings := []string{} 299 300 // memory subsystem checks and adjustments 301 if resources.Memory != 0 && resources.Memory < linuxMinMemory { 302 return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB") 303 } 304 if resources.Memory > 0 && !sysInfo.MemoryLimit { 305 warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 306 logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 307 resources.Memory = 0 308 resources.MemorySwap = -1 309 } 310 if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit { 311 warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.") 312 logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. 
Memory limited without swap.") 313 resources.MemorySwap = -1 314 } 315 if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory { 316 return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage") 317 } 318 if resources.Memory == 0 && resources.MemorySwap > 0 && !update { 319 return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage") 320 } 321 if resources.MemorySwappiness != nil && *resources.MemorySwappiness != -1 && !sysInfo.MemorySwappiness { 322 warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.") 323 logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.") 324 resources.MemorySwappiness = nil 325 } 326 if resources.MemorySwappiness != nil { 327 swappiness := *resources.MemorySwappiness 328 if swappiness < -1 || swappiness > 100 { 329 return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness) 330 } 331 } 332 if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation { 333 warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.") 334 logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. 
Limitation discarded.") 335 resources.MemoryReservation = 0 336 } 337 if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory { 338 return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB") 339 } 340 if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation { 341 return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage") 342 } 343 if resources.KernelMemory > 0 && !sysInfo.KernelMemory { 344 warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 345 logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 346 resources.KernelMemory = 0 347 } 348 if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory { 349 return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB") 350 } 351 if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) { 352 warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") 353 logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") 354 } 355 if resources.OomKillDisable != nil && !sysInfo.OomKillDisable { 356 // only produce warnings if the setting wasn't to *disable* the OOM Kill; no point 357 // warning the caller if they already wanted the feature to be off 358 if *resources.OomKillDisable { 359 warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.") 360 logrus.Warn("Your kernel does not support OomKillDisable. 
OomKillDisable discarded.") 361 } 362 resources.OomKillDisable = nil 363 } 364 365 if resources.PidsLimit != 0 && !sysInfo.PidsLimit { 366 warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.") 367 logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.") 368 resources.PidsLimit = 0 369 } 370 371 // cpu subsystem checks and adjustments 372 if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 { 373 return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set") 374 } 375 if resources.NanoCPUs > 0 && resources.CPUQuota > 0 { 376 return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set") 377 } 378 if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) { 379 return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted") 380 } 381 // The highest precision we could get on Linux is 0.001, by setting 382 // cpu.cfs_period_us=1000ms 383 // cpu.cfs_quota=1ms 384 // See the following link for details: 385 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 386 // Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error. 387 // The error message is 0.01 so that this is consistent with Windows 388 if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 { 389 return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU()) 390 } 391 392 if resources.CPUShares > 0 && !sysInfo.CPUShares { 393 warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.") 394 logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. 
Shares discarded.") 395 resources.CPUShares = 0 396 } 397 if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod { 398 warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.") 399 logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.") 400 resources.CPUPeriod = 0 401 } 402 if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) { 403 return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)") 404 } 405 if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota { 406 warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.") 407 logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.") 408 resources.CPUQuota = 0 409 } 410 if resources.CPUQuota > 0 && resources.CPUQuota < 1000 { 411 return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)") 412 } 413 if resources.CPUPercent > 0 { 414 warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)) 415 logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS) 416 resources.CPUPercent = 0 417 } 418 419 // cpuset subsystem checks and adjustments 420 if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset { 421 warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.") 422 logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. 
Cpuset discarded.") 423 resources.CpusetCpus = "" 424 resources.CpusetMems = "" 425 } 426 cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus) 427 if err != nil { 428 return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus) 429 } 430 if !cpusAvailable { 431 return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus) 432 } 433 memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems) 434 if err != nil { 435 return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems) 436 } 437 if !memsAvailable { 438 return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems) 439 } 440 441 // blkio subsystem checks and adjustments 442 if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight { 443 warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.") 444 logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.") 445 resources.BlkioWeight = 0 446 } 447 if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) { 448 return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000") 449 } 450 if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 { 451 return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS) 452 } 453 if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice { 454 warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.") 455 logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. 
Weight-device discarded.") 456 resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{} 457 } 458 if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice { 459 warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.") 460 logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded") 461 resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{} 462 } 463 if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice { 464 warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.") 465 logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.") 466 resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{} 467 } 468 if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice { 469 warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.") 470 logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.") 471 resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{} 472 } 473 if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice { 474 warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.") 475 logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. 
Block I/O IOPS write limit discarded.") 476 resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{} 477 } 478 479 return warnings, nil 480 } 481 482 func (daemon *Daemon) getCgroupDriver() string { 483 cgroupDriver := cgroupFsDriver 484 485 if UsingSystemd(daemon.configStore) { 486 cgroupDriver = cgroupSystemdDriver 487 } 488 return cgroupDriver 489 } 490 491 // getCD gets the raw value of the native.cgroupdriver option, if set. 492 func getCD(config *config.Config) string { 493 for _, option := range config.ExecOptions { 494 key, val, err := parsers.ParseKeyValueOpt(option) 495 if err != nil || !strings.EqualFold(key, "native.cgroupdriver") { 496 continue 497 } 498 return val 499 } 500 return "" 501 } 502 503 // VerifyCgroupDriver validates native.cgroupdriver 504 func VerifyCgroupDriver(config *config.Config) error { 505 cd := getCD(config) 506 if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver { 507 return nil 508 } 509 return fmt.Errorf("native.cgroupdriver option %s not supported", cd) 510 } 511 512 // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd 513 func UsingSystemd(config *config.Config) bool { 514 return getCD(config) == cgroupSystemdDriver 515 } 516 517 // verifyPlatformContainerSettings performs platform-specific validation of the 518 // hostconfig and config structures. 519 func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) { 520 var warnings []string 521 sysInfo := sysinfo.New(true) 522 523 warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config) 524 if err != nil { 525 return warnings, err 526 } 527 528 w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update) 529 530 // no matter err is nil or not, w could have data in itself. 531 warnings = append(warnings, w...) 
532 533 if err != nil { 534 return warnings, err 535 } 536 537 if hostConfig.ShmSize < 0 { 538 return warnings, fmt.Errorf("SHM size can not be less than 0") 539 } 540 541 if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 { 542 return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj) 543 } 544 545 // ip-forwarding does not affect container with '--net=host' (or '--net=none') 546 if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) { 547 warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.") 548 logrus.Warn("IPv4 forwarding is disabled. Networking will not work") 549 } 550 // check for various conflicting options with user namespaces 551 if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() { 552 if hostConfig.Privileged { 553 return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces") 554 } 555 if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() { 556 return warnings, fmt.Errorf("Cannot share the host's network namespace when user namespaces are enabled") 557 } 558 if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() { 559 return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled") 560 } 561 } 562 if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) { 563 // CgroupParent for systemd cgroup should be named as "xxx.slice" 564 if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") { 565 return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"") 566 } 567 } 568 if hostConfig.Runtime == "" { 569 hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName() 570 } 571 572 if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil { 573 return warnings, fmt.Errorf("Unknown runtime 
specified %s", hostConfig.Runtime) 574 } 575 576 for dest := range hostConfig.Tmpfs { 577 if err := volume.ValidateTmpfsMountDestination(dest); err != nil { 578 return warnings, err 579 } 580 } 581 582 return warnings, nil 583 } 584 585 // reloadPlatform updates configuration with platform specific options 586 // and updates the passed attributes 587 func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) { 588 if conf.IsValueSet("runtimes") { 589 daemon.configStore.Runtimes = conf.Runtimes 590 // Always set the default one 591 daemon.configStore.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary} 592 } 593 594 if conf.DefaultRuntime != "" { 595 daemon.configStore.DefaultRuntime = conf.DefaultRuntime 596 } 597 598 if conf.IsValueSet("default-shm-size") { 599 daemon.configStore.ShmSize = conf.ShmSize 600 } 601 602 // Update attributes 603 var runtimeList bytes.Buffer 604 for name, rt := range daemon.configStore.Runtimes { 605 if runtimeList.Len() > 0 { 606 runtimeList.WriteRune(' ') 607 } 608 runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt)) 609 } 610 611 attributes["runtimes"] = runtimeList.String() 612 attributes["default-runtime"] = daemon.configStore.DefaultRuntime 613 attributes["default-shm-size"] = fmt.Sprintf("%d", daemon.configStore.ShmSize) 614 } 615 616 // verifyDaemonSettings performs validation of daemon config struct 617 func verifyDaemonSettings(conf *config.Config) error { 618 // Check for mutually incompatible config options 619 if conf.BridgeConfig.Iface != "" && conf.BridgeConfig.IP != "" { 620 return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one") 621 } 622 if !conf.BridgeConfig.EnableIPTables && !conf.BridgeConfig.InterContainerCommunication { 623 return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. 
Please set --icc or --iptables to true") 624 } 625 if !conf.BridgeConfig.EnableIPTables && conf.BridgeConfig.EnableIPMasq { 626 conf.BridgeConfig.EnableIPMasq = false 627 } 628 if err := VerifyCgroupDriver(conf); err != nil { 629 return err 630 } 631 if conf.CgroupParent != "" && UsingSystemd(conf) { 632 if len(conf.CgroupParent) <= 6 || !strings.HasSuffix(conf.CgroupParent, ".slice") { 633 return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"") 634 } 635 } 636 637 if conf.DefaultRuntime == "" { 638 conf.DefaultRuntime = config.StockRuntimeName 639 } 640 if conf.Runtimes == nil { 641 conf.Runtimes = make(map[string]types.Runtime) 642 } 643 conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary} 644 645 return nil 646 } 647 648 // checkSystem validates platform-specific requirements 649 func checkSystem() error { 650 if os.Geteuid() != 0 { 651 return fmt.Errorf("The Docker daemon needs to be run as root") 652 } 653 return checkKernel() 654 } 655 656 // configureMaxThreads sets the Go runtime max threads threshold 657 // which is 90% of the kernel setting from /proc/sys/kernel/threads-max 658 func configureMaxThreads(config *config.Config) error { 659 mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max") 660 if err != nil { 661 return err 662 } 663 mtint, err := strconv.Atoi(strings.TrimSpace(string(mt))) 664 if err != nil { 665 return err 666 } 667 maxThreads := (mtint / 100) * 90 668 debug.SetMaxThreads(maxThreads) 669 logrus.Debugf("Golang's threads limit set to %d", maxThreads) 670 return nil 671 } 672 673 func overlaySupportsSelinux() (bool, error) { 674 f, err := os.Open("/proc/kallsyms") 675 if err != nil { 676 if os.IsNotExist(err) { 677 return false, nil 678 } 679 return false, err 680 } 681 defer f.Close() 682 683 var symAddr, symType, symName, text string 684 685 s := bufio.NewScanner(f) 686 for s.Scan() { 687 if err := s.Err(); err != nil { 688 return false, err 689 } 690 691 
text = s.Text()
		if _, err := fmt.Sscanf(text, "%s %s %s", &symAddr, &symType, &symName); err != nil {
			return false, fmt.Errorf("Scanning '%s' failed: %s", text, err)
		}

		// Check for presence of symbol security_inode_copy_up.
		if symName == "security_inode_copy_up" {
			return true, nil
		}
	}
	return false, nil
}

// configureKernelSecuritySupport configures and validates security support for the kernel.
//
// When SELinux support is not requested in config it is explicitly disabled.
// When it is requested but not enabled on the host, a warning is logged and
// nil is returned. For the "overlay" and "overlay2" graph drivers the kernel
// is additionally probed for SELinux-on-overlay support; a missing capability
// is only warned about, while a failure of the probe itself is returned.
func configureKernelSecuritySupport(config *config.Config, driverName string) error {
	if !config.EnableSelinuxSupport {
		selinuxSetDisabled()
		return nil
	}

	if !selinuxEnabled() {
		logrus.Warn("Docker could not enable SELinux on the host system")
		return nil
	}

	if driverName != "overlay" && driverName != "overlay2" {
		return nil
	}

	// If driver is overlay or overlay2, make sure kernel
	// supports selinux with overlay.
	supported, err := overlaySupportsSelinux()
	if err != nil {
		return err
	}
	if !supported {
		logrus.Warnf("SELinux is not supported with the %s graph driver on this kernel", driverName)
	}
	return nil
}

// initNetworkController creates the libnetwork controller for the daemon and,
// unless there are active sandboxes, (re)creates the default networks.
//
// With at least one entry in activeSandboxes the controller is returned as-is
// and no default network is touched. Otherwise the "none" and "host" networks
// are created if missing, an existing "bridge" network is deleted, and the
// default bridge is either re-initialized or, when config.DisableBridge is
// set, its host interface is removed.
func (daemon *Daemon) initNetworkController(config *config.Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
	netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
	if err != nil {
		return nil, err
	}

	controller, err := libnetwork.New(netOptions...)
	if err != nil {
		return nil, fmt.Errorf("error obtaining controller instance: %v", err)
	}

	if len(activeSandboxes) > 0 {
		logrus.Info("There are old running containers, the network config will not take affect")
		return controller, nil
	}

	// Initialize default network on "null"
	if n, _ := controller.NetworkByName("none"); n == nil {
		if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
			return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
		}
	}

	// Initialize default network on "host"
	if n, _ := controller.NetworkByName("host"); n == nil {
		if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
			return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
		}
	}

	// Clear stale bridge network
	if n, err := controller.NetworkByName("bridge"); err == nil {
		if err = n.Delete(); err != nil {
			return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
		}
	}

	if !config.DisableBridge {
		// Initialize default driver "bridge"
		if err := initBridgeDriver(controller, config); err != nil {
			return nil, err
		}
	} else {
		removeDefaultBridgeInterface()
	}

	return controller, nil
}

// driverOptions returns the libnetwork driver options derived from the daemon
// configuration: a single "bridge" driver config carrying the IP forwarding,
// iptables and userland-proxy settings.
func driverOptions(config *config.Config) []nwconfig.Option {
	bridgeConfig := options.Generic{
		"EnableIPForwarding":  config.BridgeConfig.EnableIPForward,
		"EnableIPTables":      config.BridgeConfig.EnableIPTables,
		"EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy,
		"UserlandProxyPath":   config.BridgeConfig.UserlandProxyPath,
	}
	bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}

	return []nwconfig.Option{nwconfig.OptionDriverConfig("bridge", bridgeOption)}
}

// initBridgeDriver creates the default "bridge" network on controller from the
// bridge-related daemon configuration.
//
// The IPv4 IPAM configuration is seeded from the addresses elected for the
// bridge interface and then overridden by the BridgeConfig IP, FixedCIDR and
// DefaultGatewayIPv4 settings. An IPv6 IPAM configuration is only built when
// FixedCIDRv6 or DefaultGatewayIPv6 is set.
func initBridgeDriver(controller libnetwork.NetworkController, config *config.Config) error {
	bridgeName := bridge.DefaultBridgeName
	if config.BridgeConfig.Iface != "" {
		bridgeName = config.BridgeConfig.Iface
	}
	netOption := map[string]string{
		bridge.BridgeName:         bridgeName,
		bridge.DefaultBridge:      strconv.FormatBool(true),
		netlabel.DriverMTU:        strconv.Itoa(config.Mtu),
		bridge.EnableIPMasquerade: strconv.FormatBool(config.BridgeConfig.EnableIPMasq),
		bridge.EnableICC:          strconv.FormatBool(config.BridgeConfig.InterContainerCommunication),
	}

	// --ip processing
	if config.BridgeConfig.DefaultIP != nil {
		netOption[bridge.DefaultBindingIP] = config.BridgeConfig.DefaultIP.String()
	}

	var (
		ipamV4Conf *libnetwork.IpamConf
		ipamV6Conf *libnetwork.IpamConf
	)

	ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}

	nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
	if err != nil {
		return errors.Wrap(err, "list bridge addresses failed")
	}

	// NOTE(review): indexing nwList[0] assumes ElectInterfaceAddresses returns
	// at least one IPv4 network whenever it returns a nil error — confirm.
	nw := nwList[0]
	if len(nwList) > 1 && config.BridgeConfig.FixedCIDR != "" {
		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
		if err != nil {
			return errors.Wrap(err, "parse CIDR failed")
		}
		// Iterate through in case there are multiple addresses for the bridge
		for _, entry := range nwList {
			if fCIDR.Contains(entry.IP) {
				nw = entry
				break
			}
		}
	}

	ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
	// The error from GetHostPartIP is discarded here; only the resulting host
	// part is inspected to decide whether nw.IP is usable as the gateway.
	hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
	if hip.IsGlobalUnicast() {
		ipamV4Conf.Gateway = nw.IP.String()
	}

	if config.BridgeConfig.IP != "" {
		ipamV4Conf.PreferredPool = config.BridgeConfig.IP
		ip, _, err := net.ParseCIDR(config.BridgeConfig.IP)
		if err != nil {
			return err
		}
		ipamV4Conf.Gateway = ip.String()
	} else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
		logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
	}

	if config.BridgeConfig.FixedCIDR != "" {
		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
		if err != nil {
			return err
		}

		ipamV4Conf.SubPool = fCIDR.String()
	}

	if config.BridgeConfig.DefaultGatewayIPv4 != nil {
		ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.BridgeConfig.DefaultGatewayIPv4.String()
	}

	var deferIPv6Alloc bool
	if config.BridgeConfig.FixedCIDRv6 != "" {
		_, fCIDRv6, err := net.ParseCIDR(config.BridgeConfig.FixedCIDRv6)
		if err != nil {
			return err
		}

		// In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
		// at least 48 host bits, we need to guarantee the current behavior where the containers'
		// IPv6 addresses will be constructed based on the containers' interface MAC address.
		// We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
		// on this network until after the driver has created the endpoint and returned the
		// constructed address. Libnetwork will then reserve this address with the ipam driver.
		ones, _ := fCIDRv6.Mask.Size()
		deferIPv6Alloc = ones <= 80 // a prefix of at most 80 bits leaves at least 48 host bits

		if ipamV6Conf == nil {
			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
		}
		ipamV6Conf.PreferredPool = fCIDRv6.String()

		// In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
		// address belongs to the same network, we need to inform libnetwork about it, so
		// that it can be reserved with IPAM and it will not be given away to somebody else
		for _, nw6 := range nw6List {
			if fCIDRv6.Contains(nw6.IP) {
				ipamV6Conf.Gateway = nw6.IP.String()
				break
			}
		}
	}

	if config.BridgeConfig.DefaultGatewayIPv6 != nil {
		if ipamV6Conf == nil {
			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
		}
		ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.BridgeConfig.DefaultGatewayIPv6.String()
	}

	v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
	v6Conf := []*libnetwork.IpamConf{}
	if ipamV6Conf != nil {
		v6Conf = append(v6Conf, ipamV6Conf)
	}
	// Initialize default network on "bridge" with the same name
	_, err = controller.NewNetwork("bridge", "bridge", "",
		libnetwork.NetworkOptionEnableIPv6(config.BridgeConfig.EnableIPv6),
		libnetwork.NetworkOptionDriverOpts(netOption),
		libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
		libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
	if err != nil {
		return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
	}
	return nil
}

// Remove default bridge interface if present (--bridge=none use case)
//
// A failure to look the link up is treated as "not present"; a failure to
// delete it is only logged as a warning.
func removeDefaultBridgeInterface() {
	if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
		if err := netlink.LinkDel(lnk); err != nil {
			logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
		}
	}
}

// getLayerInit returns the function used to initialize a new layer, which on
// this platform is daemon.setupInitLayer.
func (daemon *Daemon) getLayerInit() func(string) error {
	return daemon.setupInitLayer
}

// Parse the remapped root (user namespace) option, which can be one of:
//   username            - valid username from /etc/passwd
//   username:groupname  - valid username; valid groupname from /etc/group
//   uid                 - numeric Linux UID value (parsed as a 32-bit integer)
//   uid:gid             - uid value; numeric Linux GID value (parsed as a 32-bit integer)
//
// If no groupname is specified, and a username is specified, an attempt
// will be made to lookup a gid for that username as a groupname
//
// If names are used, they are verified to exist in passwd/group
//
// The special username "default" maps to the "dockremap" user; if that user
// does not exist yet it is created and returned as both user and group name.
// On success the resolved user name and group name are returned.
func parseRemappedRoot(usergrp string) (string, string, error) {
	var (
		userID, groupID     int
		username, groupname string
	)

	idparts := strings.Split(usergrp, ":")
	if len(idparts) > 2 {
		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
	}

	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
		// must be a uid; take it as valid
		userID = int(uid)
		luser, err := idtools.LookupUID(userID)
		if err != nil {
			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
		}
		username = luser.Name
		if len(idparts) == 1 {
			// if the uid was numeric and no gid was specified, take the uid as the gid
			groupID = userID
			lgrp, err := idtools.LookupGID(groupID)
			if err != nil {
				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
			}
			groupname = lgrp.Name
		}
	} else {
		lookupName := idparts[0]
		// special case: if the user specified "default", they want Docker to create or
		// use (after creation) the "dockremap" user/group for root remapping
		if lookupName == defaultIDSpecifier {
			lookupName = defaultRemappedID
		}
		luser, err := idtools.LookupUser(lookupName)
		if err != nil && idparts[0] != defaultIDSpecifier {
			// error if the name requested isn't the special "dockremap" ID
			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
		} else if err != nil {
			// special case-- if the username == "default", then we have been asked
			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
			// ranges will be used for the user and group mappings in user namespaced containers
			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
			if err == nil {
				return defaultRemappedID, defaultRemappedID, nil
			}
			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
		}
		username = luser.Name
		if len(idparts) == 1 {
			// we only have a string username, and no group specified; look up gid from username as group
			group, err := idtools.LookupGroup(lookupName)
			if err != nil {
				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
			}
			groupname = group.Name
		}
	}

	if len(idparts) == 2 {
		// groupname or gid is separately specified and must be resolved
		// to a gid
		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
			// must be a gid, take it as valid
			groupID = int(gid)
			lgrp, err := idtools.LookupGID(groupID)
			if err != nil {
				// A GID is looked up here, so the file to point the user
				// at is /etc/group (same wording as the uid-only path above).
				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
			}
			groupname = lgrp.Name
		} else {
			// not a number; attempt a lookup
			if _, err := idtools.LookupGroup(idparts[1]); err != nil {
				return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
			}
			groupname = idparts[1]
		}
	}
	return username, groupname, nil
}

// setupRemappedRoot resolves the daemon's remapped-root (user namespace)
// option into ID mappings.
//
// With no RemappedRoot configured, or when it resolves to the "root" user, an
// empty IDMappings is returned. Otherwise config.RemappedRoot is rewritten to
// the resolved "username:groupname" form and the mappings for that pair are
// returned. A non-empty RemappedRoot on a non-Linux OS is an error.
func setupRemappedRoot(config *config.Config) (*idtools.IDMappings, error) {
	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
		return nil, fmt.Errorf("User namespaces are only supported on Linux")
	}

	// if the daemon was started with remapped root option, parse
	// the config option to the int uid,gid values
	if config.RemappedRoot != "" {
		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
		if err != nil {
			return nil, err
		}
		if username == "root" {
			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
			// effectively
			logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
			return &idtools.IDMappings{}, nil
		}
		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
		// update remapped root setting now that we have resolved them to actual names
		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)

		mappings, err := idtools.NewIDMappings(username, groupname)
		if err != nil {
			// Wrap already appends the cause; a dangling "%v" verb here would
			// render as "%!v(MISSING)" in the message.
			return nil, errors.Wrap(err, "Can't create ID mappings")
		}
		return mappings, nil
	}
	return &idtools.IDMappings{}, nil
}

// setupDaemonRoot prepares the daemon root directory and records it in
// config.Root.
//
// rootDir is chmod'ed to 0711 if it exists, or created with that mode if it
// does not. When a remapped root is configured, config.Root is moved to a
// "<uid>.<gid>" subdirectory owned by rootIDs, and every ancestor directory of
// that subdirectory is checked for accessibility by rootIDs.
func setupDaemonRoot(config *config.Config, rootDir string, rootIDs idtools.IDPair) error {
	config.Root = rootDir
	// the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
	// so that syscalls executing as non-root, operating on subdirectories of the graph root
	// (e.g. mounted layers of a container) can traverse this path.
	// The user namespace support will create subdirectories for the remapped root host uid:gid
	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
	// layer content subtrees.
	if _, err := os.Stat(rootDir); err == nil {
		// root current exists; verify the access bits are correct by setting them
		if err = os.Chmod(rootDir, 0711); err != nil {
			return err
		}
	} else if os.IsNotExist(err) {
		// no root exists yet, create it 0711 with root:root ownership
		if err := os.MkdirAll(rootDir, 0711); err != nil {
			return err
		}
	}
	// Any other Stat error is not reported here and execution continues.

	// if user namespaces are enabled we will create a subtree underneath the specified root
	// with any/all specified remapped root uid/gid options on the daemon creating
	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
	// `chdir()` to work for containers namespaced to that uid/gid)
	if config.RemappedRoot != "" {
		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootIDs.UID, rootIDs.GID))
		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
		// Create the root directory if it doesn't exist
		if err := idtools.MkdirAllAndChown(config.Root, 0700, rootIDs); err != nil {
			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
		}
		// we also need to verify that any pre-existing directories in the path to
		// the graphroot won't block access to remapped root--if any pre-existing directory
		// has strict permissions that don't allow "x", container start will fail, so
		// better to warn and fail now
		dirPath := config.Root
		for {
			parent := filepath.Dir(dirPath)
			// Stop at the filesystem root, and also once Dir can no longer
			// shorten the path: for a relative root it settles on "." and
			// would otherwise keep this loop spinning forever.
			if parent == "/" || parent == dirPath {
				break
			}
			dirPath = parent
			if !idtools.CanAccess(dirPath, rootIDs) {
				return fmt.Errorf("A subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories.", config.Root)
			}
		}
	}
	return nil
}

// registerLinks writes the links to a file.
1110 func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error { 1111 if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() { 1112 return nil 1113 } 1114 1115 for _, l := range hostConfig.Links { 1116 name, alias, err := opts.ParseLink(l) 1117 if err != nil { 1118 return err 1119 } 1120 child, err := daemon.GetContainer(name) 1121 if err != nil { 1122 return fmt.Errorf("Could not get container for %s", name) 1123 } 1124 for child.HostConfig.NetworkMode.IsContainer() { 1125 parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2) 1126 child, err = daemon.GetContainer(parts[1]) 1127 if err != nil { 1128 return fmt.Errorf("Could not get container for %s", parts[1]) 1129 } 1130 } 1131 if child.HostConfig.NetworkMode.IsHost() { 1132 return runconfig.ErrConflictHostNetworkAndLinks 1133 } 1134 if err := daemon.registerLink(container, child, alias); err != nil { 1135 return err 1136 } 1137 } 1138 1139 // After we load all the links into the daemon 1140 // set them to nil on the hostconfig 1141 return container.WriteHostConfig() 1142 } 1143 1144 // conditionalMountOnStart is a platform specific helper function during the 1145 // container start to call mount. 1146 func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error { 1147 return daemon.Mount(container) 1148 } 1149 1150 // conditionalUnmountOnCleanup is a platform specific helper function called 1151 // during the cleanup of a container to unmount. 
1152 func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error { 1153 return daemon.Unmount(container) 1154 } 1155 1156 func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { 1157 if !c.IsRunning() { 1158 return nil, errNotRunning{c.ID} 1159 } 1160 stats, err := daemon.containerd.Stats(c.ID) 1161 if err != nil { 1162 return nil, err 1163 } 1164 s := &types.StatsJSON{} 1165 cgs := stats.CgroupStats 1166 if cgs != nil { 1167 s.BlkioStats = types.BlkioStats{ 1168 IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive), 1169 IoServicedRecursive: copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive), 1170 IoQueuedRecursive: copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive), 1171 IoServiceTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive), 1172 IoWaitTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive), 1173 IoMergedRecursive: copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive), 1174 IoTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive), 1175 SectorsRecursive: copyBlkioEntry(cgs.BlkioStats.SectorsRecursive), 1176 } 1177 cpu := cgs.CpuStats 1178 s.CPUStats = types.CPUStats{ 1179 CPUUsage: types.CPUUsage{ 1180 TotalUsage: cpu.CpuUsage.TotalUsage, 1181 PercpuUsage: cpu.CpuUsage.PercpuUsage, 1182 UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode, 1183 UsageInUsermode: cpu.CpuUsage.UsageInUsermode, 1184 }, 1185 ThrottlingData: types.ThrottlingData{ 1186 Periods: cpu.ThrottlingData.Periods, 1187 ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods, 1188 ThrottledTime: cpu.ThrottlingData.ThrottledTime, 1189 }, 1190 } 1191 mem := cgs.MemoryStats.Usage 1192 s.MemoryStats = types.MemoryStats{ 1193 Usage: mem.Usage, 1194 MaxUsage: mem.MaxUsage, 1195 Stats: cgs.MemoryStats.Stats, 1196 Failcnt: mem.Failcnt, 1197 Limit: mem.Limit, 1198 } 1199 // if the container does not set memory limit, use the machineMemory 1200 if mem.Limit > daemon.machineMemory && 
daemon.machineMemory > 0 { 1201 s.MemoryStats.Limit = daemon.machineMemory 1202 } 1203 if cgs.PidsStats != nil { 1204 s.PidsStats = types.PidsStats{ 1205 Current: cgs.PidsStats.Current, 1206 } 1207 } 1208 } 1209 s.Read, err = ptypes.Timestamp(stats.Timestamp) 1210 if err != nil { 1211 return nil, err 1212 } 1213 return s, nil 1214 } 1215 1216 // setDefaultIsolation determines the default isolation mode for the 1217 // daemon to run in. This is only applicable on Windows 1218 func (daemon *Daemon) setDefaultIsolation() error { 1219 return nil 1220 } 1221 1222 func rootFSToAPIType(rootfs *image.RootFS) types.RootFS { 1223 var layers []string 1224 for _, l := range rootfs.DiffIDs { 1225 layers = append(layers, l.String()) 1226 } 1227 return types.RootFS{ 1228 Type: rootfs.Type, 1229 Layers: layers, 1230 } 1231 } 1232 1233 // setupDaemonProcess sets various settings for the daemon's process 1234 func setupDaemonProcess(config *config.Config) error { 1235 // setup the daemons oom_score_adj 1236 return setupOOMScoreAdj(config.OOMScoreAdjust) 1237 } 1238 1239 func setupOOMScoreAdj(score int) error { 1240 f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0) 1241 if err != nil { 1242 return err 1243 } 1244 defer f.Close() 1245 stringScore := strconv.Itoa(score) 1246 _, err = f.WriteString(stringScore) 1247 if os.IsPermission(err) { 1248 // Setting oom_score_adj does not work in an 1249 // unprivileged container. Ignore the error, but log 1250 // it if we appear not to be in that situation. 1251 if !rsystem.RunningInUserNS() { 1252 logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore) 1253 } 1254 return nil 1255 } 1256 1257 return err 1258 } 1259 1260 func (daemon *Daemon) initCgroupsPath(path string) error { 1261 if path == "/" || path == "." 
{ 1262 return nil 1263 } 1264 1265 if daemon.configStore.CPURealtimePeriod == 0 && daemon.configStore.CPURealtimeRuntime == 0 { 1266 return nil 1267 } 1268 1269 // Recursively create cgroup to ensure that the system and all parent cgroups have values set 1270 // for the period and runtime as this limits what the children can be set to. 1271 daemon.initCgroupsPath(filepath.Dir(path)) 1272 1273 mnt, root, err := cgroups.FindCgroupMountpointAndRoot("cpu") 1274 if err != nil { 1275 return err 1276 } 1277 // When docker is run inside docker, the root is based of the host cgroup. 1278 // Should this be handled in runc/libcontainer/cgroups ? 1279 if strings.HasPrefix(root, "/docker/") { 1280 root = "/" 1281 } 1282 1283 path = filepath.Join(mnt, root, path) 1284 sysinfo := sysinfo.New(true) 1285 if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil { 1286 return err 1287 } 1288 if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimeRuntime, daemon.configStore.CPURealtimeRuntime, "cpu.rt_runtime_us", path); err != nil { 1289 return err 1290 } 1291 return nil 1292 } 1293 1294 func maybeCreateCPURealTimeFile(sysinfoPresent bool, configValue int64, file string, path string) error { 1295 if sysinfoPresent && configValue != 0 { 1296 if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { 1297 return err 1298 } 1299 if err := ioutil.WriteFile(filepath.Join(path, file), []byte(strconv.FormatInt(configValue, 10)), 0700); err != nil { 1300 return err 1301 } 1302 } 1303 return nil 1304 } 1305 1306 func (daemon *Daemon) setupSeccompProfile() error { 1307 if daemon.configStore.SeccompProfile != "" { 1308 daemon.seccompProfilePath = daemon.configStore.SeccompProfile 1309 b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile) 1310 if err != nil { 1311 return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err) 1312 } 1313 daemon.seccompProfile 
= b 1314 } 1315 return nil 1316 }