github.com/rhatdan/docker@v0.7.7-0.20180119204836-47a0dcbcd20a/daemon/daemon_unix.go (about) 1 // +build linux freebsd 2 3 package daemon 4 5 import ( 6 "bufio" 7 "bytes" 8 "context" 9 "fmt" 10 "io/ioutil" 11 "net" 12 "os" 13 "path/filepath" 14 "runtime" 15 "runtime/debug" 16 "strconv" 17 "strings" 18 "time" 19 20 containerd_cgroups "github.com/containerd/cgroups" 21 "github.com/docker/docker/api/types" 22 "github.com/docker/docker/api/types/blkiodev" 23 pblkiodev "github.com/docker/docker/api/types/blkiodev" 24 containertypes "github.com/docker/docker/api/types/container" 25 "github.com/docker/docker/container" 26 "github.com/docker/docker/daemon/config" 27 "github.com/docker/docker/image" 28 "github.com/docker/docker/opts" 29 "github.com/docker/docker/pkg/containerfs" 30 "github.com/docker/docker/pkg/idtools" 31 "github.com/docker/docker/pkg/ioutils" 32 "github.com/docker/docker/pkg/parsers" 33 "github.com/docker/docker/pkg/parsers/kernel" 34 "github.com/docker/docker/pkg/sysinfo" 35 "github.com/docker/docker/runconfig" 36 "github.com/docker/docker/volume" 37 "github.com/docker/libnetwork" 38 nwconfig "github.com/docker/libnetwork/config" 39 "github.com/docker/libnetwork/drivers/bridge" 40 "github.com/docker/libnetwork/netlabel" 41 "github.com/docker/libnetwork/netutils" 42 "github.com/docker/libnetwork/options" 43 lntypes "github.com/docker/libnetwork/types" 44 "github.com/opencontainers/runc/libcontainer/cgroups" 45 rsystem "github.com/opencontainers/runc/libcontainer/system" 46 specs "github.com/opencontainers/runtime-spec/specs-go" 47 "github.com/opencontainers/selinux/go-selinux/label" 48 "github.com/pkg/errors" 49 "github.com/sirupsen/logrus" 50 "github.com/vishvananda/netlink" 51 "golang.org/x/sys/unix" 52 ) 53 54 const ( 55 // DefaultShimBinary is the default shim to be used by containerd if none 56 // is specified 57 DefaultShimBinary = "docker-containerd-shim" 58 59 // DefaultRuntimeBinary is the default runtime to be used by 60 // containerd if none is 
specified 61 DefaultRuntimeBinary = "docker-runc" 62 63 // See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269 64 linuxMinCPUShares = 2 65 linuxMaxCPUShares = 262144 66 platformSupported = true 67 // It's not kernel limit, we want this 4M limit to supply a reasonable functional container 68 linuxMinMemory = 4194304 69 // constants for remapped root settings 70 defaultIDSpecifier string = "default" 71 defaultRemappedID string = "dockremap" 72 73 // constant for cgroup drivers 74 cgroupFsDriver = "cgroupfs" 75 cgroupSystemdDriver = "systemd" 76 77 // DefaultRuntimeName is the default runtime to be used by 78 // containerd if none is specified 79 DefaultRuntimeName = "docker-runc" 80 ) 81 82 type containerGetter interface { 83 GetContainer(string) (*container.Container, error) 84 } 85 86 func getMemoryResources(config containertypes.Resources) *specs.LinuxMemory { 87 memory := specs.LinuxMemory{} 88 89 if config.Memory > 0 { 90 memory.Limit = &config.Memory 91 } 92 93 if config.MemoryReservation > 0 { 94 memory.Reservation = &config.MemoryReservation 95 } 96 97 if config.MemorySwap > 0 { 98 memory.Swap = &config.MemorySwap 99 } 100 101 if config.MemorySwappiness != nil { 102 swappiness := uint64(*config.MemorySwappiness) 103 memory.Swappiness = &swappiness 104 } 105 106 if config.KernelMemory != 0 { 107 memory.Kernel = &config.KernelMemory 108 } 109 110 return &memory 111 } 112 113 func getCPUResources(config containertypes.Resources) (*specs.LinuxCPU, error) { 114 cpu := specs.LinuxCPU{} 115 116 if config.CPUShares < 0 { 117 return nil, fmt.Errorf("shares: invalid argument") 118 } 119 if config.CPUShares >= 0 { 120 shares := uint64(config.CPUShares) 121 cpu.Shares = &shares 122 } 123 124 if config.CpusetCpus != "" { 125 cpu.Cpus = config.CpusetCpus 126 } 127 128 if config.CpusetMems != "" { 129 cpu.Mems = config.CpusetMems 130 } 131 132 if config.NanoCPUs > 0 { 133 // 
https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 134 period := uint64(100 * time.Millisecond / time.Microsecond) 135 quota := config.NanoCPUs * int64(period) / 1e9 136 cpu.Period = &period 137 cpu.Quota = "a 138 } 139 140 if config.CPUPeriod != 0 { 141 period := uint64(config.CPUPeriod) 142 cpu.Period = &period 143 } 144 145 if config.CPUQuota != 0 { 146 q := config.CPUQuota 147 cpu.Quota = &q 148 } 149 150 if config.CPURealtimePeriod != 0 { 151 period := uint64(config.CPURealtimePeriod) 152 cpu.RealtimePeriod = &period 153 } 154 155 if config.CPURealtimeRuntime != 0 { 156 c := config.CPURealtimeRuntime 157 cpu.RealtimeRuntime = &c 158 } 159 160 return &cpu, nil 161 } 162 163 func getBlkioWeightDevices(config containertypes.Resources) ([]specs.LinuxWeightDevice, error) { 164 var stat unix.Stat_t 165 var blkioWeightDevices []specs.LinuxWeightDevice 166 167 for _, weightDevice := range config.BlkioWeightDevice { 168 if err := unix.Stat(weightDevice.Path, &stat); err != nil { 169 return nil, err 170 } 171 weight := weightDevice.Weight 172 d := specs.LinuxWeightDevice{Weight: &weight} 173 d.Major = int64(stat.Rdev / 256) 174 d.Minor = int64(stat.Rdev % 256) 175 blkioWeightDevices = append(blkioWeightDevices, d) 176 } 177 178 return blkioWeightDevices, nil 179 } 180 181 func (daemon *Daemon) parseSecurityOpt(container *container.Container, hostConfig *containertypes.HostConfig) error { 182 container.NoNewPrivileges = daemon.configStore.NoNewPrivileges 183 return parseSecurityOpt(container, hostConfig) 184 } 185 186 func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error { 187 var ( 188 labelOpts []string 189 err error 190 ) 191 192 for _, opt := range config.SecurityOpt { 193 if opt == "no-new-privileges" { 194 container.NoNewPrivileges = true 195 continue 196 } 197 if opt == "disable" { 198 labelOpts = append(labelOpts, "disable") 199 continue 200 } 201 202 var con []string 203 if strings.Contains(opt, "=") { 204 
con = strings.SplitN(opt, "=", 2) 205 } else if strings.Contains(opt, ":") { 206 con = strings.SplitN(opt, ":", 2) 207 logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 17.04, use `=` instead.") 208 } 209 if len(con) != 2 { 210 return fmt.Errorf("invalid --security-opt 1: %q", opt) 211 } 212 213 switch con[0] { 214 case "label": 215 labelOpts = append(labelOpts, con[1]) 216 case "apparmor": 217 container.AppArmorProfile = con[1] 218 case "seccomp": 219 container.SeccompProfile = con[1] 220 case "no-new-privileges": 221 noNewPrivileges, err := strconv.ParseBool(con[1]) 222 if err != nil { 223 return fmt.Errorf("invalid --security-opt 2: %q", opt) 224 } 225 container.NoNewPrivileges = noNewPrivileges 226 default: 227 return fmt.Errorf("invalid --security-opt 2: %q", opt) 228 } 229 } 230 231 container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts) 232 return err 233 } 234 235 func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.LinuxThrottleDevice, error) { 236 var throttleDevices []specs.LinuxThrottleDevice 237 var stat unix.Stat_t 238 239 for _, d := range devs { 240 if err := unix.Stat(d.Path, &stat); err != nil { 241 return nil, err 242 } 243 d := specs.LinuxThrottleDevice{Rate: d.Rate} 244 d.Major = int64(stat.Rdev / 256) 245 d.Minor = int64(stat.Rdev % 256) 246 throttleDevices = append(throttleDevices, d) 247 } 248 249 return throttleDevices, nil 250 } 251 252 func checkKernel() error { 253 // Check for unsupported kernel versions 254 // FIXME: it would be cleaner to not test for specific versions, but rather 255 // test for specific functionalities. 256 // Unfortunately we can't test for the feature "does not cause a kernel panic" 257 // without actually causing a kernel panic, so we need this workaround until 258 // the circumstances of pre-3.10 crashes are clearer. 
259 // For details see https://github.com/docker/docker/issues/407 260 // Docker 1.11 and above doesn't actually run on kernels older than 3.4, 261 // due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4). 262 if !kernel.CheckKernelVersion(3, 10, 0) { 263 v, _ := kernel.GetKernelVersion() 264 if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" { 265 logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String()) 266 } 267 } 268 return nil 269 } 270 271 // adaptContainerSettings is called during container creation to modify any 272 // settings necessary in the HostConfig structure. 273 func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error { 274 if adjustCPUShares && hostConfig.CPUShares > 0 { 275 // Handle unsupported CPUShares 276 if hostConfig.CPUShares < linuxMinCPUShares { 277 logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares) 278 hostConfig.CPUShares = linuxMinCPUShares 279 } else if hostConfig.CPUShares > linuxMaxCPUShares { 280 logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares) 281 hostConfig.CPUShares = linuxMaxCPUShares 282 } 283 } 284 if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 { 285 // By default, MemorySwap is set to twice the size of Memory. 
286 hostConfig.MemorySwap = hostConfig.Memory * 2 287 } 288 if hostConfig.ShmSize == 0 { 289 hostConfig.ShmSize = config.DefaultShmSize 290 if daemon.configStore != nil { 291 hostConfig.ShmSize = int64(daemon.configStore.ShmSize) 292 } 293 } 294 // Set default IPC mode, if unset for container 295 if hostConfig.IpcMode.IsEmpty() { 296 m := config.DefaultIpcMode 297 if daemon.configStore != nil { 298 m = daemon.configStore.IpcMode 299 } 300 hostConfig.IpcMode = containertypes.IpcMode(m) 301 } 302 303 adaptSharedNamespaceContainer(daemon, hostConfig) 304 305 var err error 306 opts, err := daemon.generateSecurityOpt(hostConfig) 307 if err != nil { 308 return err 309 } 310 hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...) 311 if hostConfig.OomKillDisable == nil { 312 defaultOomKillDisable := false 313 hostConfig.OomKillDisable = &defaultOomKillDisable 314 } 315 316 return nil 317 } 318 319 // adaptSharedNamespaceContainer replaces container name with its ID in hostConfig. 320 // To be more precisely, it modifies `container:name` to `container:ID` of PidMode, IpcMode 321 // and NetworkMode. 322 // 323 // When a container shares its namespace with another container, use ID can keep the namespace 324 // sharing connection between the two containers even the another container is renamed. 
325 func adaptSharedNamespaceContainer(daemon containerGetter, hostConfig *containertypes.HostConfig) { 326 containerPrefix := "container:" 327 if hostConfig.PidMode.IsContainer() { 328 pidContainer := hostConfig.PidMode.Container() 329 // if there is any error returned here, we just ignore it and leave it to be 330 // handled in the following logic 331 if c, err := daemon.GetContainer(pidContainer); err == nil { 332 hostConfig.PidMode = containertypes.PidMode(containerPrefix + c.ID) 333 } 334 } 335 if hostConfig.IpcMode.IsContainer() { 336 ipcContainer := hostConfig.IpcMode.Container() 337 if c, err := daemon.GetContainer(ipcContainer); err == nil { 338 hostConfig.IpcMode = containertypes.IpcMode(containerPrefix + c.ID) 339 } 340 } 341 if hostConfig.NetworkMode.IsContainer() { 342 netContainer := hostConfig.NetworkMode.ConnectedContainer() 343 if c, err := daemon.GetContainer(netContainer); err == nil { 344 hostConfig.NetworkMode = containertypes.NetworkMode(containerPrefix + c.ID) 345 } 346 } 347 } 348 349 func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) { 350 warnings := []string{} 351 fixMemorySwappiness(resources) 352 353 // memory subsystem checks and adjustments 354 if resources.Memory != 0 && resources.Memory < linuxMinMemory { 355 return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB") 356 } 357 if resources.Memory > 0 && !sysInfo.MemoryLimit { 358 warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 359 logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 360 resources.Memory = 0 361 resources.MemorySwap = -1 362 } 363 if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit { 364 warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. 
Memory limited without swap.") 365 logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.") 366 resources.MemorySwap = -1 367 } 368 if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory { 369 return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage") 370 } 371 if resources.Memory == 0 && resources.MemorySwap > 0 && !update { 372 return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage") 373 } 374 if resources.MemorySwappiness != nil && !sysInfo.MemorySwappiness { 375 warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.") 376 logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.") 377 resources.MemorySwappiness = nil 378 } 379 if resources.MemorySwappiness != nil { 380 swappiness := *resources.MemorySwappiness 381 if swappiness < 0 || swappiness > 100 { 382 return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness) 383 } 384 } 385 if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation { 386 warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.") 387 logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. 
Limitation discarded.") 388 resources.MemoryReservation = 0 389 } 390 if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory { 391 return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB") 392 } 393 if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation { 394 return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage") 395 } 396 if resources.KernelMemory > 0 && !sysInfo.KernelMemory { 397 warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 398 logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 399 resources.KernelMemory = 0 400 } 401 if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory { 402 return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB") 403 } 404 if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) { 405 warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") 406 logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") 407 } 408 if resources.OomKillDisable != nil && !sysInfo.OomKillDisable { 409 // only produce warnings if the setting wasn't to *disable* the OOM Kill; no point 410 // warning the caller if they already wanted the feature to be off 411 if *resources.OomKillDisable { 412 warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.") 413 logrus.Warn("Your kernel does not support OomKillDisable. 
OomKillDisable discarded.") 414 } 415 resources.OomKillDisable = nil 416 } 417 418 if resources.PidsLimit != 0 && !sysInfo.PidsLimit { 419 warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.") 420 logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.") 421 resources.PidsLimit = 0 422 } 423 424 // cpu subsystem checks and adjustments 425 if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 { 426 return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set") 427 } 428 if resources.NanoCPUs > 0 && resources.CPUQuota > 0 { 429 return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set") 430 } 431 if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) { 432 return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted") 433 } 434 // The highest precision we could get on Linux is 0.001, by setting 435 // cpu.cfs_period_us=1000ms 436 // cpu.cfs_quota=1ms 437 // See the following link for details: 438 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 439 // Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error. 440 // The error message is 0.01 so that this is consistent with Windows 441 if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 { 442 return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU()) 443 } 444 445 if resources.CPUShares > 0 && !sysInfo.CPUShares { 446 warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.") 447 logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. 
Shares discarded.") 448 resources.CPUShares = 0 449 } 450 if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod { 451 warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.") 452 logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.") 453 resources.CPUPeriod = 0 454 } 455 if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) { 456 return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)") 457 } 458 if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota { 459 warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.") 460 logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.") 461 resources.CPUQuota = 0 462 } 463 if resources.CPUQuota > 0 && resources.CPUQuota < 1000 { 464 return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)") 465 } 466 if resources.CPUPercent > 0 { 467 warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)) 468 logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS) 469 resources.CPUPercent = 0 470 } 471 472 // cpuset subsystem checks and adjustments 473 if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset { 474 warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.") 475 logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. 
Cpuset discarded.") 476 resources.CpusetCpus = "" 477 resources.CpusetMems = "" 478 } 479 cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus) 480 if err != nil { 481 return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus) 482 } 483 if !cpusAvailable { 484 return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus) 485 } 486 memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems) 487 if err != nil { 488 return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems) 489 } 490 if !memsAvailable { 491 return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems) 492 } 493 494 // blkio subsystem checks and adjustments 495 if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight { 496 warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.") 497 logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.") 498 resources.BlkioWeight = 0 499 } 500 if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) { 501 return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000") 502 } 503 if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 { 504 return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS) 505 } 506 if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice { 507 warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.") 508 logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. 
Weight-device discarded.") 509 resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{} 510 } 511 if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice { 512 warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.") 513 logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded") 514 resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{} 515 } 516 if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice { 517 warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.") 518 logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.") 519 resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{} 520 521 } 522 if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice { 523 warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.") 524 logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.") 525 resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{} 526 } 527 if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice { 528 warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.") 529 logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. 
Block I/O IOPS write limit discarded.") 530 resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{} 531 } 532 533 return warnings, nil 534 } 535 536 func (daemon *Daemon) getCgroupDriver() string { 537 cgroupDriver := cgroupFsDriver 538 539 if UsingSystemd(daemon.configStore) { 540 cgroupDriver = cgroupSystemdDriver 541 } 542 return cgroupDriver 543 } 544 545 // getCD gets the raw value of the native.cgroupdriver option, if set. 546 func getCD(config *config.Config) string { 547 for _, option := range config.ExecOptions { 548 key, val, err := parsers.ParseKeyValueOpt(option) 549 if err != nil || !strings.EqualFold(key, "native.cgroupdriver") { 550 continue 551 } 552 return val 553 } 554 return "" 555 } 556 557 // VerifyCgroupDriver validates native.cgroupdriver 558 func VerifyCgroupDriver(config *config.Config) error { 559 cd := getCD(config) 560 if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver { 561 return nil 562 } 563 return fmt.Errorf("native.cgroupdriver option %s not supported", cd) 564 } 565 566 // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd 567 func UsingSystemd(config *config.Config) bool { 568 return getCD(config) == cgroupSystemdDriver 569 } 570 571 // verifyPlatformContainerSettings performs platform-specific validation of the 572 // hostconfig and config structures. 573 func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) { 574 var warnings []string 575 sysInfo := sysinfo.New(true) 576 577 w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update) 578 579 // no matter err is nil or not, w could have data in itself. 580 warnings = append(warnings, w...) 
581 582 if err != nil { 583 return warnings, err 584 } 585 586 if hostConfig.ShmSize < 0 { 587 return warnings, fmt.Errorf("SHM size can not be less than 0") 588 } 589 590 if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 { 591 return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj) 592 } 593 594 // ip-forwarding does not affect container with '--net=host' (or '--net=none') 595 if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) { 596 warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.") 597 logrus.Warn("IPv4 forwarding is disabled. Networking will not work") 598 } 599 // check for various conflicting options with user namespaces 600 if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() { 601 if hostConfig.Privileged { 602 return warnings, fmt.Errorf("privileged mode is incompatible with user namespaces. 
You must run the container in the host namespace when running privileged mode") 603 } 604 if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() { 605 return warnings, fmt.Errorf("cannot share the host's network namespace when user namespaces are enabled") 606 } 607 if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() { 608 return warnings, fmt.Errorf("cannot share the host PID namespace when user namespaces are enabled") 609 } 610 } 611 if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) { 612 // CgroupParent for systemd cgroup should be named as "xxx.slice" 613 if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") { 614 return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"") 615 } 616 } 617 if hostConfig.Runtime == "" { 618 hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName() 619 } 620 621 if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil { 622 return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime) 623 } 624 625 parser := volume.NewParser(runtime.GOOS) 626 for dest := range hostConfig.Tmpfs { 627 if err := parser.ValidateTmpfsMountDestination(dest); err != nil { 628 return warnings, err 629 } 630 } 631 632 return warnings, nil 633 } 634 635 func (daemon *Daemon) loadRuntimes() error { 636 return daemon.initRuntimes(daemon.configStore.Runtimes) 637 } 638 639 func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error) { 640 runtimeDir := filepath.Join(daemon.configStore.Root, "runtimes") 641 // Remove old temp directory if any 642 os.RemoveAll(runtimeDir + "-old") 643 tmpDir, err := ioutils.TempDir(daemon.configStore.Root, "gen-runtimes") 644 if err != nil { 645 return errors.Wrapf(err, "failed to get temp dir to generate runtime scripts") 646 } 647 defer func() { 648 if err != nil { 649 if err1 := os.RemoveAll(tmpDir); err1 != nil { 650 
logrus.WithError(err1).WithField("dir", tmpDir).
					Warnf("failed to remove tmp dir")
			}
			return
		}

		if err = os.Rename(runtimeDir, runtimeDir+"-old"); err != nil {
			return
		}
		if err = os.Rename(tmpDir, runtimeDir); err != nil {
			err = errors.Wrapf(err, "failed to setup runtimes dir, new containers may not start")
			return
		}
		if err = os.RemoveAll(runtimeDir + "-old"); err != nil {
			logrus.WithError(err).WithField("dir", tmpDir).
				Warnf("failed to remove old runtimes dir")
		}
	}()

	for name, rt := range runtimes {
		if len(rt.Args) == 0 {
			continue
		}

		script := filepath.Join(tmpDir, name)
		content := fmt.Sprintf("#!/bin/sh\n%s %s $@\n", rt.Path, strings.Join(rt.Args, " "))
		if err := ioutil.WriteFile(script, []byte(content), 0700); err != nil {
			return err
		}
	}
	return nil
}

// reloadPlatform updates configuration with platform specific options
// and updates the passed attributes.
//
// conf is validated first; each platform setting that is present in conf
// (runtimes, default runtime, default shm size, ipc mode) is then copied into
// daemon.configStore. Finally the "runtimes", "default-runtime",
// "default-shm-size" and "default-ipc-mode" entries of attributes are
// overwritten with the values now held by the config store.
func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) error {
	if err := conf.ValidatePlatformConfig(); err != nil {
		return err
	}

	if conf.IsValueSet("runtimes") {
		// Always set the default one
		conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
		if err := daemon.initRuntimes(conf.Runtimes); err != nil {
			return err
		}
		daemon.configStore.Runtimes = conf.Runtimes
	}

	if conf.DefaultRuntime != "" {
		daemon.configStore.DefaultRuntime = conf.DefaultRuntime
	}

	if conf.IsValueSet("default-shm-size") {
		daemon.configStore.ShmSize = conf.ShmSize
	}

	if conf.IpcMode != "" {
		daemon.configStore.IpcMode = conf.IpcMode
	}

	// Update attributes: runtimes are rendered as space separated
	// "name:runtime" pairs (map iteration order, so the order is not stable).
	var runtimeList bytes.Buffer
	for name, rt := range daemon.configStore.Runtimes {
		if runtimeList.Len() > 0 {
			runtimeList.WriteRune(' ')
		}
		runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt))
	}

	attributes["runtimes"] = runtimeList.String()
	attributes["default-runtime"] = daemon.configStore.DefaultRuntime
	attributes["default-shm-size"] = fmt.Sprintf("%d", daemon.configStore.ShmSize)
	attributes["default-ipc-mode"] = daemon.configStore.IpcMode

	return nil
}

// verifyDaemonSettings performs validation of daemon config struct.
//
// Besides rejecting incompatible option combinations it also normalizes conf
// in place: IP masquerading is switched off when iptables is disabled, an
// empty default runtime becomes the stock runtime, and the stock runtime entry
// is always (re)registered in conf.Runtimes.
func verifyDaemonSettings(conf *config.Config) error {
	// Check for mutually incompatible config options
	if conf.BridgeConfig.Iface != "" && conf.BridgeConfig.IP != "" {
		return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one")
	}
	if !conf.BridgeConfig.EnableIPTables && !conf.BridgeConfig.InterContainerCommunication {
		return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true")
	}
	if !conf.BridgeConfig.EnableIPTables && conf.BridgeConfig.EnableIPMasq {
		conf.BridgeConfig.EnableIPMasq = false
	}
	if err := VerifyCgroupDriver(conf); err != nil {
		return err
	}
	if conf.CgroupParent != "" && UsingSystemd(conf) {
		// ".slice" alone is 6 characters, so anything that short has no name part.
		if len(conf.CgroupParent) <= 6 || !strings.HasSuffix(conf.CgroupParent, ".slice") {
			return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"")
		}
	}

	if conf.DefaultRuntime == "" {
		conf.DefaultRuntime = config.StockRuntimeName
	}
	if conf.Runtimes == nil {
		conf.Runtimes = make(map[string]types.Runtime)
	}
	conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeName}

	return nil
}

// checkSystem validates platform-specific requirements: the process must run
// with effective uid 0 and checkKernel must succeed.
func checkSystem() error {
	if os.Geteuid() != 0 {
		return fmt.Errorf("The Docker daemon needs to be run as root")
	}
	return checkKernel()
}

// configureMaxThreads sets the Go runtime max threads threshold
// which is 90% of the kernel setting from /proc/sys/kernel/threads-max.
//
// The value is computed with integer arithmetic as (threads-max / 100) * 90.
// The config parameter is not used by this implementation.
func configureMaxThreads(config *config.Config) error {
	mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
	if err != nil {
		return err
	}
	mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
	if err != nil {
		return err
	}
	maxThreads := (mtint / 100) * 90
	debug.SetMaxThreads(maxThreads)
	logrus.Debugf("Golang's threads limit set to %d", maxThreads)
	return nil
}

// overlaySupportsSelinux reports whether /proc/kallsyms lists the
// security_inode_copy_up symbol.
//
// A missing /proc/kallsyms yields (false, nil). Failing to open or read the
// file, or a line that does not have at least three whitespace separated
// fields, yields an error.
func overlaySupportsSelinux() (bool, error) {
	f, err := os.Open("/proc/kallsyms")
	if err != nil {
		if os.IsNotExist(err) {
			return false, nil
		}
		return false, err
	}
	defer f.Close()

	var symAddr, symType, symName, text string

	s := bufio.NewScanner(f)
	for s.Scan() {
		text = s.Text()
		if _, err := fmt.Sscanf(text, "%s %s %s", &symAddr, &symType, &symName); err != nil {
			return false, fmt.Errorf("Scanning '%s' failed: %s", text, err)
		}

		// Check for presence of symbol security_inode_copy_up.
		if symName == "security_inode_copy_up" {
			return true, nil
		}
	}
	// Scan returns false on both EOF and read errors; only Err tells them
	// apart, so it has to be consulted once the loop is over.
	if err := s.Err(); err != nil {
		return false, err
	}
	return false, nil
}

// configureKernelSecuritySupport configures and validates security support for the kernel.
//
// With SELinux support requested but not enabled on the host only a warning is
// logged. For the "overlay" and "overlay2" drivers a warning is logged when
// overlaySupportsSelinux reports no support. When SELinux support is not
// requested, selinuxSetDisabled is called.
func configureKernelSecuritySupport(config *config.Config, driverName string) error {
	if config.EnableSelinuxSupport {
		if !selinuxEnabled() {
			logrus.Warn("Docker could not enable SELinux on the host system")
			return nil
		}

		if driverName == "overlay" || driverName == "overlay2" {
			// If driver is overlay or overlay2, make sure kernel
			// supports selinux with overlay.
			supported, err := overlaySupportsSelinux()
			if err != nil {
				return err
			}

			if !supported {
				logrus.Warnf("SELinux is not supported with the %v graph driver on this kernel", driverName)
			}
		}
	} else {
		selinuxSetDisabled()
	}
	return nil
}

// initNetworkController creates the libnetwork controller and, unless active
// sandboxes exist, makes sure the default "none", "host" and "bridge"
// networks are in the expected state.
//
// When activeSandboxes is non-empty the controller is returned right after
// creation and no default network is touched. Otherwise an existing "bridge"
// network is deleted and, depending on config.DisableBridge, either recreated
// through initBridgeDriver or the default bridge interface is removed.
func (daemon *Daemon) initNetworkController(config *config.Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) {
	netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes)
	if err != nil {
		return nil, err
	}

	controller, err := libnetwork.New(netOptions...)
	if err != nil {
		return nil, fmt.Errorf("error obtaining controller instance: %v", err)
	}

	if len(activeSandboxes) > 0 {
		logrus.Info("There are old running containers, the network config will not take affect")
		return controller, nil
	}

	// Initialize default network on "null"
	if n, _ := controller.NetworkByName("none"); n == nil {
		if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil {
			return nil, fmt.Errorf("Error creating default \"null\" network: %v", err)
		}
	}

	// Initialize default network on "host"
	if n, _ := controller.NetworkByName("host"); n == nil {
		if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil {
			return nil, fmt.Errorf("Error creating default \"host\" network: %v", err)
		}
	}

	// Clear stale bridge network
	if n, err := controller.NetworkByName("bridge"); err == nil {
		if err = n.Delete(); err != nil {
			return nil, fmt.Errorf("could not delete the default bridge network: %v", err)
		}
	}

	if !config.DisableBridge {
		// Initialize default driver "bridge"
		if err := initBridgeDriver(controller, config); err != nil {
			return nil, err
		}
	} else {
		removeDefaultBridgeInterface()
	}

	return controller, nil
}

// driverOptions builds the libnetwork driver configuration for the "bridge"
// driver from the IP forwarding, iptables and userland proxy settings of the
// daemon's bridge config.
func driverOptions(config *config.Config) []nwconfig.Option {
	bridgeConfig := options.Generic{
		"EnableIPForwarding":  config.BridgeConfig.EnableIPForward,
		"EnableIPTables":      config.BridgeConfig.EnableIPTables,
		"EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy,
		"UserlandProxyPath":   config.BridgeConfig.UserlandProxyPath}
	bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}

	dOptions := []nwconfig.Option{}
	dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption))
	return dOptions
}

// initBridgeDriver creates the default "bridge" network on controller.
//
// The bridge name is config.BridgeConfig.Iface when set, otherwise
// bridge.DefaultBridgeName. The IPv4 IPAM configuration is derived from the
// addresses elected for that interface and then overridden by the bridge IP,
// fixed CIDR and default gateway options; an IPv6 IPAM configuration is only
// created when a fixed IPv6 CIDR or a default IPv6 gateway is configured.
func initBridgeDriver(controller libnetwork.NetworkController, config *config.Config) error {
	bridgeName := bridge.DefaultBridgeName
	if config.BridgeConfig.Iface != "" {
		bridgeName = config.BridgeConfig.Iface
	}
	netOption := map[string]string{
		bridge.BridgeName:         bridgeName,
		bridge.DefaultBridge:      strconv.FormatBool(true),
		netlabel.DriverMTU:        strconv.Itoa(config.Mtu),
		bridge.EnableIPMasquerade: strconv.FormatBool(config.BridgeConfig.EnableIPMasq),
		bridge.EnableICC:          strconv.FormatBool(config.BridgeConfig.InterContainerCommunication),
	}

	// --ip processing
	if config.BridgeConfig.DefaultIP != nil {
		netOption[bridge.DefaultBindingIP] = config.BridgeConfig.DefaultIP.String()
	}

	var (
		ipamV4Conf *libnetwork.IpamConf
		ipamV6Conf *libnetwork.IpamConf
	)

	ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}

	nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName)
	if err != nil {
		return errors.Wrap(err, "list bridge addresses failed")
	}

	// NOTE(review): indexing nwList[0] assumes ElectInterfaceAddresses never
	// returns an empty IPv4 list together with a nil error -- confirm.
	nw := nwList[0]
	if len(nwList) > 1 && config.BridgeConfig.FixedCIDR != "" {
		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
		if err != nil {
			return errors.Wrap(err, "parse CIDR failed")
		}
		// Iterate through in case there are multiple addresses for the bridge
		for _, entry := range nwList {
			if fCIDR.Contains(entry.IP) {
				nw = entry
				break
			}
		}
	}

	ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
	hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
	if hip.IsGlobalUnicast() {
		ipamV4Conf.Gateway = nw.IP.String()
	}

	if config.BridgeConfig.IP != "" {
		ipamV4Conf.PreferredPool = config.BridgeConfig.IP
		ip, _, err := net.ParseCIDR(config.BridgeConfig.IP)
		if err != nil {
			return err
		}
		ipamV4Conf.Gateway = ip.String()
	} else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" {
		logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool)
	}

	if config.BridgeConfig.FixedCIDR != "" {
		_, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR)
		if err != nil {
			return err
		}

		ipamV4Conf.SubPool = fCIDR.String()
	}

	if config.BridgeConfig.DefaultGatewayIPv4 != nil {
		ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.BridgeConfig.DefaultGatewayIPv4.String()
	}

	var deferIPv6Alloc bool
	if config.BridgeConfig.FixedCIDRv6 != "" {
		_, fCIDRv6, err := net.ParseCIDR(config.BridgeConfig.FixedCIDRv6)
		if err != nil {
			return err
		}

		// In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has
		// at least 48 host bits, we need to guarantee the current behavior where the containers'
		// IPv6 addresses will be constructed based on the containers' interface MAC address.
		// We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints
		// on this network until after the driver has created the endpoint and returned the
		// constructed address. Libnetwork will then reserve this address with the ipam driver.
		ones, _ := fCIDRv6.Mask.Size()
		deferIPv6Alloc = ones <= 80

		if ipamV6Conf == nil {
			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
		}
		ipamV6Conf.PreferredPool = fCIDRv6.String()

		// In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6
		// address belongs to the same network, we need to inform libnetwork about it, so
		// that it can be reserved with IPAM and it will not be given away to somebody else
		for _, nw6 := range nw6List {
			if fCIDRv6.Contains(nw6.IP) {
				ipamV6Conf.Gateway = nw6.IP.String()
				break
			}
		}
	}

	if config.BridgeConfig.DefaultGatewayIPv6 != nil {
		if ipamV6Conf == nil {
			ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)}
		}
		ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.BridgeConfig.DefaultGatewayIPv6.String()
	}

	v4Conf := []*libnetwork.IpamConf{ipamV4Conf}
	v6Conf := []*libnetwork.IpamConf{}
	if ipamV6Conf != nil {
		v6Conf = append(v6Conf, ipamV6Conf)
	}
	// Initialize default network on "bridge" with the same name
	_, err = controller.NewNetwork("bridge", "bridge", "",
		libnetwork.NetworkOptionEnableIPv6(config.BridgeConfig.EnableIPv6),
		libnetwork.NetworkOptionDriverOpts(netOption),
		libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil),
		libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc))
	if err != nil {
		return fmt.Errorf("Error creating default \"bridge\" network: %v", err)
	}
	return nil
}

// Remove default bridge interface if present (--bridge=none use case).
// A failed lookup is treated as "not present"; a failed delete is only logged.
func removeDefaultBridgeInterface() {
	if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
		if err := netlink.LinkDel(lnk); err != nil {
			logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
		}
	}
}

// getLayerInit returns the daemon's setupInitLayer method as the function
// used to initialize a container filesystem layer.
func (daemon *Daemon) getLayerInit() func(containerfs.ContainerFS) error {
	return daemon.setupInitLayer
}

// Parse the remapped root (user namespace) option, which can be one of:
//   username            - valid username from /etc/passwd
//   username:groupname  - valid username; valid groupname from /etc/group
//   uid                 - 32-bit unsigned int valid Linux UID value
//   uid:gid             - uid value; 32-bit unsigned int Linux GID value
//
//  If no groupname is specified, and a username is specified, an attempt
//  will be made to lookup a gid for that username as a groupname
//
//  If names are used, they are verified to exist in passwd/group
//
// The returned values are the resolved user name and group name. More than
// one ':' in usergrp is rejected.
func parseRemappedRoot(usergrp string) (string, string, error) {

	var (
		userID, groupID     int
		username, groupname string
	)

	idparts := strings.Split(usergrp, ":")
	if len(idparts) > 2 {
		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
	}

	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
		// must be a uid; take it as valid
		userID = int(uid)
		luser, err := idtools.LookupUID(userID)
		if err != nil {
			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
		}
		username = luser.Name
		if len(idparts) == 1 {
			// if the uid was numeric and no gid was specified, take the uid as the gid
			groupID = userID
			lgrp, err := idtools.LookupGID(groupID)
			if err != nil {
				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
			}
			groupname = lgrp.Name
		}
	} else {
		lookupName := idparts[0]
		// special case: if the user specified "default", they want Docker to create or
		// use (after creation) the "dockremap" user/group for root remapping
		if lookupName == defaultIDSpecifier {
			lookupName = defaultRemappedID
		}
		luser, err := idtools.LookupUser(lookupName)
		if err != nil && idparts[0] != defaultIDSpecifier {
			// error if the name requested isn't the special "dockremap" ID
			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
		} else if err != nil {
			// special case-- if the username == "default", then we have been asked
			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
			// ranges will be used for the user and group mappings in user namespaced containers
			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
			if err == nil {
				return defaultRemappedID, defaultRemappedID, nil
			}
			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
		}
		username = luser.Name
		if len(idparts) == 1 {
			// we only have a string username, and no group specified; look up gid from username as group
			group, err := idtools.LookupGroup(lookupName)
			if err != nil {
				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
			}
			groupname = group.Name
		}
	}

	if len(idparts) == 2 {
		// groupname or gid is separately specified and must be resolved
		// to an unsigned 32-bit gid
		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
			// must be a gid, take it as valid
			groupID = int(gid)
			lgrp, err := idtools.LookupGID(groupID)
			if err != nil {
				// a gid is looked up in the group database, same as in the
				// uid-only branch above
				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
			}
			groupname = lgrp.Name
		} else {
			// not a number; attempt a lookup
			if _, err := idtools.LookupGroup(idparts[1]); err != nil {
				return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err)
			}
			groupname = idparts[1]
		}
	}
	return username, groupname, nil
}

// setupRemappedRoot resolves config.RemappedRoot into ID mappings.
//
// An empty option, or one that resolves to the "root" user, yields an empty
// IDMappings value. Otherwise config.RemappedRoot is rewritten to the resolved
// "username:groupname" form and the mappings created for that pair are
// returned. On non-Linux platforms a non-empty option is an error.
func setupRemappedRoot(config *config.Config) (*idtools.IDMappings, error) {
	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
		return nil, fmt.Errorf("User namespaces are only supported on Linux")
	}

	// if the daemon was started with remapped root option, parse
	// the config option to the int uid,gid values
	if config.RemappedRoot != "" {
		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
		if err != nil {
			return nil, err
		}
		if username == "root" {
			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
			// effectively
			logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
			return &idtools.IDMappings{}, nil
		}
		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
		// update remapped root setting now that we have resolved them to actual names
		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)

		mappings, err := idtools.NewIDMappings(username, groupname)
		if err != nil {
			// Wrap already appends the cause; no format verb is needed.
			return nil, errors.Wrap(err, "Can't create ID mappings")
		}
		return mappings, nil
	}
	return &idtools.IDMappings{}, nil
}

// setupDaemonRoot prepares the daemon root directory and stores the effective
// root in config.Root.
//
// rootDir is created (or chmod'ed) with mode 0711. When a remapped root is
// configured, config.Root becomes the "<uid>.<gid>" subdirectory of rootDir,
// created with mode 0700 and owned by rootIDs, and every parent directory up
// to (but excluding) "/" must be accessible to rootIDs.
func setupDaemonRoot(config *config.Config, rootDir string, rootIDs idtools.IDPair) error {
	config.Root = rootDir
	// the docker root metadata directory needs to have execute permissions for all users (g+x,o+x)
	// so that syscalls executing as non-root, operating on subdirectories of the graph root
	// (e.g. mounted layers of a container) can traverse this path.
	// The user namespace support will create subdirectories for the remapped root host uid:gid
	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
	// layer content subtrees.
	if _, err := os.Stat(rootDir); err == nil {
		// root current exists; verify the access bits are correct by setting them
		if err = os.Chmod(rootDir, 0711); err != nil {
			return err
		}
	} else if os.IsNotExist(err) {
		// no root exists yet, create it 0711 with root:root ownership
		if err := os.MkdirAll(rootDir, 0711); err != nil {
			return err
		}
	}

	// if user namespaces are enabled we will create a subtree underneath the specified root
	// with any/all specified remapped root uid/gid options on the daemon creating
	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
	// `chdir()` to work for containers namespaced to that uid/gid)
	if config.RemappedRoot != "" {
		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootIDs.UID, rootIDs.GID))
		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
		// Create the root directory if it doesn't exist
		if err := idtools.MkdirAllAndChown(config.Root, 0700, rootIDs); err != nil {
			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
		}
		// we also need to verify that any pre-existing directories in the path to
		// the graphroot won't block access to remapped root--if any pre-existing directory
		// has strict permissions that don't allow "x", container start will fail, so
		// better to warn and fail now
		//
		// NOTE(review): the walk only stops at "/", so config.Root is
		// presumably always absolute here -- confirm against callers.
		dirPath := config.Root
		for {
			dirPath = filepath.Dir(dirPath)
			if dirPath == "/" {
				break
			}
			if !idtools.CanAccess(dirPath, rootIDs) {
				return fmt.Errorf("a subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories", config.Root)
			}
		}
	}
	return nil
}

// registerLinks writes the links to a file.
1222 func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error { 1223 if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() { 1224 return nil 1225 } 1226 1227 for _, l := range hostConfig.Links { 1228 name, alias, err := opts.ParseLink(l) 1229 if err != nil { 1230 return err 1231 } 1232 child, err := daemon.GetContainer(name) 1233 if err != nil { 1234 return errors.Wrapf(err, "could not get container for %s", name) 1235 } 1236 for child.HostConfig.NetworkMode.IsContainer() { 1237 parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2) 1238 child, err = daemon.GetContainer(parts[1]) 1239 if err != nil { 1240 return errors.Wrapf(err, "Could not get container for %s", parts[1]) 1241 } 1242 } 1243 if child.HostConfig.NetworkMode.IsHost() { 1244 return runconfig.ErrConflictHostNetworkAndLinks 1245 } 1246 if err := daemon.registerLink(container, child, alias); err != nil { 1247 return err 1248 } 1249 } 1250 1251 // After we load all the links into the daemon 1252 // set them to nil on the hostconfig 1253 _, err := container.WriteHostConfig() 1254 return err 1255 } 1256 1257 // conditionalMountOnStart is a platform specific helper function during the 1258 // container start to call mount. 1259 func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error { 1260 return daemon.Mount(container) 1261 } 1262 1263 // conditionalUnmountOnCleanup is a platform specific helper function called 1264 // during the cleanup of a container to unmount. 
1265 func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error { 1266 return daemon.Unmount(container) 1267 } 1268 1269 func copyBlkioEntry(entries []*containerd_cgroups.BlkIOEntry) []types.BlkioStatEntry { 1270 out := make([]types.BlkioStatEntry, len(entries)) 1271 for i, re := range entries { 1272 out[i] = types.BlkioStatEntry{ 1273 Major: re.Major, 1274 Minor: re.Minor, 1275 Op: re.Op, 1276 Value: re.Value, 1277 } 1278 } 1279 return out 1280 } 1281 1282 func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { 1283 if !c.IsRunning() { 1284 return nil, errNotRunning(c.ID) 1285 } 1286 cs, err := daemon.containerd.Stats(context.Background(), c.ID) 1287 if err != nil { 1288 if strings.Contains(err.Error(), "container not found") { 1289 return nil, containerNotFound(c.ID) 1290 } 1291 return nil, err 1292 } 1293 s := &types.StatsJSON{} 1294 s.Read = cs.Read 1295 stats := cs.Metrics 1296 if stats.Blkio != nil { 1297 s.BlkioStats = types.BlkioStats{ 1298 IoServiceBytesRecursive: copyBlkioEntry(stats.Blkio.IoServiceBytesRecursive), 1299 IoServicedRecursive: copyBlkioEntry(stats.Blkio.IoServicedRecursive), 1300 IoQueuedRecursive: copyBlkioEntry(stats.Blkio.IoQueuedRecursive), 1301 IoServiceTimeRecursive: copyBlkioEntry(stats.Blkio.IoServiceTimeRecursive), 1302 IoWaitTimeRecursive: copyBlkioEntry(stats.Blkio.IoWaitTimeRecursive), 1303 IoMergedRecursive: copyBlkioEntry(stats.Blkio.IoMergedRecursive), 1304 IoTimeRecursive: copyBlkioEntry(stats.Blkio.IoTimeRecursive), 1305 SectorsRecursive: copyBlkioEntry(stats.Blkio.SectorsRecursive), 1306 } 1307 } 1308 if stats.CPU != nil { 1309 s.CPUStats = types.CPUStats{ 1310 CPUUsage: types.CPUUsage{ 1311 TotalUsage: stats.CPU.Usage.Total, 1312 PercpuUsage: stats.CPU.Usage.PerCPU, 1313 UsageInKernelmode: stats.CPU.Usage.Kernel, 1314 UsageInUsermode: stats.CPU.Usage.User, 1315 }, 1316 ThrottlingData: types.ThrottlingData{ 1317 Periods: stats.CPU.Throttling.Periods, 1318 
ThrottledPeriods: stats.CPU.Throttling.ThrottledPeriods, 1319 ThrottledTime: stats.CPU.Throttling.ThrottledTime, 1320 }, 1321 } 1322 } 1323 1324 if stats.Memory != nil { 1325 raw := make(map[string]uint64) 1326 raw["cache"] = stats.Memory.Cache 1327 raw["rss"] = stats.Memory.RSS 1328 raw["rss_huge"] = stats.Memory.RSSHuge 1329 raw["mapped_file"] = stats.Memory.MappedFile 1330 raw["dirty"] = stats.Memory.Dirty 1331 raw["writeback"] = stats.Memory.Writeback 1332 raw["pgpgin"] = stats.Memory.PgPgIn 1333 raw["pgpgout"] = stats.Memory.PgPgOut 1334 raw["pgfault"] = stats.Memory.PgFault 1335 raw["pgmajfault"] = stats.Memory.PgMajFault 1336 raw["inactive_anon"] = stats.Memory.InactiveAnon 1337 raw["active_anon"] = stats.Memory.ActiveAnon 1338 raw["inactive_file"] = stats.Memory.InactiveFile 1339 raw["active_file"] = stats.Memory.ActiveFile 1340 raw["unevictable"] = stats.Memory.Unevictable 1341 raw["hierarchical_memory_limit"] = stats.Memory.HierarchicalMemoryLimit 1342 raw["hierarchical_memsw_limit"] = stats.Memory.HierarchicalSwapLimit 1343 raw["total_cache"] = stats.Memory.TotalCache 1344 raw["total_rss"] = stats.Memory.TotalRSS 1345 raw["total_rss_huge"] = stats.Memory.TotalRSSHuge 1346 raw["total_mapped_file"] = stats.Memory.TotalMappedFile 1347 raw["total_dirty"] = stats.Memory.TotalDirty 1348 raw["total_writeback"] = stats.Memory.TotalWriteback 1349 raw["total_pgpgin"] = stats.Memory.TotalPgPgIn 1350 raw["total_pgpgout"] = stats.Memory.TotalPgPgOut 1351 raw["total_pgfault"] = stats.Memory.TotalPgFault 1352 raw["total_pgmajfault"] = stats.Memory.TotalPgMajFault 1353 raw["total_inactive_anon"] = stats.Memory.TotalInactiveAnon 1354 raw["total_active_anon"] = stats.Memory.TotalActiveAnon 1355 raw["total_inactive_file"] = stats.Memory.TotalInactiveFile 1356 raw["total_active_file"] = stats.Memory.TotalActiveFile 1357 raw["total_unevictable"] = stats.Memory.TotalUnevictable 1358 1359 if stats.Memory.Usage != nil { 1360 s.MemoryStats = types.MemoryStats{ 1361 Stats: raw, 
1362 Usage: stats.Memory.Usage.Usage, 1363 MaxUsage: stats.Memory.Usage.Max, 1364 Limit: stats.Memory.Usage.Limit, 1365 Failcnt: stats.Memory.Usage.Failcnt, 1366 } 1367 } else { 1368 s.MemoryStats = types.MemoryStats{ 1369 Stats: raw, 1370 } 1371 } 1372 1373 // if the container does not set memory limit, use the machineMemory 1374 if s.MemoryStats.Limit > daemon.machineMemory && daemon.machineMemory > 0 { 1375 s.MemoryStats.Limit = daemon.machineMemory 1376 } 1377 } 1378 1379 if stats.Pids != nil { 1380 s.PidsStats = types.PidsStats{ 1381 Current: stats.Pids.Current, 1382 Limit: stats.Pids.Limit, 1383 } 1384 } 1385 1386 return s, nil 1387 } 1388 1389 // setDefaultIsolation determines the default isolation mode for the 1390 // daemon to run in. This is only applicable on Windows 1391 func (daemon *Daemon) setDefaultIsolation() error { 1392 return nil 1393 } 1394 1395 func rootFSToAPIType(rootfs *image.RootFS) types.RootFS { 1396 var layers []string 1397 for _, l := range rootfs.DiffIDs { 1398 layers = append(layers, l.String()) 1399 } 1400 return types.RootFS{ 1401 Type: rootfs.Type, 1402 Layers: layers, 1403 } 1404 } 1405 1406 // setupDaemonProcess sets various settings for the daemon's process 1407 func setupDaemonProcess(config *config.Config) error { 1408 // setup the daemons oom_score_adj 1409 if err := setupOOMScoreAdj(config.OOMScoreAdjust); err != nil { 1410 return err 1411 } 1412 if err := setMayDetachMounts(); err != nil { 1413 logrus.WithError(err).Warn("Could not set may_detach_mounts kernel parameter") 1414 } 1415 return nil 1416 } 1417 1418 // This is used to allow removal of mountpoints that may be mounted in other 1419 // namespaces on RHEL based kernels starting from RHEL 7.4. 1420 // Without this setting, removals on these RHEL based kernels may fail with 1421 // "device or resource busy". 1422 // This setting is not available in upstream kernels as it is not configurable, 1423 // but has been in the upstream kernels since 3.15. 
1424 func setMayDetachMounts() error { 1425 f, err := os.OpenFile("/proc/sys/fs/may_detach_mounts", os.O_WRONLY, 0) 1426 if err != nil { 1427 if os.IsNotExist(err) { 1428 return nil 1429 } 1430 return errors.Wrap(err, "error opening may_detach_mounts kernel config file") 1431 } 1432 defer f.Close() 1433 1434 _, err = f.WriteString("1") 1435 if os.IsPermission(err) { 1436 // Setting may_detach_mounts does not work in an 1437 // unprivileged container. Ignore the error, but log 1438 // it if we appear not to be in that situation. 1439 if !rsystem.RunningInUserNS() { 1440 logrus.Debugf("Permission denied writing %q to /proc/sys/fs/may_detach_mounts", "1") 1441 } 1442 return nil 1443 } 1444 return err 1445 } 1446 1447 func setupOOMScoreAdj(score int) error { 1448 f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0) 1449 if err != nil { 1450 return err 1451 } 1452 defer f.Close() 1453 stringScore := strconv.Itoa(score) 1454 _, err = f.WriteString(stringScore) 1455 if os.IsPermission(err) { 1456 // Setting oom_score_adj does not work in an 1457 // unprivileged container. Ignore the error, but log 1458 // it if we appear not to be in that situation. 1459 if !rsystem.RunningInUserNS() { 1460 logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore) 1461 } 1462 return nil 1463 } 1464 1465 return err 1466 } 1467 1468 func (daemon *Daemon) initCgroupsPath(path string) error { 1469 if path == "/" || path == "." { 1470 return nil 1471 } 1472 1473 if daemon.configStore.CPURealtimePeriod == 0 && daemon.configStore.CPURealtimeRuntime == 0 { 1474 return nil 1475 } 1476 1477 // Recursively create cgroup to ensure that the system and all parent cgroups have values set 1478 // for the period and runtime as this limits what the children can be set to. 
1479 daemon.initCgroupsPath(filepath.Dir(path)) 1480 1481 mnt, root, err := cgroups.FindCgroupMountpointAndRoot("cpu") 1482 if err != nil { 1483 return err 1484 } 1485 // When docker is run inside docker, the root is based of the host cgroup. 1486 // Should this be handled in runc/libcontainer/cgroups ? 1487 if strings.HasPrefix(root, "/docker/") { 1488 root = "/" 1489 } 1490 1491 path = filepath.Join(mnt, root, path) 1492 sysinfo := sysinfo.New(true) 1493 if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil { 1494 return err 1495 } 1496 return maybeCreateCPURealTimeFile(sysinfo.CPURealtimeRuntime, daemon.configStore.CPURealtimeRuntime, "cpu.rt_runtime_us", path) 1497 } 1498 1499 func maybeCreateCPURealTimeFile(sysinfoPresent bool, configValue int64, file string, path string) error { 1500 if sysinfoPresent && configValue != 0 { 1501 if err := os.MkdirAll(path, 0755); err != nil { 1502 return err 1503 } 1504 if err := ioutil.WriteFile(filepath.Join(path, file), []byte(strconv.FormatInt(configValue, 10)), 0700); err != nil { 1505 return err 1506 } 1507 } 1508 return nil 1509 } 1510 1511 func (daemon *Daemon) setupSeccompProfile() error { 1512 if daemon.configStore.SeccompProfile != "" { 1513 daemon.seccompProfilePath = daemon.configStore.SeccompProfile 1514 b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile) 1515 if err != nil { 1516 return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err) 1517 } 1518 daemon.seccompProfile = b 1519 } 1520 return nil 1521 }