github.com/lazyboychen7/engine@v17.12.1-ce-rc2+incompatible/daemon/daemon_unix.go (about) 1 // +build linux freebsd 2 3 package daemon 4 5 import ( 6 "bufio" 7 "bytes" 8 "context" 9 "fmt" 10 "io/ioutil" 11 "net" 12 "os" 13 "path/filepath" 14 "runtime" 15 "runtime/debug" 16 "strconv" 17 "strings" 18 "time" 19 20 containerd_cgroups "github.com/containerd/cgroups" 21 "github.com/docker/docker/api/types" 22 "github.com/docker/docker/api/types/blkiodev" 23 pblkiodev "github.com/docker/docker/api/types/blkiodev" 24 containertypes "github.com/docker/docker/api/types/container" 25 "github.com/docker/docker/container" 26 "github.com/docker/docker/daemon/config" 27 "github.com/docker/docker/image" 28 "github.com/docker/docker/opts" 29 "github.com/docker/docker/pkg/containerfs" 30 "github.com/docker/docker/pkg/idtools" 31 "github.com/docker/docker/pkg/ioutils" 32 "github.com/docker/docker/pkg/mount" 33 "github.com/docker/docker/pkg/parsers" 34 "github.com/docker/docker/pkg/parsers/kernel" 35 "github.com/docker/docker/pkg/sysinfo" 36 "github.com/docker/docker/runconfig" 37 "github.com/docker/docker/volume" 38 "github.com/docker/libnetwork" 39 nwconfig "github.com/docker/libnetwork/config" 40 "github.com/docker/libnetwork/drivers/bridge" 41 "github.com/docker/libnetwork/netlabel" 42 "github.com/docker/libnetwork/netutils" 43 "github.com/docker/libnetwork/options" 44 lntypes "github.com/docker/libnetwork/types" 45 "github.com/opencontainers/runc/libcontainer/cgroups" 46 rsystem "github.com/opencontainers/runc/libcontainer/system" 47 specs "github.com/opencontainers/runtime-spec/specs-go" 48 "github.com/opencontainers/selinux/go-selinux/label" 49 "github.com/pkg/errors" 50 "github.com/sirupsen/logrus" 51 "github.com/vishvananda/netlink" 52 "golang.org/x/sys/unix" 53 ) 54 55 const ( 56 // DefaultShimBinary is the default shim to be used by containerd if none 57 // is specified 58 DefaultShimBinary = "docker-containerd-shim" 59 60 // DefaultRuntimeBinary is the default runtime to be used by 61 // containerd if none is specified 62 DefaultRuntimeBinary = "docker-runc" 63 64 // See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269 65 linuxMinCPUShares = 2 66 linuxMaxCPUShares = 262144 67 platformSupported = true 68 // It's not kernel limit, we want this 4M limit to supply a reasonable functional container 69 linuxMinMemory = 4194304 70 // constants for remapped root settings 71 defaultIDSpecifier string = "default" 72 defaultRemappedID string = "dockremap" 73 74 // constant for cgroup drivers 75 cgroupFsDriver = "cgroupfs" 76 cgroupSystemdDriver = "systemd" 77 78 // DefaultRuntimeName is the default runtime to be used by 79 // containerd if none is specified 80 DefaultRuntimeName = "docker-runc" 81 ) 82 83 type containerGetter interface { 84 GetContainer(string) (*container.Container, error) 85 } 86 87 func getMemoryResources(config containertypes.Resources) *specs.LinuxMemory { 88 memory := specs.LinuxMemory{} 89 90 if config.Memory > 0 { 91 memory.Limit = &config.Memory 92 } 93 94 if config.MemoryReservation > 0 { 95 memory.Reservation = &config.MemoryReservation 96 } 97 98 if config.MemorySwap > 0 { 99 memory.Swap = &config.MemorySwap 100 } 101 102 if config.MemorySwappiness != nil { 103 swappiness := uint64(*config.MemorySwappiness) 104 memory.Swappiness = &swappiness 105 } 106 107 if config.KernelMemory != 0 { 108 memory.Kernel = &config.KernelMemory 109 } 110 111 return &memory 112 } 113 114 func getCPUResources(config containertypes.Resources) (*specs.LinuxCPU, error) { 115 cpu := specs.LinuxCPU{} 116 117 if config.CPUShares < 0 { 118 return nil, fmt.Errorf("shares: invalid argument") 119 } 120 if config.CPUShares >= 0 { 121 shares := uint64(config.CPUShares) 122 cpu.Shares = &shares 123 } 124 125 if config.CpusetCpus != "" { 126 cpu.Cpus = config.CpusetCpus 127 } 128 129 if config.CpusetMems != "" { 130 cpu.Mems = config.CpusetMems 131 } 132 133 if config.NanoCPUs > 0 { 134 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 135 period := uint64(100 * time.Millisecond / time.Microsecond) 136 quota := config.NanoCPUs * int64(period) / 1e9 137 cpu.Period = &period 138 cpu.Quota = "a 139 } 140 141 if config.CPUPeriod != 0 { 142 period := uint64(config.CPUPeriod) 143 cpu.Period = &period 144 } 145 146 if config.CPUQuota != 0 { 147 q := config.CPUQuota 148 cpu.Quota = &q 149 } 150 151 if config.CPURealtimePeriod != 0 { 152 period := uint64(config.CPURealtimePeriod) 153 cpu.RealtimePeriod = &period 154 } 155 156 if config.CPURealtimeRuntime != 0 { 157 c := config.CPURealtimeRuntime 158 cpu.RealtimeRuntime = &c 159 } 160 161 return &cpu, nil 162 } 163 164 func getBlkioWeightDevices(config containertypes.Resources) ([]specs.LinuxWeightDevice, error) { 165 var stat unix.Stat_t 166 var blkioWeightDevices []specs.LinuxWeightDevice 167 168 for _, weightDevice := range config.BlkioWeightDevice { 169 if err := unix.Stat(weightDevice.Path, &stat); err != nil { 170 return nil, err 171 } 172 weight := weightDevice.Weight 173 d := specs.LinuxWeightDevice{Weight: &weight} 174 d.Major = int64(stat.Rdev / 256) 175 d.Minor = int64(stat.Rdev % 256) 176 blkioWeightDevices = append(blkioWeightDevices, d) 177 } 178 179 return blkioWeightDevices, nil 180 } 181 182 func (daemon *Daemon) parseSecurityOpt(container *container.Container, hostConfig *containertypes.HostConfig) error { 183 container.NoNewPrivileges = daemon.configStore.NoNewPrivileges 184 return parseSecurityOpt(container, hostConfig) 185 } 186 187 func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error { 188 var ( 189 labelOpts []string 190 err error 191 ) 192 193 for _, opt := range config.SecurityOpt { 194 if opt == "no-new-privileges" { 195 container.NoNewPrivileges = true 196 continue 197 } 198 if opt == "disable" { 199 labelOpts = append(labelOpts, "disable") 200 continue 201 } 202 203 var con []string 204 if strings.Contains(opt, "=") { 205 con = strings.SplitN(opt, "=", 2) 206 } else if strings.Contains(opt, ":") { 207 con = strings.SplitN(opt, ":", 2) 208 logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 17.04, use `=` instead.") 209 } 210 if len(con) != 2 { 211 return fmt.Errorf("invalid --security-opt 1: %q", opt) 212 } 213 214 switch con[0] { 215 case "label": 216 labelOpts = append(labelOpts, con[1]) 217 case "apparmor": 218 container.AppArmorProfile = con[1] 219 case "seccomp": 220 container.SeccompProfile = con[1] 221 case "no-new-privileges": 222 noNewPrivileges, err := strconv.ParseBool(con[1]) 223 if err != nil { 224 return fmt.Errorf("invalid --security-opt 2: %q", opt) 225 } 226 container.NoNewPrivileges = noNewPrivileges 227 default: 228 return fmt.Errorf("invalid --security-opt 2: %q", opt) 229 } 230 } 231 232 container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts) 233 return err 234 } 235 236 func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.LinuxThrottleDevice, error) { 237 var throttleDevices []specs.LinuxThrottleDevice 238 var stat unix.Stat_t 239 240 for _, d := range devs { 241 if err := unix.Stat(d.Path, &stat); err != nil { 242 return nil, err 243 } 244 d := specs.LinuxThrottleDevice{Rate: d.Rate} 245 d.Major = int64(stat.Rdev / 256) 246 d.Minor = int64(stat.Rdev % 256) 247 throttleDevices = append(throttleDevices, d) 248 } 249 250 return throttleDevices, nil 251 } 252 253 func checkKernel() error { 254 // Check for unsupported kernel versions 255 // FIXME: it would be cleaner to not test for specific versions, but rather 256 // test for specific functionalities. 257 // Unfortunately we can't test for the feature "does not cause a kernel panic" 258 // without actually causing a kernel panic, so we need this workaround until 259 // the circumstances of pre-3.10 crashes are clearer. 260 // For details see https://github.com/docker/docker/issues/407 261 // Docker 1.11 and above doesn't actually run on kernels older than 3.4, 262 // due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4). 263 if !kernel.CheckKernelVersion(3, 10, 0) { 264 v, _ := kernel.GetKernelVersion() 265 if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" { 266 logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String()) 267 } 268 } 269 return nil 270 } 271 272 // adaptContainerSettings is called during container creation to modify any 273 // settings necessary in the HostConfig structure. 274 func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error { 275 if adjustCPUShares && hostConfig.CPUShares > 0 { 276 // Handle unsupported CPUShares 277 if hostConfig.CPUShares < linuxMinCPUShares { 278 logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares) 279 hostConfig.CPUShares = linuxMinCPUShares 280 } else if hostConfig.CPUShares > linuxMaxCPUShares { 281 logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares) 282 hostConfig.CPUShares = linuxMaxCPUShares 283 } 284 } 285 if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 { 286 // By default, MemorySwap is set to twice the size of Memory. 287 hostConfig.MemorySwap = hostConfig.Memory * 2 288 } 289 if hostConfig.ShmSize == 0 { 290 hostConfig.ShmSize = config.DefaultShmSize 291 if daemon.configStore != nil { 292 hostConfig.ShmSize = int64(daemon.configStore.ShmSize) 293 } 294 } 295 // Set default IPC mode, if unset for container 296 if hostConfig.IpcMode.IsEmpty() { 297 m := config.DefaultIpcMode 298 if daemon.configStore != nil { 299 m = daemon.configStore.IpcMode 300 } 301 hostConfig.IpcMode = containertypes.IpcMode(m) 302 } 303 304 adaptSharedNamespaceContainer(daemon, hostConfig) 305 306 var err error 307 opts, err := daemon.generateSecurityOpt(hostConfig) 308 if err != nil { 309 return err 310 } 311 hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...) 312 if hostConfig.OomKillDisable == nil { 313 defaultOomKillDisable := false 314 hostConfig.OomKillDisable = &defaultOomKillDisable 315 } 316 317 return nil 318 } 319 320 // adaptSharedNamespaceContainer replaces container name with its ID in hostConfig. 321 // To be more precisely, it modifies `container:name` to `container:ID` of PidMode, IpcMode 322 // and NetworkMode. 323 // 324 // When a container shares its namespace with another container, use ID can keep the namespace 325 // sharing connection between the two containers even the another container is renamed. 326 func adaptSharedNamespaceContainer(daemon containerGetter, hostConfig *containertypes.HostConfig) { 327 containerPrefix := "container:" 328 if hostConfig.PidMode.IsContainer() { 329 pidContainer := hostConfig.PidMode.Container() 330 // if there is any error returned here, we just ignore it and leave it to be 331 // handled in the following logic 332 if c, err := daemon.GetContainer(pidContainer); err == nil { 333 hostConfig.PidMode = containertypes.PidMode(containerPrefix + c.ID) 334 } 335 } 336 if hostConfig.IpcMode.IsContainer() { 337 ipcContainer := hostConfig.IpcMode.Container() 338 if c, err := daemon.GetContainer(ipcContainer); err == nil { 339 hostConfig.IpcMode = containertypes.IpcMode(containerPrefix + c.ID) 340 } 341 } 342 if hostConfig.NetworkMode.IsContainer() { 343 netContainer := hostConfig.NetworkMode.ConnectedContainer() 344 if c, err := daemon.GetContainer(netContainer); err == nil { 345 hostConfig.NetworkMode = containertypes.NetworkMode(containerPrefix + c.ID) 346 } 347 } 348 } 349 350 func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) { 351 warnings := []string{} 352 fixMemorySwappiness(resources) 353 354 // memory subsystem checks and adjustments 355 if resources.Memory != 0 && resources.Memory < linuxMinMemory { 356 return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB") 357 } 358 if resources.Memory > 0 && !sysInfo.MemoryLimit { 359 warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 360 logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 361 resources.Memory = 0 362 resources.MemorySwap = -1 363 } 364 if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit { 365 warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.") 366 logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.") 367 resources.MemorySwap = -1 368 } 369 if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory { 370 return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage") 371 } 372 if resources.Memory == 0 && resources.MemorySwap > 0 && !update { 373 return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage") 374 } 375 if resources.MemorySwappiness != nil && !sysInfo.MemorySwappiness { 376 warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.") 377 logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.") 378 resources.MemorySwappiness = nil 379 } 380 if resources.MemorySwappiness != nil { 381 swappiness := *resources.MemorySwappiness 382 if swappiness < 0 || swappiness > 100 { 383 return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness) 384 } 385 } 386 if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation { 387 warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.") 388 logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.") 389 resources.MemoryReservation = 0 390 } 391 if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory { 392 return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB") 393 } 394 if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation { 395 return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage") 396 } 397 if resources.KernelMemory > 0 && !sysInfo.KernelMemory { 398 warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 399 logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 400 resources.KernelMemory = 0 401 } 402 if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory { 403 return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB") 404 } 405 if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) { 406 warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") 407 logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") 408 } 409 if resources.OomKillDisable != nil && !sysInfo.OomKillDisable { 410 // only produce warnings if the setting wasn't to *disable* the OOM Kill; no point 411 // warning the caller if they already wanted the feature to be off 412 if *resources.OomKillDisable { 413 warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.") 414 logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.") 415 } 416 resources.OomKillDisable = nil 417 } 418 419 if resources.PidsLimit != 0 && !sysInfo.PidsLimit { 420 warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.") 421 logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.") 422 resources.PidsLimit = 0 423 } 424 425 // cpu subsystem checks and adjustments 426 if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 { 427 return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set") 428 } 429 if resources.NanoCPUs > 0 && resources.CPUQuota > 0 { 430 return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set") 431 } 432 if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) { 433 return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted") 434 } 435 // The highest precision we could get on Linux is 0.001, by setting 436 // cpu.cfs_period_us=1000ms 437 // cpu.cfs_quota=1ms 438 // See the following link for details: 439 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 440 // Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error. 441 // The error message is 0.01 so that this is consistent with Windows 442 if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 { 443 return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU()) 444 } 445 446 if resources.CPUShares > 0 && !sysInfo.CPUShares { 447 warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.") 448 logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.") 449 resources.CPUShares = 0 450 } 451 if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod { 452 warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.") 453 logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.") 454 resources.CPUPeriod = 0 455 } 456 if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) { 457 return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)") 458 } 459 if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota { 460 warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.") 461 logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.") 462 resources.CPUQuota = 0 463 } 464 if resources.CPUQuota > 0 && resources.CPUQuota < 1000 { 465 return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)") 466 } 467 if resources.CPUPercent > 0 { 468 warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)) 469 logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS) 470 resources.CPUPercent = 0 471 } 472 473 // cpuset subsystem checks and adjustments 474 if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset { 475 warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.") 476 logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.") 477 resources.CpusetCpus = "" 478 resources.CpusetMems = "" 479 } 480 cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus) 481 if err != nil { 482 return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus) 483 } 484 if !cpusAvailable { 485 return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus) 486 } 487 memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems) 488 if err != nil { 489 return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems) 490 } 491 if !memsAvailable { 492 return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems) 493 } 494 495 // blkio subsystem checks and adjustments 496 if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight { 497 warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.") 498 logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.") 499 resources.BlkioWeight = 0 500 } 501 if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) { 502 return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000") 503 } 504 if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 { 505 return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS) 506 } 507 if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice { 508 warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.") 509 logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.") 510 resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{} 511 } 512 if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice { 513 warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.") 514 logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded") 515 resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{} 516 } 517 if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice { 518 warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.") 519 logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.") 520 resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{} 521 522 } 523 if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice { 524 warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.") 525 logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.") 526 resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{} 527 } 528 if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice { 529 warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.") 530 logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.") 531 resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{} 532 } 533 534 return warnings, nil 535 } 536 537 func (daemon *Daemon) getCgroupDriver() string { 538 cgroupDriver := cgroupFsDriver 539 540 if UsingSystemd(daemon.configStore) { 541 cgroupDriver = cgroupSystemdDriver 542 } 543 return cgroupDriver 544 } 545 546 // getCD gets the raw value of the native.cgroupdriver option, if set. 547 func getCD(config *config.Config) string { 548 for _, option := range config.ExecOptions { 549 key, val, err := parsers.ParseKeyValueOpt(option) 550 if err != nil || !strings.EqualFold(key, "native.cgroupdriver") { 551 continue 552 } 553 return val 554 } 555 return "" 556 } 557 558 // VerifyCgroupDriver validates native.cgroupdriver 559 func VerifyCgroupDriver(config *config.Config) error { 560 cd := getCD(config) 561 if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver { 562 return nil 563 } 564 return fmt.Errorf("native.cgroupdriver option %s not supported", cd) 565 } 566 567 // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd 568 func UsingSystemd(config *config.Config) bool { 569 return getCD(config) == cgroupSystemdDriver 570 } 571 572 // verifyPlatformContainerSettings performs platform-specific validation of the 573 // hostconfig and config structures. 574 func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) { 575 var warnings []string 576 sysInfo := sysinfo.New(true) 577 578 warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config) 579 if err != nil { 580 return warnings, err 581 } 582 583 w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update) 584 585 // no matter err is nil or not, w could have data in itself. 586 warnings = append(warnings, w...) 587 588 if err != nil { 589 return warnings, err 590 } 591 592 if hostConfig.ShmSize < 0 { 593 return warnings, fmt.Errorf("SHM size can not be less than 0") 594 } 595 596 if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 { 597 return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj) 598 } 599 600 // ip-forwarding does not affect container with '--net=host' (or '--net=none') 601 if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) { 602 warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.") 603 logrus.Warn("IPv4 forwarding is disabled. Networking will not work") 604 } 605 // check for various conflicting options with user namespaces 606 if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() { 607 if hostConfig.Privileged { 608 return warnings, fmt.Errorf("privileged mode is incompatible with user namespaces. You must run the container in the host namespace when running privileged mode") 609 } 610 if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() { 611 return warnings, fmt.Errorf("cannot share the host's network namespace when user namespaces are enabled") 612 } 613 if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() { 614 return warnings, fmt.Errorf("cannot share the host PID namespace when user namespaces are enabled") 615 } 616 } 617 if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) { 618 // CgroupParent for systemd cgroup should be named as "xxx.slice" 619 if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") { 620 return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"") 621 } 622 } 623 if hostConfig.Runtime == "" { 624 hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName() 625 } 626 627 if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil { 628 return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime) 629 } 630 631 parser := volume.NewParser(runtime.GOOS) 632 for dest := range hostConfig.Tmpfs { 633 if err := parser.ValidateTmpfsMountDestination(dest); err != nil { 634 return warnings, err 635 } 636 } 637 638 return warnings, nil 639 } 640 641 func (daemon *Daemon) loadRuntimes() error { 642 return daemon.initRuntimes(daemon.configStore.Runtimes) 643 } 644 645 func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error) { 646 runtimeDir := filepath.Join(daemon.configStore.Root, "runtimes") 647 // Remove old temp directory if any 648 os.RemoveAll(runtimeDir + "-old") 649 tmpDir, err := ioutils.TempDir(daemon.configStore.Root, "gen-runtimes") 650 if err != nil { 651 return errors.Wrapf(err, "failed to get temp dir to generate runtime scripts") 652 } 653 defer func() { 654 if err != nil { 655 if err1 := os.RemoveAll(tmpDir); err1 != nil { 656 logrus.WithError(err1).WithField("dir", tmpDir). 657 Warnf("failed to remove tmp dir") 658 } 659 return 660 } 661 662 if err = os.Rename(runtimeDir, runtimeDir+"-old"); err != nil { 663 return 664 } 665 if err = os.Rename(tmpDir, runtimeDir); err != nil { 666 err = errors.Wrapf(err, "failed to setup runtimes dir, new containers may not start") 667 return 668 } 669 if err = os.RemoveAll(runtimeDir + "-old"); err != nil { 670 logrus.WithError(err).WithField("dir", tmpDir). 671 Warnf("failed to remove old runtimes dir") 672 } 673 }() 674 675 for name, rt := range runtimes { 676 if len(rt.Args) == 0 { 677 continue 678 } 679 680 script := filepath.Join(tmpDir, name) 681 content := fmt.Sprintf("#!/bin/sh\n%s %s $@\n", rt.Path, strings.Join(rt.Args, " ")) 682 if err := ioutil.WriteFile(script, []byte(content), 0700); err != nil { 683 return err 684 } 685 } 686 return nil 687 } 688 689 // reloadPlatform updates configuration with platform specific options 690 // and updates the passed attributes 691 func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) error { 692 if err := conf.ValidatePlatformConfig(); err != nil { 693 return err 694 } 695 696 if conf.IsValueSet("runtimes") { 697 // Always set the default one 698 conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary} 699 if err := daemon.initRuntimes(conf.Runtimes); err != nil { 700 return err 701 } 702 daemon.configStore.Runtimes = conf.Runtimes 703 } 704 705 if conf.DefaultRuntime != "" { 706 daemon.configStore.DefaultRuntime = conf.DefaultRuntime 707 } 708 709 if conf.IsValueSet("default-shm-size") { 710 daemon.configStore.ShmSize = conf.ShmSize 711 } 712 713 if conf.IpcMode != "" { 714 daemon.configStore.IpcMode = conf.IpcMode 715 } 716 717 // Update attributes 718 var runtimeList bytes.Buffer 719 for name, rt := range daemon.configStore.Runtimes { 720 if runtimeList.Len() > 0 { 721 runtimeList.WriteRune(' ') 722 } 723 runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt)) 724 } 725 726 attributes["runtimes"] = runtimeList.String() 727 attributes["default-runtime"] = daemon.configStore.DefaultRuntime 728 attributes["default-shm-size"] = fmt.Sprintf("%d", daemon.configStore.ShmSize) 729 attributes["default-ipc-mode"] = daemon.configStore.IpcMode 730 731 return nil 732 } 733 734 // verifyDaemonSettings performs validation of daemon config struct 735 func verifyDaemonSettings(conf *config.Config) error { 736 // Check for mutually incompatible config options 737 if conf.BridgeConfig.Iface != "" && conf.BridgeConfig.IP != "" { 738 return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one") 739 } 740 if !conf.BridgeConfig.EnableIPTables && !conf.BridgeConfig.InterContainerCommunication { 741 return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true") 742 } 743 if !conf.BridgeConfig.EnableIPTables && conf.BridgeConfig.EnableIPMasq { 744 conf.BridgeConfig.EnableIPMasq = false 745 } 746 if err := VerifyCgroupDriver(conf); err != nil { 747 return err 748 } 749 if conf.CgroupParent != "" && UsingSystemd(conf) { 750 if len(conf.CgroupParent) <= 6 || !strings.HasSuffix(conf.CgroupParent, ".slice") { 751 return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"") 752 } 753 } 754 755 if conf.DefaultRuntime == "" { 756 conf.DefaultRuntime = config.StockRuntimeName 757 } 758 if conf.Runtimes == nil { 759 conf.Runtimes = make(map[string]types.Runtime) 760 } 761 conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeName} 762 763 return nil 764 } 765 766 // checkSystem validates platform-specific requirements 767 func checkSystem() error { 768 if os.Geteuid() != 0 { 769 return fmt.Errorf("The Docker daemon needs to be run as root") 770 } 771 return checkKernel() 772 } 773 774 // configureMaxThreads sets the Go runtime max threads threshold 775 // which is 90% of the kernel setting from /proc/sys/kernel/threads-max 776 func configureMaxThreads(config *config.Config) error { 777 mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max") 778 if err != nil { 779 return err 780 } 781 mtint, err := strconv.Atoi(strings.TrimSpace(string(mt))) 782 if err != nil { 783 return err 784 } 785 maxThreads := (mtint / 100) * 90 786 debug.SetMaxThreads(maxThreads) 787 logrus.Debugf("Golang's threads limit set to %d", maxThreads) 788 return nil 789 } 790 791 func overlaySupportsSelinux() (bool, error) { 792 f, err := os.Open("/proc/kallsyms") 793 if err != nil { 794 if os.IsNotExist(err) { 795 return false, nil 796 } 797 return false, err 798 } 799 defer f.Close() 800 801 var symAddr, symType, symName, text string 802 803 s := bufio.NewScanner(f) 804 for s.Scan() { 805 if err := s.Err(); err != nil { 806 return false, err 807 } 808 809 text = s.Text() 810 if _, err := fmt.Sscanf(text, "%s %s %s", &symAddr, &symType, &symName); err != nil { 811 return false, fmt.Errorf("Scanning '%s' failed: %s", text, err) 812 } 813 814 // Check for presence of symbol security_inode_copy_up. 815 if symName == "security_inode_copy_up" { 816 return true, nil 817 } 818 } 819 return false, nil 820 } 821 822 // configureKernelSecuritySupport configures and validates security support for the kernel 823 func configureKernelSecuritySupport(config *config.Config, driverNames []string) error { 824 if config.EnableSelinuxSupport { 825 if !selinuxEnabled() { 826 logrus.Warn("Docker could not enable SELinux on the host system") 827 return nil 828 } 829 830 overlayFound := false 831 for _, d := range driverNames { 832 if d == "overlay" || d == "overlay2" { 833 overlayFound = true 834 break 835 } 836 } 837 838 if overlayFound { 839 // If driver is overlay or overlay2, make sure kernel 840 // supports selinux with overlay. 841 supported, err := overlaySupportsSelinux() 842 if err != nil { 843 return err 844 } 845 846 if !supported { 847 logrus.Warnf("SELinux is not supported with the %v graph driver on this kernel", driverNames) 848 } 849 } 850 } else { 851 selinuxSetDisabled() 852 } 853 return nil 854 } 855 856 func (daemon *Daemon) initNetworkController(config *config.Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) { 857 netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes) 858 if err != nil { 859 return nil, err 860 } 861 862 controller, err := libnetwork.New(netOptions...) 863 if err != nil { 864 return nil, fmt.Errorf("error obtaining controller instance: %v", err) 865 } 866 867 if len(activeSandboxes) > 0 { 868 logrus.Info("There are old running containers, the network config will not take affect") 869 return controller, nil 870 } 871 872 // Initialize default network on "null" 873 if n, _ := controller.NetworkByName("none"); n == nil { 874 if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil { 875 return nil, fmt.Errorf("Error creating default \"null\" network: %v", err) 876 } 877 } 878 879 // Initialize default network on "host" 880 if n, _ := controller.NetworkByName("host"); n == nil { 881 if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil { 882 return nil, fmt.Errorf("Error creating default \"host\" network: %v", err) 883 } 884 } 885 886 // Clear stale bridge network 887 if n, err := controller.NetworkByName("bridge"); err == nil { 888 if err = n.Delete(); err != nil { 889 return nil, fmt.Errorf("could not delete the default bridge network: %v", err) 890 } 891 } 892 893 if !config.DisableBridge { 894 // Initialize default driver "bridge" 895 if err := initBridgeDriver(controller, config); err != nil { 896 return nil, err 897 } 898 } else { 899 removeDefaultBridgeInterface() 900 } 901 902 return controller, nil 903 } 904 905 func driverOptions(config *config.Config) []nwconfig.Option { 906 bridgeConfig := options.Generic{ 907 "EnableIPForwarding": config.BridgeConfig.EnableIPForward, 908 "EnableIPTables": config.BridgeConfig.EnableIPTables, 909 "EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy, 910 "UserlandProxyPath": config.BridgeConfig.UserlandProxyPath} 911 bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig} 912 913 dOptions := []nwconfig.Option{} 914 dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption)) 915 return dOptions 916 } 917 918 func initBridgeDriver(controller libnetwork.NetworkController, config *config.Config) error { 919 bridgeName := bridge.DefaultBridgeName 920 if config.BridgeConfig.Iface != "" { 921 bridgeName = config.BridgeConfig.Iface 922 } 923 netOption := map[string]string{ 924 bridge.BridgeName: bridgeName, 925 bridge.DefaultBridge: strconv.FormatBool(true), 926 netlabel.DriverMTU: strconv.Itoa(config.Mtu), 927 bridge.EnableIPMasquerade: strconv.FormatBool(config.BridgeConfig.EnableIPMasq), 928 bridge.EnableICC: strconv.FormatBool(config.BridgeConfig.InterContainerCommunication), 929 } 930 931 // --ip processing 932 if config.BridgeConfig.DefaultIP != nil { 933 netOption[bridge.DefaultBindingIP] = config.BridgeConfig.DefaultIP.String() 934 } 935 936 var ( 937 ipamV4Conf *libnetwork.IpamConf 938 ipamV6Conf *libnetwork.IpamConf 939 ) 940 941 ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)} 942 943 nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName) 944 if err != nil { 945 return errors.Wrap(err, "list bridge addresses failed") 946 } 947 948 nw := nwList[0] 949 if len(nwList) > 1 && config.BridgeConfig.FixedCIDR != "" { 950 _, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR) 951 if err != nil { 952 return errors.Wrap(err, "parse CIDR failed") 953 } 954 // Iterate through in case there are multiple addresses for the bridge 955 for _, entry := range nwList { 956 if fCIDR.Contains(entry.IP) { 957 nw = entry 958 break 959 } 960 } 961 } 962 963 ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String() 964 hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask) 965 if hip.IsGlobalUnicast() { 966 ipamV4Conf.Gateway = nw.IP.String() 967 } 968 969 if config.BridgeConfig.IP != "" { 970 ipamV4Conf.PreferredPool = config.BridgeConfig.IP 971 ip, _, err := net.ParseCIDR(config.BridgeConfig.IP) 972 if err != nil { 973 return err 974 } 975 ipamV4Conf.Gateway = ip.String() 976 } else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" { 977 logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool) 978 } 979 980 if config.BridgeConfig.FixedCIDR != "" { 981 _, fCIDR, err := net.ParseCIDR(config.BridgeConfig.FixedCIDR) 982 if err != nil { 983 return err 984 } 985 986 ipamV4Conf.SubPool = fCIDR.String() 987 } 988 989 if config.BridgeConfig.DefaultGatewayIPv4 != nil { 990 ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.BridgeConfig.DefaultGatewayIPv4.String() 991 } 992 993 var deferIPv6Alloc bool 994 if config.BridgeConfig.FixedCIDRv6 != "" { 995 _, fCIDRv6, err := net.ParseCIDR(config.BridgeConfig.FixedCIDRv6) 996 if err != nil { 997 return err 998 } 999 1000 // In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has 1001 // at least 48 host bits, we need to guarantee the current behavior where the containers' 1002 // IPv6 addresses will be constructed based on the containers' interface MAC address. 1003 // We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints 1004 // on this network until after the driver has created the endpoint and returned the 1005 // constructed address. Libnetwork will then reserve this address with the ipam driver. 1006 ones, _ := fCIDRv6.Mask.Size() 1007 deferIPv6Alloc = ones <= 80 1008 1009 if ipamV6Conf == nil { 1010 ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)} 1011 } 1012 ipamV6Conf.PreferredPool = fCIDRv6.String() 1013 1014 // In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6 1015 // address belongs to the same network, we need to inform libnetwork about it, so 1016 // that it can be reserved with IPAM and it will not be given away to somebody else 1017 for _, nw6 := range nw6List { 1018 if fCIDRv6.Contains(nw6.IP) { 1019 ipamV6Conf.Gateway = nw6.IP.String() 1020 break 1021 } 1022 } 1023 } 1024 1025 if config.BridgeConfig.DefaultGatewayIPv6 != nil { 1026 if ipamV6Conf == nil { 1027 ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)} 1028 } 1029 ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.BridgeConfig.DefaultGatewayIPv6.String() 1030 } 1031 1032 v4Conf := []*libnetwork.IpamConf{ipamV4Conf} 1033 v6Conf := []*libnetwork.IpamConf{} 1034 if ipamV6Conf != nil { 1035 v6Conf = append(v6Conf, ipamV6Conf) 1036 } 1037 // Initialize default network on "bridge" with the same name 1038 _, err = controller.NewNetwork("bridge", "bridge", "", 1039 libnetwork.NetworkOptionEnableIPv6(config.BridgeConfig.EnableIPv6), 1040 libnetwork.NetworkOptionDriverOpts(netOption), 1041 libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil), 1042 libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc)) 1043 if err != nil { 1044 return fmt.Errorf("Error creating default \"bridge\" network: %v", err) 1045 } 1046 return nil 1047 } 1048 1049 // Remove default bridge interface if present (--bridge=none use case) 1050 func removeDefaultBridgeInterface() { 1051 if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil { 1052 if err := netlink.LinkDel(lnk); err != nil { 1053 logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err) 1054 } 1055 } 1056 } 1057 1058 func (daemon *Daemon) getLayerInit() func(containerfs.ContainerFS) error { 1059 return daemon.setupInitLayer 1060 } 1061 1062 // Parse the remapped root (user namespace) option, which can be one of: 1063 // username - valid username from /etc/passwd 1064 // username:groupname - valid username; valid groupname from /etc/group 1065 // uid - 32-bit unsigned int valid Linux UID value 1066 // uid:gid - uid value; 32-bit unsigned int Linux GID value 1067 // 1068 // If no groupname is specified, and a username is specified, an attempt 1069 // will be made to lookup a gid for that username as a groupname 1070 // 1071 // If names are used, they are verified to exist in passwd/group 1072 func parseRemappedRoot(usergrp string) (string, string, error) { 1073 1074 var ( 1075 userID, groupID int 1076 username, groupname string 1077 ) 1078 1079 idparts := strings.Split(usergrp, ":") 1080 if len(idparts) > 2 { 1081 return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp) 1082 } 1083 1084 if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil { 1085 // must be a uid; take it as valid 1086 userID = int(uid) 1087 luser, err := idtools.LookupUID(userID) 1088 if err != nil { 1089 return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err) 1090 } 1091 username = luser.Name 1092 if len(idparts) == 1 { 1093 // if the uid was numeric and no gid was specified, take the uid as the gid 1094 groupID = userID 1095 lgrp, err := idtools.LookupGID(groupID) 1096 if err != nil { 1097 return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err) 1098 } 1099 groupname = lgrp.Name 1100 } 1101 } else { 1102 lookupName := idparts[0] 1103 // special case: if the user specified "default", they want Docker to create or 1104 // use (after creation) the "dockremap" user/group for root remapping 1105 if lookupName == defaultIDSpecifier { 1106 lookupName = defaultRemappedID 1107 } 1108 luser, err := idtools.LookupUser(lookupName) 1109 if err != nil && idparts[0] != defaultIDSpecifier { 1110 // error if the name requested isn't the special "dockremap" ID 1111 return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err) 1112 } else if err != nil { 1113 // special case-- if the username == "default", then we have been asked 1114 // to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid} 1115 // ranges will be used for the user and group mappings in user namespaced containers 1116 _, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID) 1117 if err == nil { 1118 return defaultRemappedID, defaultRemappedID, nil 1119 } 1120 return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err) 1121 } 1122 username = luser.Name 1123 if len(idparts) == 1 { 1124 // we only have a string username, and no group specified; look up gid from username as group 1125 group, err := idtools.LookupGroup(lookupName) 1126 if err != nil { 1127 return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err) 1128 } 1129 groupname = group.Name 1130 } 1131 } 1132 1133 if len(idparts) == 2 { 1134 // groupname or gid is separately specified and must be resolved 1135 // to an unsigned 32-bit gid 1136 if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil { 1137 // must be a gid, take it as valid 1138 groupID = int(gid) 1139 lgrp, err := idtools.LookupGID(groupID) 1140 if err != nil { 1141 return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err) 1142 } 1143 groupname = lgrp.Name 1144 } else { 1145 // not a number; attempt a lookup 1146 if _, err := idtools.LookupGroup(idparts[1]); err != nil { 1147 return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err) 1148 } 1149 groupname = idparts[1] 1150 } 1151 } 1152 return username, groupname, nil 1153 } 1154 1155 func setupRemappedRoot(config *config.Config) (*idtools.IDMappings, error) { 1156 if runtime.GOOS != "linux" && config.RemappedRoot != "" { 1157 return nil, fmt.Errorf("User namespaces are only supported on Linux") 1158 } 1159 1160 // if the daemon was started with remapped root option, parse 1161 // the config option to the int uid,gid values 1162 if config.RemappedRoot != "" { 1163 username, groupname, err := parseRemappedRoot(config.RemappedRoot) 1164 if err != nil { 1165 return nil, err 1166 } 1167 if username == "root" { 1168 // Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op 1169 // effectively 1170 logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF") 1171 return &idtools.IDMappings{}, nil 1172 } 1173 logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname) 1174 // update remapped root setting now that we have resolved them to actual names 1175 config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname) 1176 1177 mappings, err := idtools.NewIDMappings(username, groupname) 1178 if err != nil { 1179 return nil, errors.Wrapf(err, "Can't create ID mappings: %v") 1180 } 1181 return mappings, nil 1182 } 1183 return &idtools.IDMappings{}, nil 1184 } 1185 1186 func setupDaemonRoot(config *config.Config, rootDir string, rootIDs idtools.IDPair) error { 1187 config.Root = rootDir 1188 // the docker root metadata directory needs to have execute permissions for all users (g+x,o+x) 1189 // so that syscalls executing as non-root, operating on subdirectories of the graph root 1190 // (e.g. mounted layers of a container) can traverse this path. 1191 // The user namespace support will create subdirectories for the remapped root host uid:gid 1192 // pair owned by that same uid:gid pair for proper write access to those needed metadata and 1193 // layer content subtrees. 1194 if _, err := os.Stat(rootDir); err == nil { 1195 // root current exists; verify the access bits are correct by setting them 1196 if err = os.Chmod(rootDir, 0711); err != nil { 1197 return err 1198 } 1199 } else if os.IsNotExist(err) { 1200 // no root exists yet, create it 0711 with root:root ownership 1201 if err := os.MkdirAll(rootDir, 0711); err != nil { 1202 return err 1203 } 1204 } 1205 1206 // if user namespaces are enabled we will create a subtree underneath the specified root 1207 // with any/all specified remapped root uid/gid options on the daemon creating 1208 // a new subdirectory with ownership set to the remapped uid/gid (so as to allow 1209 // `chdir()` to work for containers namespaced to that uid/gid) 1210 if config.RemappedRoot != "" { 1211 config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootIDs.UID, rootIDs.GID)) 1212 logrus.Debugf("Creating user namespaced daemon root: %s", config.Root) 1213 // Create the root directory if it doesn't exist 1214 if err := idtools.MkdirAllAndChown(config.Root, 0700, rootIDs); err != nil { 1215 return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err) 1216 } 1217 // we also need to verify that any pre-existing directories in the path to 1218 // the graphroot won't block access to remapped root--if any pre-existing directory 1219 // has strict permissions that don't allow "x", container start will fail, so 1220 // better to warn and fail now 1221 dirPath := config.Root 1222 for { 1223 dirPath = filepath.Dir(dirPath) 1224 if dirPath == "/" { 1225 break 1226 } 1227 if !idtools.CanAccess(dirPath, rootIDs) { 1228 return fmt.Errorf("a subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories", config.Root) 1229 } 1230 } 1231 } 1232 1233 if err := ensureSharedOrSlave(config.Root); err != nil { 1234 if err := mount.MakeShared(config.Root); err != nil { 1235 logrus.WithError(err).WithField("dir", config.Root).Warn("Could not set daemon root propagation to shared, this is not generally critical but may cause some functionality to not work or fallback to less desirable behavior") 1236 } 1237 } 1238 return nil 1239 } 1240 1241 // registerLinks writes the links to a file. 1242 func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error { 1243 if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() { 1244 return nil 1245 } 1246 1247 for _, l := range hostConfig.Links { 1248 name, alias, err := opts.ParseLink(l) 1249 if err != nil { 1250 return err 1251 } 1252 child, err := daemon.GetContainer(name) 1253 if err != nil { 1254 return errors.Wrapf(err, "could not get container for %s", name) 1255 } 1256 for child.HostConfig.NetworkMode.IsContainer() { 1257 parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2) 1258 child, err = daemon.GetContainer(parts[1]) 1259 if err != nil { 1260 return errors.Wrapf(err, "Could not get container for %s", parts[1]) 1261 } 1262 } 1263 if child.HostConfig.NetworkMode.IsHost() { 1264 return runconfig.ErrConflictHostNetworkAndLinks 1265 } 1266 if err := daemon.registerLink(container, child, alias); err != nil { 1267 return err 1268 } 1269 } 1270 1271 // After we load all the links into the daemon 1272 // set them to nil on the hostconfig 1273 _, err := container.WriteHostConfig() 1274 return err 1275 } 1276 1277 // conditionalMountOnStart is a platform specific helper function during the 1278 // container start to call mount. 1279 func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error { 1280 return daemon.Mount(container) 1281 } 1282 1283 // conditionalUnmountOnCleanup is a platform specific helper function called 1284 // during the cleanup of a container to unmount. 1285 func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error { 1286 return daemon.Unmount(container) 1287 } 1288 1289 func copyBlkioEntry(entries []*containerd_cgroups.BlkIOEntry) []types.BlkioStatEntry { 1290 out := make([]types.BlkioStatEntry, len(entries)) 1291 for i, re := range entries { 1292 out[i] = types.BlkioStatEntry{ 1293 Major: re.Major, 1294 Minor: re.Minor, 1295 Op: re.Op, 1296 Value: re.Value, 1297 } 1298 } 1299 return out 1300 } 1301 1302 func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { 1303 if !c.IsRunning() { 1304 return nil, errNotRunning(c.ID) 1305 } 1306 cs, err := daemon.containerd.Stats(context.Background(), c.ID) 1307 if err != nil { 1308 if strings.Contains(err.Error(), "container not found") { 1309 return nil, containerNotFound(c.ID) 1310 } 1311 return nil, err 1312 } 1313 s := &types.StatsJSON{} 1314 s.Read = cs.Read 1315 stats := cs.Metrics 1316 if stats.Blkio != nil { 1317 s.BlkioStats = types.BlkioStats{ 1318 IoServiceBytesRecursive: copyBlkioEntry(stats.Blkio.IoServiceBytesRecursive), 1319 IoServicedRecursive: copyBlkioEntry(stats.Blkio.IoServicedRecursive), 1320 IoQueuedRecursive: copyBlkioEntry(stats.Blkio.IoQueuedRecursive), 1321 IoServiceTimeRecursive: copyBlkioEntry(stats.Blkio.IoServiceTimeRecursive), 1322 IoWaitTimeRecursive: copyBlkioEntry(stats.Blkio.IoWaitTimeRecursive), 1323 IoMergedRecursive: copyBlkioEntry(stats.Blkio.IoMergedRecursive), 1324 IoTimeRecursive: copyBlkioEntry(stats.Blkio.IoTimeRecursive), 1325 SectorsRecursive: copyBlkioEntry(stats.Blkio.SectorsRecursive), 1326 } 1327 } 1328 if stats.CPU != nil { 1329 s.CPUStats = types.CPUStats{ 1330 CPUUsage: types.CPUUsage{ 1331 TotalUsage: stats.CPU.Usage.Total, 1332 PercpuUsage: stats.CPU.Usage.PerCPU, 1333 UsageInKernelmode: stats.CPU.Usage.Kernel, 1334 UsageInUsermode: stats.CPU.Usage.User, 1335 }, 1336 ThrottlingData: types.ThrottlingData{ 1337 Periods: stats.CPU.Throttling.Periods, 1338 ThrottledPeriods: stats.CPU.Throttling.ThrottledPeriods, 1339 ThrottledTime: stats.CPU.Throttling.ThrottledTime, 1340 }, 1341 } 1342 } 1343 1344 if stats.Memory != nil { 1345 raw := make(map[string]uint64) 1346 raw["cache"] = stats.Memory.Cache 1347 raw["rss"] = stats.Memory.RSS 1348 raw["rss_huge"] = stats.Memory.RSSHuge 1349 raw["mapped_file"] = stats.Memory.MappedFile 1350 raw["dirty"] = stats.Memory.Dirty 1351 raw["writeback"] = stats.Memory.Writeback 1352 raw["pgpgin"] = stats.Memory.PgPgIn 1353 raw["pgpgout"] = stats.Memory.PgPgOut 1354 raw["pgfault"] = stats.Memory.PgFault 1355 raw["pgmajfault"] = stats.Memory.PgMajFault 1356 raw["inactive_anon"] = stats.Memory.InactiveAnon 1357 raw["active_anon"] = stats.Memory.ActiveAnon 1358 raw["inactive_file"] = stats.Memory.InactiveFile 1359 raw["active_file"] = stats.Memory.ActiveFile 1360 raw["unevictable"] = stats.Memory.Unevictable 1361 raw["hierarchical_memory_limit"] = stats.Memory.HierarchicalMemoryLimit 1362 raw["hierarchical_memsw_limit"] = stats.Memory.HierarchicalSwapLimit 1363 raw["total_cache"] = stats.Memory.TotalCache 1364 raw["total_rss"] = stats.Memory.TotalRSS 1365 raw["total_rss_huge"] = stats.Memory.TotalRSSHuge 1366 raw["total_mapped_file"] = stats.Memory.TotalMappedFile 1367 raw["total_dirty"] = stats.Memory.TotalDirty 1368 raw["total_writeback"] = stats.Memory.TotalWriteback 1369 raw["total_pgpgin"] = stats.Memory.TotalPgPgIn 1370 raw["total_pgpgout"] = stats.Memory.TotalPgPgOut 1371 raw["total_pgfault"] = stats.Memory.TotalPgFault 1372 raw["total_pgmajfault"] = stats.Memory.TotalPgMajFault 1373 raw["total_inactive_anon"] = stats.Memory.TotalInactiveAnon 1374 raw["total_active_anon"] = stats.Memory.TotalActiveAnon 1375 raw["total_inactive_file"] = stats.Memory.TotalInactiveFile 1376 raw["total_active_file"] = stats.Memory.TotalActiveFile 1377 raw["total_unevictable"] = stats.Memory.TotalUnevictable 1378 1379 if stats.Memory.Usage != nil { 1380 s.MemoryStats = types.MemoryStats{ 1381 Stats: raw, 1382 Usage: stats.Memory.Usage.Usage, 1383 MaxUsage: stats.Memory.Usage.Max, 1384 Limit: stats.Memory.Usage.Limit, 1385 Failcnt: stats.Memory.Usage.Failcnt, 1386 } 1387 } else { 1388 s.MemoryStats = types.MemoryStats{ 1389 Stats: raw, 1390 } 1391 } 1392 1393 // if the container does not set memory limit, use the machineMemory 1394 if s.MemoryStats.Limit > daemon.machineMemory && daemon.machineMemory > 0 { 1395 s.MemoryStats.Limit = daemon.machineMemory 1396 } 1397 } 1398 1399 if stats.Pids != nil { 1400 s.PidsStats = types.PidsStats{ 1401 Current: stats.Pids.Current, 1402 Limit: stats.Pids.Limit, 1403 } 1404 } 1405 1406 return s, nil 1407 } 1408 1409 // setDefaultIsolation determines the default isolation mode for the 1410 // daemon to run in. This is only applicable on Windows 1411 func (daemon *Daemon) setDefaultIsolation() error { 1412 return nil 1413 } 1414 1415 func rootFSToAPIType(rootfs *image.RootFS) types.RootFS { 1416 var layers []string 1417 for _, l := range rootfs.DiffIDs { 1418 layers = append(layers, l.String()) 1419 } 1420 return types.RootFS{ 1421 Type: rootfs.Type, 1422 Layers: layers, 1423 } 1424 } 1425 1426 // setupDaemonProcess sets various settings for the daemon's process 1427 func setupDaemonProcess(config *config.Config) error { 1428 // setup the daemons oom_score_adj 1429 if err := setupOOMScoreAdj(config.OOMScoreAdjust); err != nil { 1430 return err 1431 } 1432 if err := setMayDetachMounts(); err != nil { 1433 logrus.WithError(err).Warn("Could not set may_detach_mounts kernel parameter") 1434 } 1435 return nil 1436 } 1437 1438 // This is used to allow removal of mountpoints that may be mounted in other 1439 // namespaces on RHEL based kernels starting from RHEL 7.4. 1440 // Without this setting, removals on these RHEL based kernels may fail with 1441 // "device or resource busy". 1442 // This setting is not available in upstream kernels as it is not configurable, 1443 // but has been in the upstream kernels since 3.15. 1444 func setMayDetachMounts() error { 1445 f, err := os.OpenFile("/proc/sys/fs/may_detach_mounts", os.O_WRONLY, 0) 1446 if err != nil { 1447 if os.IsNotExist(err) { 1448 return nil 1449 } 1450 return errors.Wrap(err, "error opening may_detach_mounts kernel config file") 1451 } 1452 defer f.Close() 1453 1454 _, err = f.WriteString("1") 1455 if os.IsPermission(err) { 1456 // Setting may_detach_mounts does not work in an 1457 // unprivileged container. Ignore the error, but log 1458 // it if we appear not to be in that situation. 1459 if !rsystem.RunningInUserNS() { 1460 logrus.Debugf("Permission denied writing %q to /proc/sys/fs/may_detach_mounts", "1") 1461 } 1462 return nil 1463 } 1464 return err 1465 } 1466 1467 func setupOOMScoreAdj(score int) error { 1468 f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0) 1469 if err != nil { 1470 return err 1471 } 1472 defer f.Close() 1473 stringScore := strconv.Itoa(score) 1474 _, err = f.WriteString(stringScore) 1475 if os.IsPermission(err) { 1476 // Setting oom_score_adj does not work in an 1477 // unprivileged container. Ignore the error, but log 1478 // it if we appear not to be in that situation. 1479 if !rsystem.RunningInUserNS() { 1480 logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore) 1481 } 1482 return nil 1483 } 1484 1485 return err 1486 } 1487 1488 func (daemon *Daemon) initCgroupsPath(path string) error { 1489 if path == "/" || path == "." { 1490 return nil 1491 } 1492 1493 if daemon.configStore.CPURealtimePeriod == 0 && daemon.configStore.CPURealtimeRuntime == 0 { 1494 return nil 1495 } 1496 1497 // Recursively create cgroup to ensure that the system and all parent cgroups have values set 1498 // for the period and runtime as this limits what the children can be set to. 1499 daemon.initCgroupsPath(filepath.Dir(path)) 1500 1501 mnt, root, err := cgroups.FindCgroupMountpointAndRoot("cpu") 1502 if err != nil { 1503 return err 1504 } 1505 // When docker is run inside docker, the root is based of the host cgroup. 1506 // Should this be handled in runc/libcontainer/cgroups ? 1507 if strings.HasPrefix(root, "/docker/") { 1508 root = "/" 1509 } 1510 1511 path = filepath.Join(mnt, root, path) 1512 sysinfo := sysinfo.New(true) 1513 if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil { 1514 return err 1515 } 1516 if err := maybeCreateCPURealTimeFile(sysinfo.CPURealtimeRuntime, daemon.configStore.CPURealtimeRuntime, "cpu.rt_runtime_us", path); err != nil { 1517 return err 1518 } 1519 return nil 1520 } 1521 1522 func maybeCreateCPURealTimeFile(sysinfoPresent bool, configValue int64, file string, path string) error { 1523 if sysinfoPresent && configValue != 0 { 1524 if err := os.MkdirAll(path, 0755); err != nil { 1525 return err 1526 } 1527 if err := ioutil.WriteFile(filepath.Join(path, file), []byte(strconv.FormatInt(configValue, 10)), 0700); err != nil { 1528 return err 1529 } 1530 } 1531 return nil 1532 } 1533 1534 func (daemon *Daemon) setupSeccompProfile() error { 1535 if daemon.configStore.SeccompProfile != "" { 1536 daemon.seccompProfilePath = daemon.configStore.SeccompProfile 1537 b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile) 1538 if err != nil { 1539 return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err) 1540 } 1541 daemon.seccompProfile = b 1542 } 1543 return nil 1544 }