github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/daemon/daemon_unix.go (about) 1 // +build linux freebsd 2 3 package daemon 4 5 import ( 6 "bytes" 7 "fmt" 8 "io/ioutil" 9 "net" 10 "os" 11 "path/filepath" 12 "runtime" 13 "runtime/debug" 14 "strconv" 15 "strings" 16 "syscall" 17 "time" 18 19 "github.com/Sirupsen/logrus" 20 "github.com/docker/docker/api/types" 21 "github.com/docker/docker/api/types/blkiodev" 22 pblkiodev "github.com/docker/docker/api/types/blkiodev" 23 containertypes "github.com/docker/docker/api/types/container" 24 "github.com/docker/docker/container" 25 "github.com/docker/docker/image" 26 "github.com/docker/docker/pkg/idtools" 27 "github.com/docker/docker/pkg/parsers" 28 "github.com/docker/docker/pkg/parsers/kernel" 29 "github.com/docker/docker/pkg/sysinfo" 30 "github.com/docker/docker/runconfig" 31 runconfigopts "github.com/docker/docker/runconfig/opts" 32 "github.com/docker/libnetwork" 33 nwconfig "github.com/docker/libnetwork/config" 34 "github.com/docker/libnetwork/drivers/bridge" 35 "github.com/docker/libnetwork/netlabel" 36 "github.com/docker/libnetwork/netutils" 37 "github.com/docker/libnetwork/options" 38 lntypes "github.com/docker/libnetwork/types" 39 "github.com/golang/protobuf/ptypes" 40 "github.com/opencontainers/runc/libcontainer/cgroups" 41 "github.com/opencontainers/runc/libcontainer/label" 42 rsystem "github.com/opencontainers/runc/libcontainer/system" 43 specs "github.com/opencontainers/runtime-spec/specs-go" 44 "github.com/pkg/errors" 45 "github.com/vishvananda/netlink" 46 ) 47 48 const ( 49 // See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269 50 linuxMinCPUShares = 2 51 linuxMaxCPUShares = 262144 52 platformSupported = true 53 // It's not kernel limit, we want this 4M limit to supply a reasonable functional container 54 linuxMinMemory = 4194304 55 // constants for remapped root settings 56 defaultIDSpecifier string = "default" 57 defaultRemappedID string = "dockremap" 58 59 // constant for cgroup drivers 60 cgroupFsDriver = "cgroupfs" 61 cgroupSystemdDriver = "systemd" 62 ) 63 64 func getMemoryResources(config containertypes.Resources) *specs.Memory { 65 memory := specs.Memory{} 66 67 if config.Memory > 0 { 68 limit := uint64(config.Memory) 69 memory.Limit = &limit 70 } 71 72 if config.MemoryReservation > 0 { 73 reservation := uint64(config.MemoryReservation) 74 memory.Reservation = &reservation 75 } 76 77 if config.MemorySwap != 0 { 78 swap := uint64(config.MemorySwap) 79 memory.Swap = &swap 80 } 81 82 if config.MemorySwappiness != nil { 83 swappiness := uint64(*config.MemorySwappiness) 84 memory.Swappiness = &swappiness 85 } 86 87 if config.KernelMemory != 0 { 88 kernelMemory := uint64(config.KernelMemory) 89 memory.Kernel = &kernelMemory 90 } 91 92 return &memory 93 } 94 95 func getCPUResources(config containertypes.Resources) *specs.CPU { 96 cpu := specs.CPU{} 97 98 if config.CPUShares != 0 { 99 shares := uint64(config.CPUShares) 100 cpu.Shares = &shares 101 } 102 103 if config.CpusetCpus != "" { 104 cpuset := config.CpusetCpus 105 cpu.Cpus = &cpuset 106 } 107 108 if config.CpusetMems != "" { 109 cpuset := config.CpusetMems 110 cpu.Mems = &cpuset 111 } 112 113 if config.NanoCPUs > 0 { 114 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 115 period := uint64(100 * time.Millisecond / time.Microsecond) 116 quota := uint64(config.NanoCPUs) * period / 1e9 117 cpu.Period = &period 118 cpu.Quota = "a 119 } 120 121 if config.CPUPeriod != 0 { 122 period := uint64(config.CPUPeriod) 123 cpu.Period = &period 124 } 125 126 if config.CPUQuota != 0 { 127 quota := uint64(config.CPUQuota) 128 cpu.Quota = "a 129 } 130 131 if config.CPURealtimePeriod != 0 { 132 period := uint64(config.CPURealtimePeriod) 133 cpu.RealtimePeriod = &period 134 } 135 136 if config.CPURealtimeRuntime != 0 { 137 runtime := uint64(config.CPURealtimeRuntime) 138 cpu.RealtimeRuntime = &runtime 139 } 140 141 return &cpu 142 } 143 144 func getBlkioWeightDevices(config containertypes.Resources) ([]specs.WeightDevice, error) { 145 var stat syscall.Stat_t 146 var blkioWeightDevices []specs.WeightDevice 147 148 for _, weightDevice := range config.BlkioWeightDevice { 149 if err := syscall.Stat(weightDevice.Path, &stat); err != nil { 150 return nil, err 151 } 152 weight := weightDevice.Weight 153 d := specs.WeightDevice{Weight: &weight} 154 d.Major = int64(stat.Rdev / 256) 155 d.Minor = int64(stat.Rdev % 256) 156 blkioWeightDevices = append(blkioWeightDevices, d) 157 } 158 159 return blkioWeightDevices, nil 160 } 161 162 func parseSecurityOpt(container *container.Container, config *containertypes.HostConfig) error { 163 var ( 164 labelOpts []string 165 err error 166 ) 167 168 for _, opt := range config.SecurityOpt { 169 if opt == "no-new-privileges" { 170 container.NoNewPrivileges = true 171 continue 172 } 173 174 var con []string 175 if strings.Contains(opt, "=") { 176 con = strings.SplitN(opt, "=", 2) 177 } else if strings.Contains(opt, ":") { 178 con = strings.SplitN(opt, ":", 2) 179 logrus.Warn("Security options with `:` as a separator are deprecated and will be completely unsupported in 1.14, use `=` instead.") 180 } 181 182 if len(con) != 2 { 183 return fmt.Errorf("invalid --security-opt 1: %q", opt) 184 } 185 186 switch con[0] { 187 case "label": 188 labelOpts = append(labelOpts, con[1]) 189 case "apparmor": 190 container.AppArmorProfile = con[1] 191 case "seccomp": 192 container.SeccompProfile = con[1] 193 default: 194 return fmt.Errorf("invalid --security-opt 2: %q", opt) 195 } 196 } 197 198 container.ProcessLabel, container.MountLabel, err = label.InitLabels(labelOpts) 199 return err 200 } 201 202 func getBlkioThrottleDevices(devs []*blkiodev.ThrottleDevice) ([]specs.ThrottleDevice, error) { 203 var throttleDevices []specs.ThrottleDevice 204 var stat syscall.Stat_t 205 206 for _, d := range devs { 207 if err := syscall.Stat(d.Path, &stat); err != nil { 208 return nil, err 209 } 210 rate := d.Rate 211 d := specs.ThrottleDevice{Rate: &rate} 212 d.Major = int64(stat.Rdev / 256) 213 d.Minor = int64(stat.Rdev % 256) 214 throttleDevices = append(throttleDevices, d) 215 } 216 217 return throttleDevices, nil 218 } 219 220 func checkKernel() error { 221 // Check for unsupported kernel versions 222 // FIXME: it would be cleaner to not test for specific versions, but rather 223 // test for specific functionalities. 224 // Unfortunately we can't test for the feature "does not cause a kernel panic" 225 // without actually causing a kernel panic, so we need this workaround until 226 // the circumstances of pre-3.10 crashes are clearer. 227 // For details see https://github.com/docker/docker/issues/407 228 // Docker 1.11 and above doesn't actually run on kernels older than 3.4, 229 // due to containerd-shim usage of PR_SET_CHILD_SUBREAPER (introduced in 3.4). 230 if !kernel.CheckKernelVersion(3, 10, 0) { 231 v, _ := kernel.GetKernelVersion() 232 if os.Getenv("DOCKER_NOWARN_KERNEL_VERSION") == "" { 233 logrus.Fatalf("Your Linux kernel version %s is not supported for running docker. Please upgrade your kernel to 3.10.0 or newer.", v.String()) 234 } 235 } 236 return nil 237 } 238 239 // adaptContainerSettings is called during container creation to modify any 240 // settings necessary in the HostConfig structure. 241 func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConfig, adjustCPUShares bool) error { 242 if adjustCPUShares && hostConfig.CPUShares > 0 { 243 // Handle unsupported CPUShares 244 if hostConfig.CPUShares < linuxMinCPUShares { 245 logrus.Warnf("Changing requested CPUShares of %d to minimum allowed of %d", hostConfig.CPUShares, linuxMinCPUShares) 246 hostConfig.CPUShares = linuxMinCPUShares 247 } else if hostConfig.CPUShares > linuxMaxCPUShares { 248 logrus.Warnf("Changing requested CPUShares of %d to maximum allowed of %d", hostConfig.CPUShares, linuxMaxCPUShares) 249 hostConfig.CPUShares = linuxMaxCPUShares 250 } 251 } 252 if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 { 253 // By default, MemorySwap is set to twice the size of Memory. 254 hostConfig.MemorySwap = hostConfig.Memory * 2 255 } 256 if hostConfig.ShmSize == 0 { 257 hostConfig.ShmSize = container.DefaultSHMSize 258 } 259 var err error 260 opts, err := daemon.generateSecurityOpt(hostConfig.IpcMode, hostConfig.PidMode, hostConfig.Privileged) 261 if err != nil { 262 return err 263 } 264 hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, opts...) 265 if hostConfig.MemorySwappiness == nil { 266 defaultSwappiness := int64(-1) 267 hostConfig.MemorySwappiness = &defaultSwappiness 268 } 269 if hostConfig.OomKillDisable == nil { 270 defaultOomKillDisable := false 271 hostConfig.OomKillDisable = &defaultOomKillDisable 272 } 273 274 return nil 275 } 276 277 func verifyContainerResources(resources *containertypes.Resources, sysInfo *sysinfo.SysInfo, update bool) ([]string, error) { 278 warnings := []string{} 279 280 // memory subsystem checks and adjustments 281 if resources.Memory != 0 && resources.Memory < linuxMinMemory { 282 return warnings, fmt.Errorf("Minimum memory limit allowed is 4MB") 283 } 284 if resources.Memory > 0 && !sysInfo.MemoryLimit { 285 warnings = append(warnings, "Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 286 logrus.Warn("Your kernel does not support memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 287 resources.Memory = 0 288 resources.MemorySwap = -1 289 } 290 if resources.Memory > 0 && resources.MemorySwap != -1 && !sysInfo.SwapLimit { 291 warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.") 292 logrus.Warn("Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.") 293 resources.MemorySwap = -1 294 } 295 if resources.Memory > 0 && resources.MemorySwap > 0 && resources.MemorySwap < resources.Memory { 296 return warnings, fmt.Errorf("Minimum memoryswap limit should be larger than memory limit, see usage") 297 } 298 if resources.Memory == 0 && resources.MemorySwap > 0 && !update { 299 return warnings, fmt.Errorf("You should always set the Memory limit when using Memoryswap limit, see usage") 300 } 301 if resources.MemorySwappiness != nil && *resources.MemorySwappiness != -1 && !sysInfo.MemorySwappiness { 302 warnings = append(warnings, "Your kernel does not support memory swappiness capabilities or the cgroup is not mounted. Memory swappiness discarded.") 303 logrus.Warn("Your kernel does not support memory swappiness capabilities, or the cgroup is not mounted. Memory swappiness discarded.") 304 resources.MemorySwappiness = nil 305 } 306 if resources.MemorySwappiness != nil { 307 swappiness := *resources.MemorySwappiness 308 if swappiness < -1 || swappiness > 100 { 309 return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100", swappiness) 310 } 311 } 312 if resources.MemoryReservation > 0 && !sysInfo.MemoryReservation { 313 warnings = append(warnings, "Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.") 314 logrus.Warn("Your kernel does not support memory soft limit capabilities or the cgroup is not mounted. Limitation discarded.") 315 resources.MemoryReservation = 0 316 } 317 if resources.MemoryReservation > 0 && resources.MemoryReservation < linuxMinMemory { 318 return warnings, fmt.Errorf("Minimum memory reservation allowed is 4MB") 319 } 320 if resources.Memory > 0 && resources.MemoryReservation > 0 && resources.Memory < resources.MemoryReservation { 321 return warnings, fmt.Errorf("Minimum memory limit can not be less than memory reservation limit, see usage") 322 } 323 if resources.KernelMemory > 0 && !sysInfo.KernelMemory { 324 warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 325 logrus.Warn("Your kernel does not support kernel memory limit capabilities or the cgroup is not mounted. Limitation discarded.") 326 resources.KernelMemory = 0 327 } 328 if resources.KernelMemory > 0 && resources.KernelMemory < linuxMinMemory { 329 return warnings, fmt.Errorf("Minimum kernel memory limit allowed is 4MB") 330 } 331 if resources.KernelMemory > 0 && !kernel.CheckKernelVersion(4, 0, 0) { 332 warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") 333 logrus.Warn("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.") 334 } 335 if resources.OomKillDisable != nil && !sysInfo.OomKillDisable { 336 // only produce warnings if the setting wasn't to *disable* the OOM Kill; no point 337 // warning the caller if they already wanted the feature to be off 338 if *resources.OomKillDisable { 339 warnings = append(warnings, "Your kernel does not support OomKillDisable. OomKillDisable discarded.") 340 logrus.Warn("Your kernel does not support OomKillDisable. OomKillDisable discarded.") 341 } 342 resources.OomKillDisable = nil 343 } 344 345 if resources.PidsLimit != 0 && !sysInfo.PidsLimit { 346 warnings = append(warnings, "Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.") 347 logrus.Warn("Your kernel does not support pids limit capabilities or the cgroup is not mounted. PIDs limit discarded.") 348 resources.PidsLimit = 0 349 } 350 351 // cpu subsystem checks and adjustments 352 if resources.NanoCPUs > 0 && resources.CPUPeriod > 0 { 353 return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Period cannot both be set") 354 } 355 if resources.NanoCPUs > 0 && resources.CPUQuota > 0 { 356 return warnings, fmt.Errorf("Conflicting options: Nano CPUs and CPU Quota cannot both be set") 357 } 358 if resources.NanoCPUs > 0 && (!sysInfo.CPUCfsPeriod || !sysInfo.CPUCfsQuota) { 359 return warnings, fmt.Errorf("NanoCPUs can not be set, as your kernel does not support CPU cfs period/quota or the cgroup is not mounted") 360 } 361 // The highest precision we could get on Linux is 0.001, by setting 362 // cpu.cfs_period_us=1000ms 363 // cpu.cfs_quota=1ms 364 // See the following link for details: 365 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 366 // Here we don't set the lower limit and it is up to the underlying platform (e.g., Linux) to return an error. 367 // The error message is 0.01 so that this is consistent with Windows 368 if resources.NanoCPUs < 0 || resources.NanoCPUs > int64(sysinfo.NumCPU())*1e9 { 369 return warnings, fmt.Errorf("Range of CPUs is from 0.01 to %d.00, as there are only %d CPUs available", sysinfo.NumCPU(), sysinfo.NumCPU()) 370 } 371 372 if resources.CPUShares > 0 && !sysInfo.CPUShares { 373 warnings = append(warnings, "Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.") 374 logrus.Warn("Your kernel does not support CPU shares or the cgroup is not mounted. Shares discarded.") 375 resources.CPUShares = 0 376 } 377 if resources.CPUPeriod > 0 && !sysInfo.CPUCfsPeriod { 378 warnings = append(warnings, "Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.") 379 logrus.Warn("Your kernel does not support CPU cfs period or the cgroup is not mounted. Period discarded.") 380 resources.CPUPeriod = 0 381 } 382 if resources.CPUPeriod != 0 && (resources.CPUPeriod < 1000 || resources.CPUPeriod > 1000000) { 383 return warnings, fmt.Errorf("CPU cfs period can not be less than 1ms (i.e. 1000) or larger than 1s (i.e. 1000000)") 384 } 385 if resources.CPUQuota > 0 && !sysInfo.CPUCfsQuota { 386 warnings = append(warnings, "Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.") 387 logrus.Warn("Your kernel does not support CPU cfs quota or the cgroup is not mounted. Quota discarded.") 388 resources.CPUQuota = 0 389 } 390 if resources.CPUQuota > 0 && resources.CPUQuota < 1000 { 391 return warnings, fmt.Errorf("CPU cfs quota can not be less than 1ms (i.e. 1000)") 392 } 393 if resources.CPUPercent > 0 { 394 warnings = append(warnings, fmt.Sprintf("%s does not support CPU percent. Percent discarded.", runtime.GOOS)) 395 logrus.Warnf("%s does not support CPU percent. Percent discarded.", runtime.GOOS) 396 resources.CPUPercent = 0 397 } 398 399 // cpuset subsystem checks and adjustments 400 if (resources.CpusetCpus != "" || resources.CpusetMems != "") && !sysInfo.Cpuset { 401 warnings = append(warnings, "Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.") 402 logrus.Warn("Your kernel does not support cpuset or the cgroup is not mounted. Cpuset discarded.") 403 resources.CpusetCpus = "" 404 resources.CpusetMems = "" 405 } 406 cpusAvailable, err := sysInfo.IsCpusetCpusAvailable(resources.CpusetCpus) 407 if err != nil { 408 return warnings, fmt.Errorf("Invalid value %s for cpuset cpus", resources.CpusetCpus) 409 } 410 if !cpusAvailable { 411 return warnings, fmt.Errorf("Requested CPUs are not available - requested %s, available: %s", resources.CpusetCpus, sysInfo.Cpus) 412 } 413 memsAvailable, err := sysInfo.IsCpusetMemsAvailable(resources.CpusetMems) 414 if err != nil { 415 return warnings, fmt.Errorf("Invalid value %s for cpuset mems", resources.CpusetMems) 416 } 417 if !memsAvailable { 418 return warnings, fmt.Errorf("Requested memory nodes are not available - requested %s, available: %s", resources.CpusetMems, sysInfo.Mems) 419 } 420 421 // blkio subsystem checks and adjustments 422 if resources.BlkioWeight > 0 && !sysInfo.BlkioWeight { 423 warnings = append(warnings, "Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.") 424 logrus.Warn("Your kernel does not support Block I/O weight or the cgroup is not mounted. Weight discarded.") 425 resources.BlkioWeight = 0 426 } 427 if resources.BlkioWeight > 0 && (resources.BlkioWeight < 10 || resources.BlkioWeight > 1000) { 428 return warnings, fmt.Errorf("Range of blkio weight is from 10 to 1000") 429 } 430 if resources.IOMaximumBandwidth != 0 || resources.IOMaximumIOps != 0 { 431 return warnings, fmt.Errorf("Invalid QoS settings: %s does not support Maximum IO Bandwidth or Maximum IO IOps", runtime.GOOS) 432 } 433 if len(resources.BlkioWeightDevice) > 0 && !sysInfo.BlkioWeightDevice { 434 warnings = append(warnings, "Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.") 435 logrus.Warn("Your kernel does not support Block I/O weight_device or the cgroup is not mounted. Weight-device discarded.") 436 resources.BlkioWeightDevice = []*pblkiodev.WeightDevice{} 437 } 438 if len(resources.BlkioDeviceReadBps) > 0 && !sysInfo.BlkioReadBpsDevice { 439 warnings = append(warnings, "Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded.") 440 logrus.Warn("Your kernel does not support BPS Block I/O read limit or the cgroup is not mounted. Block I/O BPS read limit discarded") 441 resources.BlkioDeviceReadBps = []*pblkiodev.ThrottleDevice{} 442 } 443 if len(resources.BlkioDeviceWriteBps) > 0 && !sysInfo.BlkioWriteBpsDevice { 444 warnings = append(warnings, "Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.") 445 logrus.Warn("Your kernel does not support BPS Block I/O write limit or the cgroup is not mounted. Block I/O BPS write limit discarded.") 446 resources.BlkioDeviceWriteBps = []*pblkiodev.ThrottleDevice{} 447 } 448 if len(resources.BlkioDeviceReadIOps) > 0 && !sysInfo.BlkioReadIOpsDevice { 449 warnings = append(warnings, "Your kernel does not support IOPS Block read limit or the cgroup is not mounted. Block I/O IOPS read limit discarded.") 450 logrus.Warn("Your kernel does not support IOPS Block I/O read limit in IO or the cgroup is not mounted. Block I/O IOPS read limit discarded.") 451 resources.BlkioDeviceReadIOps = []*pblkiodev.ThrottleDevice{} 452 } 453 if len(resources.BlkioDeviceWriteIOps) > 0 && !sysInfo.BlkioWriteIOpsDevice { 454 warnings = append(warnings, "Your kernel does not support IOPS Block write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.") 455 logrus.Warn("Your kernel does not support IOPS Block I/O write limit or the cgroup is not mounted. Block I/O IOPS write limit discarded.") 456 resources.BlkioDeviceWriteIOps = []*pblkiodev.ThrottleDevice{} 457 } 458 459 return warnings, nil 460 } 461 462 func (daemon *Daemon) getCgroupDriver() string { 463 cgroupDriver := cgroupFsDriver 464 465 if UsingSystemd(daemon.configStore) { 466 cgroupDriver = cgroupSystemdDriver 467 } 468 return cgroupDriver 469 } 470 471 // getCD gets the raw value of the native.cgroupdriver option, if set. 472 func getCD(config *Config) string { 473 for _, option := range config.ExecOptions { 474 key, val, err := parsers.ParseKeyValueOpt(option) 475 if err != nil || !strings.EqualFold(key, "native.cgroupdriver") { 476 continue 477 } 478 return val 479 } 480 return "" 481 } 482 483 // VerifyCgroupDriver validates native.cgroupdriver 484 func VerifyCgroupDriver(config *Config) error { 485 cd := getCD(config) 486 if cd == "" || cd == cgroupFsDriver || cd == cgroupSystemdDriver { 487 return nil 488 } 489 return fmt.Errorf("native.cgroupdriver option %s not supported", cd) 490 } 491 492 // UsingSystemd returns true if cli option includes native.cgroupdriver=systemd 493 func UsingSystemd(config *Config) bool { 494 return getCD(config) == cgroupSystemdDriver 495 } 496 497 // verifyPlatformContainerSettings performs platform-specific validation of the 498 // hostconfig and config structures. 499 func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) ([]string, error) { 500 warnings := []string{} 501 sysInfo := sysinfo.New(true) 502 503 warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config) 504 if err != nil { 505 return warnings, err 506 } 507 508 w, err := verifyContainerResources(&hostConfig.Resources, sysInfo, update) 509 510 // no matter err is nil or not, w could have data in itself. 511 warnings = append(warnings, w...) 512 513 if err != nil { 514 return warnings, err 515 } 516 517 if hostConfig.ShmSize < 0 { 518 return warnings, fmt.Errorf("SHM size can not be less than 0") 519 } 520 521 if hostConfig.OomScoreAdj < -1000 || hostConfig.OomScoreAdj > 1000 { 522 return warnings, fmt.Errorf("Invalid value %d, range for oom score adj is [-1000, 1000]", hostConfig.OomScoreAdj) 523 } 524 525 // ip-forwarding does not affect container with '--net=host' (or '--net=none') 526 if sysInfo.IPv4ForwardingDisabled && !(hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsNone()) { 527 warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.") 528 logrus.Warn("IPv4 forwarding is disabled. Networking will not work") 529 } 530 // check for various conflicting options with user namespaces 531 if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() { 532 if hostConfig.Privileged { 533 return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces") 534 } 535 if hostConfig.NetworkMode.IsHost() && !hostConfig.UsernsMode.IsHost() { 536 return warnings, fmt.Errorf("Cannot share the host's network namespace when user namespaces are enabled") 537 } 538 if hostConfig.PidMode.IsHost() && !hostConfig.UsernsMode.IsHost() { 539 return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled") 540 } 541 } 542 if hostConfig.CgroupParent != "" && UsingSystemd(daemon.configStore) { 543 // CgroupParent for systemd cgroup should be named as "xxx.slice" 544 if len(hostConfig.CgroupParent) <= 6 || !strings.HasSuffix(hostConfig.CgroupParent, ".slice") { 545 return warnings, fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"") 546 } 547 } 548 if hostConfig.Runtime == "" { 549 hostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName() 550 } 551 552 if rt := daemon.configStore.GetRuntime(hostConfig.Runtime); rt == nil { 553 return warnings, fmt.Errorf("Unknown runtime specified %s", hostConfig.Runtime) 554 } 555 556 return warnings, nil 557 } 558 559 // platformReload update configuration with platform specific options 560 func (daemon *Daemon) platformReload(config *Config) map[string]string { 561 if config.IsValueSet("runtimes") { 562 daemon.configStore.Runtimes = config.Runtimes 563 // Always set the default one 564 daemon.configStore.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary} 565 } 566 567 if config.DefaultRuntime != "" { 568 daemon.configStore.DefaultRuntime = config.DefaultRuntime 569 } 570 571 // Update attributes 572 var runtimeList bytes.Buffer 573 for name, rt := range daemon.configStore.Runtimes { 574 if runtimeList.Len() > 0 { 575 runtimeList.WriteRune(' ') 576 } 577 runtimeList.WriteString(fmt.Sprintf("%s:%s", name, rt)) 578 } 579 580 return map[string]string{ 581 "runtimes": runtimeList.String(), 582 "default-runtime": daemon.configStore.DefaultRuntime, 583 } 584 } 585 586 // verifyDaemonSettings performs validation of daemon config struct 587 func verifyDaemonSettings(config *Config) error { 588 // Check for mutually incompatible config options 589 if config.bridgeConfig.Iface != "" && config.bridgeConfig.IP != "" { 590 return fmt.Errorf("You specified -b & --bip, mutually exclusive options. Please specify only one") 591 } 592 if !config.bridgeConfig.EnableIPTables && !config.bridgeConfig.InterContainerCommunication { 593 return fmt.Errorf("You specified --iptables=false with --icc=false. ICC=false uses iptables to function. Please set --icc or --iptables to true") 594 } 595 if !config.bridgeConfig.EnableIPTables && config.bridgeConfig.EnableIPMasq { 596 config.bridgeConfig.EnableIPMasq = false 597 } 598 if err := VerifyCgroupDriver(config); err != nil { 599 return err 600 } 601 if config.CgroupParent != "" && UsingSystemd(config) { 602 if len(config.CgroupParent) <= 6 || !strings.HasSuffix(config.CgroupParent, ".slice") { 603 return fmt.Errorf("cgroup-parent for systemd cgroup should be a valid slice named as \"xxx.slice\"") 604 } 605 } 606 607 if config.DefaultRuntime == "" { 608 config.DefaultRuntime = stockRuntimeName 609 } 610 if config.Runtimes == nil { 611 config.Runtimes = make(map[string]types.Runtime) 612 } 613 config.Runtimes[stockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary} 614 615 return nil 616 } 617 618 // checkSystem validates platform-specific requirements 619 func checkSystem() error { 620 if os.Geteuid() != 0 { 621 return fmt.Errorf("The Docker daemon needs to be run as root") 622 } 623 return checkKernel() 624 } 625 626 // configureMaxThreads sets the Go runtime max threads threshold 627 // which is 90% of the kernel setting from /proc/sys/kernel/threads-max 628 func configureMaxThreads(config *Config) error { 629 mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max") 630 if err != nil { 631 return err 632 } 633 mtint, err := strconv.Atoi(strings.TrimSpace(string(mt))) 634 if err != nil { 635 return err 636 } 637 maxThreads := (mtint / 100) * 90 638 debug.SetMaxThreads(maxThreads) 639 logrus.Debugf("Golang's threads limit set to %d", maxThreads) 640 return nil 641 } 642 643 // configureKernelSecuritySupport configures and validates security support for the kernel 644 func configureKernelSecuritySupport(config *Config, driverName string) error { 645 if config.EnableSelinuxSupport { 646 if !selinuxEnabled() { 647 logrus.Warn("Docker could not enable SELinux on the host system") 648 } 649 } else { 650 selinuxSetDisabled() 651 } 652 return nil 653 } 654 655 func (daemon *Daemon) initNetworkController(config *Config, activeSandboxes map[string]interface{}) (libnetwork.NetworkController, error) { 656 netOptions, err := daemon.networkOptions(config, daemon.PluginStore, activeSandboxes) 657 if err != nil { 658 return nil, err 659 } 660 661 controller, err := libnetwork.New(netOptions...) 662 if err != nil { 663 return nil, fmt.Errorf("error obtaining controller instance: %v", err) 664 } 665 666 if len(activeSandboxes) > 0 { 667 logrus.Info("There are old running containers, the network config will not take affect") 668 return controller, nil 669 } 670 671 // Initialize default network on "null" 672 if n, _ := controller.NetworkByName("none"); n == nil { 673 if _, err := controller.NewNetwork("null", "none", "", libnetwork.NetworkOptionPersist(true)); err != nil { 674 return nil, fmt.Errorf("Error creating default \"null\" network: %v", err) 675 } 676 } 677 678 // Initialize default network on "host" 679 if n, _ := controller.NetworkByName("host"); n == nil { 680 if _, err := controller.NewNetwork("host", "host", "", libnetwork.NetworkOptionPersist(true)); err != nil { 681 return nil, fmt.Errorf("Error creating default \"host\" network: %v", err) 682 } 683 } 684 685 // Clear stale bridge network 686 if n, err := controller.NetworkByName("bridge"); err == nil { 687 if err = n.Delete(); err != nil { 688 return nil, fmt.Errorf("could not delete the default bridge network: %v", err) 689 } 690 } 691 692 if !config.DisableBridge { 693 // Initialize default driver "bridge" 694 if err := initBridgeDriver(controller, config); err != nil { 695 return nil, err 696 } 697 } else { 698 removeDefaultBridgeInterface() 699 } 700 701 return controller, nil 702 } 703 704 func driverOptions(config *Config) []nwconfig.Option { 705 bridgeConfig := options.Generic{ 706 "EnableIPForwarding": config.bridgeConfig.EnableIPForward, 707 "EnableIPTables": config.bridgeConfig.EnableIPTables, 708 "EnableUserlandProxy": config.bridgeConfig.EnableUserlandProxy, 709 "UserlandProxyPath": config.bridgeConfig.UserlandProxyPath} 710 bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig} 711 712 dOptions := []nwconfig.Option{} 713 dOptions = append(dOptions, nwconfig.OptionDriverConfig("bridge", bridgeOption)) 714 return dOptions 715 } 716 717 func initBridgeDriver(controller libnetwork.NetworkController, config *Config) error { 718 bridgeName := bridge.DefaultBridgeName 719 if config.bridgeConfig.Iface != "" { 720 bridgeName = config.bridgeConfig.Iface 721 } 722 netOption := map[string]string{ 723 bridge.BridgeName: bridgeName, 724 bridge.DefaultBridge: strconv.FormatBool(true), 725 netlabel.DriverMTU: strconv.Itoa(config.Mtu), 726 bridge.EnableIPMasquerade: strconv.FormatBool(config.bridgeConfig.EnableIPMasq), 727 bridge.EnableICC: strconv.FormatBool(config.bridgeConfig.InterContainerCommunication), 728 } 729 730 // --ip processing 731 if config.bridgeConfig.DefaultIP != nil { 732 netOption[bridge.DefaultBindingIP] = config.bridgeConfig.DefaultIP.String() 733 } 734 735 var ( 736 ipamV4Conf *libnetwork.IpamConf 737 ipamV6Conf *libnetwork.IpamConf 738 ) 739 740 ipamV4Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)} 741 742 nwList, nw6List, err := netutils.ElectInterfaceAddresses(bridgeName) 743 if err != nil { 744 return errors.Wrap(err, "list bridge addresses failed") 745 } 746 747 nw := nwList[0] 748 if len(nwList) > 1 && config.bridgeConfig.FixedCIDR != "" { 749 _, fCIDR, err := net.ParseCIDR(config.bridgeConfig.FixedCIDR) 750 if err != nil { 751 return errors.Wrap(err, "parse CIDR failed") 752 } 753 // Iterate through in case there are multiple addresses for the bridge 754 for _, entry := range nwList { 755 if fCIDR.Contains(entry.IP) { 756 nw = entry 757 break 758 } 759 } 760 } 761 762 ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String() 763 hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask) 764 if hip.IsGlobalUnicast() { 765 ipamV4Conf.Gateway = nw.IP.String() 766 } 767 768 if config.bridgeConfig.IP != "" { 769 ipamV4Conf.PreferredPool = config.bridgeConfig.IP 770 ip, _, err := net.ParseCIDR(config.bridgeConfig.IP) 771 if err != nil { 772 return err 773 } 774 ipamV4Conf.Gateway = ip.String() 775 } else if bridgeName == bridge.DefaultBridgeName && ipamV4Conf.PreferredPool != "" { 776 logrus.Infof("Default bridge (%s) is assigned with an IP address %s. Daemon option --bip can be used to set a preferred IP address", bridgeName, ipamV4Conf.PreferredPool) 777 } 778 779 if config.bridgeConfig.FixedCIDR != "" { 780 _, fCIDR, err := net.ParseCIDR(config.bridgeConfig.FixedCIDR) 781 if err != nil { 782 return err 783 } 784 785 ipamV4Conf.SubPool = fCIDR.String() 786 } 787 788 if config.bridgeConfig.DefaultGatewayIPv4 != nil { 789 ipamV4Conf.AuxAddresses["DefaultGatewayIPv4"] = config.bridgeConfig.DefaultGatewayIPv4.String() 790 } 791 792 var deferIPv6Alloc bool 793 if config.bridgeConfig.FixedCIDRv6 != "" { 794 _, fCIDRv6, err := net.ParseCIDR(config.bridgeConfig.FixedCIDRv6) 795 if err != nil { 796 return err 797 } 798 799 // In case user has specified the daemon flag --fixed-cidr-v6 and the passed network has 800 // at least 48 host bits, we need to guarantee the current behavior where the containers' 801 // IPv6 addresses will be constructed based on the containers' interface MAC address. 802 // We do so by telling libnetwork to defer the IPv6 address allocation for the endpoints 803 // on this network until after the driver has created the endpoint and returned the 804 // constructed address. Libnetwork will then reserve this address with the ipam driver. 805 ones, _ := fCIDRv6.Mask.Size() 806 deferIPv6Alloc = ones <= 80 807 808 if ipamV6Conf == nil { 809 ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)} 810 } 811 ipamV6Conf.PreferredPool = fCIDRv6.String() 812 813 // In case the --fixed-cidr-v6 is specified and the current docker0 bridge IPv6 814 // address belongs to the same network, we need to inform libnetwork about it, so 815 // that it can be reserved with IPAM and it will not be given away to somebody else 816 for _, nw6 := range nw6List { 817 if fCIDRv6.Contains(nw6.IP) { 818 ipamV6Conf.Gateway = nw6.IP.String() 819 break 820 } 821 } 822 } 823 824 if config.bridgeConfig.DefaultGatewayIPv6 != nil { 825 if ipamV6Conf == nil { 826 ipamV6Conf = &libnetwork.IpamConf{AuxAddresses: make(map[string]string)} 827 } 828 ipamV6Conf.AuxAddresses["DefaultGatewayIPv6"] = config.bridgeConfig.DefaultGatewayIPv6.String() 829 } 830 831 v4Conf := []*libnetwork.IpamConf{ipamV4Conf} 832 v6Conf := []*libnetwork.IpamConf{} 833 if ipamV6Conf != nil { 834 v6Conf = append(v6Conf, ipamV6Conf) 835 } 836 // Initialize default network on "bridge" with the same name 837 _, err = controller.NewNetwork("bridge", "bridge", "", 838 libnetwork.NetworkOptionEnableIPv6(config.bridgeConfig.EnableIPv6), 839 libnetwork.NetworkOptionDriverOpts(netOption), 840 libnetwork.NetworkOptionIpam("default", "", v4Conf, v6Conf, nil), 841 libnetwork.NetworkOptionDeferIPv6Alloc(deferIPv6Alloc)) 842 if err != nil { 843 return fmt.Errorf("Error creating default \"bridge\" network: %v", err) 844 } 845 return nil 846 } 847 848 // Remove default bridge interface if present (--bridge=none use case) 849 func removeDefaultBridgeInterface() { 850 if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil { 851 if err := netlink.LinkDel(lnk); err != nil { 852 logrus.Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err) 853 } 854 } 855 } 856 857 func (daemon *Daemon) getLayerInit() func(string) error { 858 return daemon.setupInitLayer 859 } 860 861 // setupInitLayer populates a directory with mountpoints suitable 862 // for bind-mounting things into the container. 863 // 864 // This extra layer is used by all containers as the top-most ro layer. It protects 865 // the container from unwanted side-effects on the rw layer. 866 func setupInitLayer(initLayer string, rootUID, rootGID int) error { 867 for pth, typ := range map[string]string{ 868 "/dev/pts": "dir", 869 "/dev/shm": "dir", 870 "/proc": "dir", 871 "/sys": "dir", 872 "/.dockerenv": "file", 873 "/etc/resolv.conf": "file", 874 "/etc/hosts": "file", 875 "/etc/hostname": "file", 876 "/dev/console": "file", 877 "/etc/mtab": "/proc/mounts", 878 } { 879 parts := strings.Split(pth, "/") 880 prev := "/" 881 for _, p := range parts[1:] { 882 prev = filepath.Join(prev, p) 883 syscall.Unlink(filepath.Join(initLayer, prev)) 884 } 885 886 if _, err := os.Stat(filepath.Join(initLayer, pth)); err != nil { 887 if os.IsNotExist(err) { 888 if err := idtools.MkdirAllNewAs(filepath.Join(initLayer, filepath.Dir(pth)), 0755, rootUID, rootGID); err != nil { 889 return err 890 } 891 switch typ { 892 case "dir": 893 if err := idtools.MkdirAllNewAs(filepath.Join(initLayer, pth), 0755, rootUID, rootGID); err != nil { 894 return err 895 } 896 case "file": 897 f, err := os.OpenFile(filepath.Join(initLayer, pth), os.O_CREATE, 0755) 898 if err != nil { 899 return err 900 } 901 f.Chown(rootUID, rootGID) 902 f.Close() 903 default: 904 if err := os.Symlink(typ, filepath.Join(initLayer, pth)); err != nil { 905 return err 906 } 907 } 908 } else { 909 return err 910 } 911 } 912 } 913 914 // Layer is ready to use, if it wasn't before. 915 return nil 916 } 917 918 // Parse the remapped root (user namespace) option, which can be one of: 919 // username - valid username from /etc/passwd 920 // username:groupname - valid username; valid groupname from /etc/group 921 // uid - 32-bit unsigned int valid Linux UID value 922 // uid:gid - uid value; 32-bit unsigned int Linux GID value 923 // 924 // If no groupname is specified, and a username is specified, an attempt 925 // will be made to lookup a gid for that username as a groupname 926 // 927 // If names are used, they are verified to exist in passwd/group 928 func parseRemappedRoot(usergrp string) (string, string, error) { 929 930 var ( 931 userID, groupID int 932 username, groupname string 933 ) 934 935 idparts := strings.Split(usergrp, ":") 936 if len(idparts) > 2 { 937 return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp) 938 } 939 940 if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil { 941 // must be a uid; take it as valid 942 userID = int(uid) 943 luser, err := idtools.LookupUID(userID) 944 if err != nil { 945 return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err) 946 } 947 username = luser.Name 948 if len(idparts) == 1 { 949 // if the uid was numeric and no gid was specified, take the uid as the gid 950 groupID = userID 951 lgrp, err := idtools.LookupGID(groupID) 952 if err != nil { 953 return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err) 954 } 955 groupname = lgrp.Name 956 } 957 } else { 958 lookupName := idparts[0] 959 // special case: if the user specified "default", they want Docker to create or 960 // use (after creation) the "dockremap" user/group for root remapping 961 if lookupName == defaultIDSpecifier { 962 lookupName = defaultRemappedID 963 } 964 luser, err := idtools.LookupUser(lookupName) 965 if err != nil && idparts[0] != defaultIDSpecifier { 966 // error if the name requested isn't the special "dockremap" ID 967 return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err) 968 } else if err != nil { 969 // special case-- if the username == "default", then we have been asked 970 // to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid} 971 // ranges will be used for the user and group mappings in user namespaced containers 972 _, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID) 973 if err == nil { 974 return defaultRemappedID, defaultRemappedID, nil 975 } 976 return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err) 977 } 978 username = luser.Name 979 if len(idparts) == 1 { 980 // we only have a string username, and no group specified; look up gid from username as group 981 group, err := idtools.LookupGroup(lookupName) 982 if err != nil { 983 return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err) 984 } 985 groupID = group.Gid 986 groupname = group.Name 987 } 988 } 989 990 if len(idparts) == 2 { 991 // groupname or gid is separately specified and must be resolved 992 // to an unsigned 32-bit gid 993 if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil { 994 // must be a gid, take it as valid 995 groupID = int(gid) 996 lgrp, err := idtools.LookupGID(groupID) 997 if err != nil { 998 return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err) 999 } 1000 groupname = lgrp.Name 1001 } else { 1002 // not a number; attempt a lookup 1003 if _, err := idtools.LookupGroup(idparts[1]); err != nil { 1004 return "", "", fmt.Errorf("Error during groupname lookup for %q: %v", idparts[1], err) 1005 } 1006 groupname = idparts[1] 1007 } 1008 } 1009 return username, groupname, nil 1010 } 1011 1012 func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) { 1013 if runtime.GOOS != "linux" && config.RemappedRoot != "" { 1014 return nil, nil, fmt.Errorf("User namespaces are only supported on Linux") 1015 } 1016 1017 // if the daemon was started with remapped root option, parse 1018 // the config option to the int uid,gid values 1019 var ( 1020 uidMaps, gidMaps []idtools.IDMap 1021 ) 1022 if config.RemappedRoot != "" { 1023 username, groupname, err := parseRemappedRoot(config.RemappedRoot) 1024 if err != nil { 1025 return nil, nil, err 1026 } 1027 if username == "root" { 1028 // Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op 1029 // effectively 1030 logrus.Warn("User namespaces: root cannot be remapped with itself; user namespaces are OFF") 1031 return uidMaps, gidMaps, nil 1032 } 1033 logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname) 1034 // update remapped root setting now that we have resolved them to actual names 1035 config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname) 1036 1037 uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname) 1038 if err != nil { 1039 return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err) 1040 } 1041 } 1042 return uidMaps, gidMaps, nil 1043 } 1044 1045 func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error { 1046 config.Root = rootDir 1047 // the docker root metadata directory needs to have execute permissions for all users (g+x,o+x) 1048 // so that syscalls executing as non-root, operating on subdirectories of the graph root 1049 // (e.g. mounted layers of a container) can traverse this path. 1050 // The user namespace support will create subdirectories for the remapped root host uid:gid 1051 // pair owned by that same uid:gid pair for proper write access to those needed metadata and 1052 // layer content subtrees. 1053 if _, err := os.Stat(rootDir); err == nil { 1054 // root current exists; verify the access bits are correct by setting them 1055 if err = os.Chmod(rootDir, 0711); err != nil { 1056 return err 1057 } 1058 } else if os.IsNotExist(err) { 1059 // no root exists yet, create it 0711 with root:root ownership 1060 if err := os.MkdirAll(rootDir, 0711); err != nil { 1061 return err 1062 } 1063 } 1064 1065 // if user namespaces are enabled we will create a subtree underneath the specified root 1066 // with any/all specified remapped root uid/gid options on the daemon creating 1067 // a new subdirectory with ownership set to the remapped uid/gid (so as to allow 1068 // `chdir()` to work for containers namespaced to that uid/gid) 1069 if config.RemappedRoot != "" { 1070 config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID)) 1071 logrus.Debugf("Creating user namespaced daemon root: %s", config.Root) 1072 // Create the root directory if it doesn't exist 1073 if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil { 1074 return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err) 1075 } 1076 // we also need to verify that any pre-existing directories in the path to 1077 // the graphroot won't block access to remapped root--if any pre-existing directory 1078 // has strict permissions that don't allow "x", container start will fail, so 1079 // better to warn and fail now 1080 dirPath := config.Root 1081 for { 1082 dirPath = filepath.Dir(dirPath) 1083 if dirPath == "/" { 1084 break 1085 } 1086 if !idtools.CanAccess(dirPath, rootUID, rootGID) { 1087 return fmt.Errorf("A subdirectory in your graphroot path (%s) restricts access to the remapped root uid/gid; please fix by allowing 'o+x' permissions on existing directories.", config.Root) 1088 } 1089 } 1090 } 1091 return nil 1092 } 1093 1094 // registerLinks writes the links to a file. 1095 func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error { 1096 if hostConfig == nil || hostConfig.NetworkMode.IsUserDefined() { 1097 return nil 1098 } 1099 1100 for _, l := range hostConfig.Links { 1101 name, alias, err := runconfigopts.ParseLink(l) 1102 if err != nil { 1103 return err 1104 } 1105 child, err := daemon.GetContainer(name) 1106 if err != nil { 1107 return fmt.Errorf("Could not get container for %s", name) 1108 } 1109 for child.HostConfig.NetworkMode.IsContainer() { 1110 parts := strings.SplitN(string(child.HostConfig.NetworkMode), ":", 2) 1111 child, err = daemon.GetContainer(parts[1]) 1112 if err != nil { 1113 return fmt.Errorf("Could not get container for %s", parts[1]) 1114 } 1115 } 1116 if child.HostConfig.NetworkMode.IsHost() { 1117 return runconfig.ErrConflictHostNetworkAndLinks 1118 } 1119 if err := daemon.registerLink(container, child, alias); err != nil { 1120 return err 1121 } 1122 } 1123 1124 // After we load all the links into the daemon 1125 // set them to nil on the hostconfig 1126 return container.WriteHostConfig() 1127 } 1128 1129 // conditionalMountOnStart is a platform specific helper function during the 1130 // container start to call mount. 1131 func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error { 1132 return daemon.Mount(container) 1133 } 1134 1135 // conditionalUnmountOnCleanup is a platform specific helper function called 1136 // during the cleanup of a container to unmount. 1137 func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error { 1138 return daemon.Unmount(container) 1139 } 1140 1141 func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { 1142 if !c.IsRunning() { 1143 return nil, errNotRunning{c.ID} 1144 } 1145 stats, err := daemon.containerd.Stats(c.ID) 1146 if err != nil { 1147 return nil, err 1148 } 1149 s := &types.StatsJSON{} 1150 cgs := stats.CgroupStats 1151 if cgs != nil { 1152 s.BlkioStats = types.BlkioStats{ 1153 IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive), 1154 IoServicedRecursive: copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive), 1155 IoQueuedRecursive: copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive), 1156 IoServiceTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive), 1157 IoWaitTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive), 1158 IoMergedRecursive: copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive), 1159 IoTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive), 1160 SectorsRecursive: copyBlkioEntry(cgs.BlkioStats.SectorsRecursive), 1161 } 1162 cpu := cgs.CpuStats 1163 s.CPUStats = types.CPUStats{ 1164 CPUUsage: types.CPUUsage{ 1165 TotalUsage: cpu.CpuUsage.TotalUsage, 1166 PercpuUsage: cpu.CpuUsage.PercpuUsage, 1167 UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode, 1168 UsageInUsermode: cpu.CpuUsage.UsageInUsermode, 1169 }, 1170 ThrottlingData: types.ThrottlingData{ 1171 Periods: cpu.ThrottlingData.Periods, 1172 ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods, 1173 ThrottledTime: cpu.ThrottlingData.ThrottledTime, 1174 }, 1175 } 1176 mem := cgs.MemoryStats.Usage 1177 s.MemoryStats = types.MemoryStats{ 1178 Usage: mem.Usage, 1179 MaxUsage: mem.MaxUsage, 1180 Stats: cgs.MemoryStats.Stats, 1181 Failcnt: mem.Failcnt, 1182 Limit: mem.Limit, 1183 } 1184 // if the container does not set memory limit, use the machineMemory 1185 if mem.Limit > daemon.statsCollector.machineMemory && daemon.statsCollector.machineMemory > 0 { 1186 s.MemoryStats.Limit = daemon.statsCollector.machineMemory 1187 } 1188 if cgs.PidsStats != nil { 1189 s.PidsStats = types.PidsStats{ 1190 Current: cgs.PidsStats.Current, 1191 } 1192 } 1193 } 1194 s.Read, err = ptypes.Timestamp(stats.Timestamp) 1195 if err != nil { 1196 return nil, err 1197 } 1198 return s, nil 1199 } 1200 1201 // setDefaultIsolation determines the default isolation mode for the 1202 // daemon to run in. This is only applicable on Windows 1203 func (daemon *Daemon) setDefaultIsolation() error { 1204 return nil 1205 } 1206 1207 func rootFSToAPIType(rootfs *image.RootFS) types.RootFS { 1208 var layers []string 1209 for _, l := range rootfs.DiffIDs { 1210 layers = append(layers, l.String()) 1211 } 1212 return types.RootFS{ 1213 Type: rootfs.Type, 1214 Layers: layers, 1215 } 1216 } 1217 1218 // setupDaemonProcess sets various settings for the daemon's process 1219 func setupDaemonProcess(config *Config) error { 1220 // setup the daemons oom_score_adj 1221 return setupOOMScoreAdj(config.OOMScoreAdjust) 1222 } 1223 1224 func setupOOMScoreAdj(score int) error { 1225 f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0) 1226 if err != nil { 1227 return err 1228 } 1229 defer f.Close() 1230 stringScore := strconv.Itoa(score) 1231 _, err = f.WriteString(stringScore) 1232 if os.IsPermission(err) { 1233 // Setting oom_score_adj does not work in an 1234 // unprivileged container. Ignore the error, but log 1235 // it if we appear not to be in that situation. 1236 if !rsystem.RunningInUserNS() { 1237 logrus.Debugf("Permission denied writing %q to /proc/self/oom_score_adj", stringScore) 1238 } 1239 return nil 1240 } 1241 1242 return err 1243 } 1244 1245 func (daemon *Daemon) initCgroupsPath(path string) error { 1246 if path == "/" || path == "." { 1247 return nil 1248 } 1249 1250 daemon.initCgroupsPath(filepath.Dir(path)) 1251 1252 _, root, err := cgroups.FindCgroupMountpointAndRoot("cpu") 1253 if err != nil { 1254 return err 1255 } 1256 1257 path = filepath.Join(root, path) 1258 sysinfo := sysinfo.New(true) 1259 if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { 1260 return err 1261 } 1262 if sysinfo.CPURealtimePeriod && daemon.configStore.CPURealtimePeriod != 0 { 1263 if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_period_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimePeriod, 10)), 0700); err != nil { 1264 return err 1265 } 1266 } 1267 if sysinfo.CPURealtimeRuntime && daemon.configStore.CPURealtimeRuntime != 0 { 1268 if err := ioutil.WriteFile(filepath.Join(path, "cpu.rt_runtime_us"), []byte(strconv.FormatInt(daemon.configStore.CPURealtimeRuntime, 10)), 0700); err != nil { 1269 return err 1270 } 1271 } 1272 return nil 1273 } 1274 1275 func (daemon *Daemon) setupSeccompProfile() error { 1276 if daemon.configStore.SeccompProfile != "" { 1277 daemon.seccompProfilePath = daemon.configStore.SeccompProfile 1278 b, err := ioutil.ReadFile(daemon.configStore.SeccompProfile) 1279 if err != nil { 1280 return fmt.Errorf("opening seccomp profile (%s) failed: %v", daemon.configStore.SeccompProfile, err) 1281 } 1282 daemon.seccompProfile = b 1283 } 1284 return nil 1285 }