github.com/jwhonce/docker@v0.6.7-0.20190327063223-da823cf3a5a3/daemon/cluster/executor/container/container.go (about) 1 package container // import "github.com/docker/docker/daemon/cluster/executor/container" 2 3 import ( 4 "errors" 5 "fmt" 6 "net" 7 "strconv" 8 "strings" 9 "time" 10 11 "github.com/sirupsen/logrus" 12 13 "github.com/docker/distribution/reference" 14 "github.com/docker/docker/api/types" 15 enginecontainer "github.com/docker/docker/api/types/container" 16 "github.com/docker/docker/api/types/events" 17 "github.com/docker/docker/api/types/filters" 18 enginemount "github.com/docker/docker/api/types/mount" 19 "github.com/docker/docker/api/types/network" 20 volumetypes "github.com/docker/docker/api/types/volume" 21 "github.com/docker/docker/daemon/cluster/convert" 22 executorpkg "github.com/docker/docker/daemon/cluster/executor" 23 clustertypes "github.com/docker/docker/daemon/cluster/provider" 24 "github.com/docker/go-connections/nat" 25 netconst "github.com/docker/libnetwork/datastore" 26 "github.com/docker/swarmkit/agent/exec" 27 "github.com/docker/swarmkit/api" 28 "github.com/docker/swarmkit/api/genericresource" 29 "github.com/docker/swarmkit/template" 30 gogotypes "github.com/gogo/protobuf/types" 31 ) 32 33 const ( 34 // Explicitly use the kernel's default setting for CPU quota of 100ms. 35 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 36 cpuQuotaPeriod = 100 * time.Millisecond 37 38 // systemLabelPrefix represents the reserved namespace for system labels. 39 systemLabelPrefix = "com.docker.swarm" 40 ) 41 42 // containerConfig converts task properties into docker container compatible 43 // components. 44 type containerConfig struct { 45 task *api.Task 46 networksAttachments map[string]*api.NetworkAttachment 47 } 48 49 // newContainerConfig returns a validated container config. No methods should 50 // return an error if this function returns without error. 51 func newContainerConfig(t *api.Task, node *api.NodeDescription) (*containerConfig, error) { 52 var c containerConfig 53 return &c, c.setTask(t, node) 54 } 55 56 func (c *containerConfig) setTask(t *api.Task, node *api.NodeDescription) error { 57 if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil { 58 return exec.ErrRuntimeUnsupported 59 } 60 61 container := t.Spec.GetContainer() 62 if container != nil { 63 if container.Image == "" { 64 return ErrImageRequired 65 } 66 67 if err := validateMounts(container.Mounts); err != nil { 68 return err 69 } 70 } 71 72 // index the networks by name 73 c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks)) 74 for _, attachment := range t.Networks { 75 c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment 76 } 77 78 c.task = t 79 80 if t.Spec.GetContainer() != nil { 81 preparedSpec, err := template.ExpandContainerSpec(node, t) 82 if err != nil { 83 return err 84 } 85 c.task.Spec.Runtime = &api.TaskSpec_Container{ 86 Container: preparedSpec, 87 } 88 } 89 90 return nil 91 } 92 93 func (c *containerConfig) networkAttachmentContainerID() string { 94 attachment := c.task.Spec.GetAttachment() 95 if attachment == nil { 96 return "" 97 } 98 99 return attachment.ContainerID 100 } 101 102 func (c *containerConfig) taskID() string { 103 return c.task.ID 104 } 105 106 func (c *containerConfig) endpoint() *api.Endpoint { 107 return c.task.Endpoint 108 } 109 110 func (c *containerConfig) spec() *api.ContainerSpec { 111 return c.task.Spec.GetContainer() 112 } 113 114 func (c *containerConfig) nameOrID() string { 115 if c.task.Spec.GetContainer() != nil { 116 return c.name() 117 } 118 119 return c.networkAttachmentContainerID() 120 } 121 122 func (c *containerConfig) name() string { 123 if c.task.Annotations.Name != "" { 124 // if set, use the container Annotations.Name field, set in the orchestrator. 125 return c.task.Annotations.Name 126 } 127 128 slot := fmt.Sprint(c.task.Slot) 129 if slot == "" || c.task.Slot == 0 { 130 slot = c.task.NodeID 131 } 132 133 // fallback to service.slot.id. 134 return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID) 135 } 136 137 func (c *containerConfig) image() string { 138 raw := c.spec().Image 139 ref, err := reference.ParseNormalizedNamed(raw) 140 if err != nil { 141 return raw 142 } 143 return reference.FamiliarString(reference.TagNameOnly(ref)) 144 } 145 146 func (c *containerConfig) portBindings() nat.PortMap { 147 portBindings := nat.PortMap{} 148 if c.task.Endpoint == nil { 149 return portBindings 150 } 151 152 for _, portConfig := range c.task.Endpoint.Ports { 153 if portConfig.PublishMode != api.PublishModeHost { 154 continue 155 } 156 157 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 158 binding := []nat.PortBinding{ 159 {}, 160 } 161 162 if portConfig.PublishedPort != 0 { 163 binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort)) 164 } 165 portBindings[port] = binding 166 } 167 168 return portBindings 169 } 170 171 func (c *containerConfig) isolation() enginecontainer.Isolation { 172 return convert.IsolationFromGRPC(c.spec().Isolation) 173 } 174 175 func (c *containerConfig) init() *bool { 176 if c.spec().Init == nil { 177 return nil 178 } 179 init := c.spec().Init.GetValue() 180 return &init 181 } 182 183 func (c *containerConfig) exposedPorts() map[nat.Port]struct{} { 184 exposedPorts := make(map[nat.Port]struct{}) 185 if c.task.Endpoint == nil { 186 return exposedPorts 187 } 188 189 for _, portConfig := range c.task.Endpoint.Ports { 190 if portConfig.PublishMode != api.PublishModeHost { 191 continue 192 } 193 194 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 195 exposedPorts[port] = struct{}{} 196 } 197 198 return exposedPorts 199 } 200 201 func (c *containerConfig) config() *enginecontainer.Config { 202 genericEnvs := genericresource.EnvFormat(c.task.AssignedGenericResources, "DOCKER_RESOURCE") 203 env := append(c.spec().Env, genericEnvs...) 204 205 config := &enginecontainer.Config{ 206 Labels: c.labels(), 207 StopSignal: c.spec().StopSignal, 208 Tty: c.spec().TTY, 209 OpenStdin: c.spec().OpenStdin, 210 User: c.spec().User, 211 Env: env, 212 Hostname: c.spec().Hostname, 213 WorkingDir: c.spec().Dir, 214 Image: c.image(), 215 ExposedPorts: c.exposedPorts(), 216 Healthcheck: c.healthcheck(), 217 } 218 219 if len(c.spec().Command) > 0 { 220 // If Command is provided, we replace the whole invocation with Command 221 // by replacing Entrypoint and specifying Cmd. Args is ignored in this 222 // case. 223 config.Entrypoint = append(config.Entrypoint, c.spec().Command...) 224 config.Cmd = append(config.Cmd, c.spec().Args...) 225 } else if len(c.spec().Args) > 0 { 226 // In this case, we assume the image has an Entrypoint and Args 227 // specifies the arguments for that entrypoint. 228 config.Cmd = c.spec().Args 229 } 230 231 return config 232 } 233 234 func (c *containerConfig) labels() map[string]string { 235 var ( 236 system = map[string]string{ 237 "task": "", // mark as cluster task 238 "task.id": c.task.ID, 239 "task.name": c.name(), 240 "node.id": c.task.NodeID, 241 "service.id": c.task.ServiceID, 242 "service.name": c.task.ServiceAnnotations.Name, 243 } 244 labels = make(map[string]string) 245 ) 246 247 // base labels are those defined in the spec. 248 for k, v := range c.spec().Labels { 249 labels[k] = v 250 } 251 252 // we then apply the overrides from the task, which may be set via the 253 // orchestrator. 254 for k, v := range c.task.Annotations.Labels { 255 labels[k] = v 256 } 257 258 // finally, we apply the system labels, which override all labels. 259 for k, v := range system { 260 labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v 261 } 262 263 return labels 264 } 265 266 func (c *containerConfig) mounts() []enginemount.Mount { 267 var r []enginemount.Mount 268 for _, mount := range c.spec().Mounts { 269 r = append(r, convertMount(mount)) 270 } 271 return r 272 } 273 274 func convertMount(m api.Mount) enginemount.Mount { 275 mount := enginemount.Mount{ 276 Source: m.Source, 277 Target: m.Target, 278 ReadOnly: m.ReadOnly, 279 } 280 281 switch m.Type { 282 case api.MountTypeBind: 283 mount.Type = enginemount.TypeBind 284 case api.MountTypeVolume: 285 mount.Type = enginemount.TypeVolume 286 case api.MountTypeTmpfs: 287 mount.Type = enginemount.TypeTmpfs 288 case api.MountTypeNamedPipe: 289 mount.Type = enginemount.TypeNamedPipe 290 } 291 292 if m.BindOptions != nil { 293 mount.BindOptions = &enginemount.BindOptions{} 294 switch m.BindOptions.Propagation { 295 case api.MountPropagationRPrivate: 296 mount.BindOptions.Propagation = enginemount.PropagationRPrivate 297 case api.MountPropagationPrivate: 298 mount.BindOptions.Propagation = enginemount.PropagationPrivate 299 case api.MountPropagationRSlave: 300 mount.BindOptions.Propagation = enginemount.PropagationRSlave 301 case api.MountPropagationSlave: 302 mount.BindOptions.Propagation = enginemount.PropagationSlave 303 case api.MountPropagationRShared: 304 mount.BindOptions.Propagation = enginemount.PropagationRShared 305 case api.MountPropagationShared: 306 mount.BindOptions.Propagation = enginemount.PropagationShared 307 } 308 } 309 310 if m.VolumeOptions != nil { 311 mount.VolumeOptions = &enginemount.VolumeOptions{ 312 NoCopy: m.VolumeOptions.NoCopy, 313 } 314 if m.VolumeOptions.Labels != nil { 315 mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels)) 316 for k, v := range m.VolumeOptions.Labels { 317 mount.VolumeOptions.Labels[k] = v 318 } 319 } 320 if m.VolumeOptions.DriverConfig != nil { 321 mount.VolumeOptions.DriverConfig = &enginemount.Driver{ 322 Name: m.VolumeOptions.DriverConfig.Name, 323 } 324 if m.VolumeOptions.DriverConfig.Options != nil { 325 mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options)) 326 for k, v := range m.VolumeOptions.DriverConfig.Options { 327 mount.VolumeOptions.DriverConfig.Options[k] = v 328 } 329 } 330 } 331 } 332 333 if m.TmpfsOptions != nil { 334 mount.TmpfsOptions = &enginemount.TmpfsOptions{ 335 SizeBytes: m.TmpfsOptions.SizeBytes, 336 Mode: m.TmpfsOptions.Mode, 337 } 338 } 339 340 return mount 341 } 342 343 func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig { 344 hcSpec := c.spec().Healthcheck 345 if hcSpec == nil { 346 return nil 347 } 348 interval, _ := gogotypes.DurationFromProto(hcSpec.Interval) 349 timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout) 350 startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod) 351 return &enginecontainer.HealthConfig{ 352 Test: hcSpec.Test, 353 Interval: interval, 354 Timeout: timeout, 355 Retries: int(hcSpec.Retries), 356 StartPeriod: startPeriod, 357 } 358 } 359 360 func (c *containerConfig) hostConfig() *enginecontainer.HostConfig { 361 hc := &enginecontainer.HostConfig{ 362 Resources: c.resources(), 363 GroupAdd: c.spec().Groups, 364 PortBindings: c.portBindings(), 365 Mounts: c.mounts(), 366 ReadonlyRootfs: c.spec().ReadOnly, 367 Isolation: c.isolation(), 368 Init: c.init(), 369 Sysctls: c.spec().Sysctls, 370 } 371 372 if c.spec().DNSConfig != nil { 373 hc.DNS = c.spec().DNSConfig.Nameservers 374 hc.DNSSearch = c.spec().DNSConfig.Search 375 hc.DNSOptions = c.spec().DNSConfig.Options 376 } 377 378 c.applyPrivileges(hc) 379 380 // The format of extra hosts on swarmkit is specified in: 381 // http://man7.org/linux/man-pages/man5/hosts.5.html 382 // IP_address canonical_hostname [aliases...] 383 // However, the format of ExtraHosts in HostConfig is 384 // <host>:<ip> 385 // We need to do the conversion here 386 // (Alias is ignored for now) 387 for _, entry := range c.spec().Hosts { 388 parts := strings.Fields(entry) 389 if len(parts) > 1 { 390 hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0])) 391 } 392 } 393 394 if c.task.LogDriver != nil { 395 hc.LogConfig = enginecontainer.LogConfig{ 396 Type: c.task.LogDriver.Name, 397 Config: c.task.LogDriver.Options, 398 } 399 } 400 401 if len(c.task.Networks) > 0 { 402 labels := c.task.Networks[0].Network.Spec.Annotations.Labels 403 name := c.task.Networks[0].Network.Spec.Annotations.Name 404 if v, ok := labels["com.docker.swarm.predefined"]; ok && v == "true" { 405 hc.NetworkMode = enginecontainer.NetworkMode(name) 406 } 407 } 408 409 return hc 410 } 411 412 // This handles the case of volumes that are defined inside a service Mount 413 func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumeCreateBody { 414 var ( 415 driverName string 416 driverOpts map[string]string 417 labels map[string]string 418 ) 419 420 if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil { 421 driverName = mount.VolumeOptions.DriverConfig.Name 422 driverOpts = mount.VolumeOptions.DriverConfig.Options 423 labels = mount.VolumeOptions.Labels 424 } 425 426 if mount.VolumeOptions != nil { 427 return &volumetypes.VolumeCreateBody{ 428 Name: mount.Source, 429 Driver: driverName, 430 DriverOpts: driverOpts, 431 Labels: labels, 432 } 433 } 434 return nil 435 } 436 437 func (c *containerConfig) resources() enginecontainer.Resources { 438 resources := enginecontainer.Resources{} 439 440 // If no limits are specified let the engine use its defaults. 441 // 442 // TODO(aluzzardi): We might want to set some limits anyway otherwise 443 // "unlimited" tasks will step over the reservation of other tasks. 444 r := c.task.Spec.Resources 445 if r == nil || r.Limits == nil { 446 return resources 447 } 448 449 if r.Limits.MemoryBytes > 0 { 450 resources.Memory = r.Limits.MemoryBytes 451 } 452 453 if r.Limits.NanoCPUs > 0 { 454 // CPU Period must be set in microseconds. 455 resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond) 456 resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9 457 } 458 459 return resources 460 } 461 462 // Docker daemon supports just 1 network during container create. 463 func (c *containerConfig) createNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig { 464 var networks []*api.NetworkAttachment 465 if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil { 466 networks = c.task.Networks 467 } 468 469 epConfig := make(map[string]*network.EndpointSettings) 470 if len(networks) > 0 { 471 epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0], b) 472 } 473 474 return &network.NetworkingConfig{EndpointsConfig: epConfig} 475 } 476 477 // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create 478 func (c *containerConfig) connectNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig { 479 var networks []*api.NetworkAttachment 480 if c.task.Spec.GetContainer() != nil { 481 networks = c.task.Networks 482 } 483 // First network is used during container create. Other networks are used in "docker network connect" 484 if len(networks) < 2 { 485 return nil 486 } 487 488 epConfig := make(map[string]*network.EndpointSettings) 489 for _, na := range networks[1:] { 490 epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na, b) 491 } 492 return &network.NetworkingConfig{EndpointsConfig: epConfig} 493 } 494 495 func getEndpointConfig(na *api.NetworkAttachment, b executorpkg.Backend) *network.EndpointSettings { 496 var ipv4, ipv6 string 497 for _, addr := range na.Addresses { 498 ip, _, err := net.ParseCIDR(addr) 499 if err != nil { 500 continue 501 } 502 503 if ip.To4() != nil { 504 ipv4 = ip.String() 505 continue 506 } 507 508 if ip.To16() != nil { 509 ipv6 = ip.String() 510 } 511 } 512 513 n := &network.EndpointSettings{ 514 NetworkID: na.Network.ID, 515 IPAMConfig: &network.EndpointIPAMConfig{ 516 IPv4Address: ipv4, 517 IPv6Address: ipv6, 518 }, 519 DriverOpts: na.DriverAttachmentOpts, 520 } 521 if v, ok := na.Network.Spec.Annotations.Labels["com.docker.swarm.predefined"]; ok && v == "true" { 522 if ln, err := b.FindNetwork(na.Network.Spec.Annotations.Name); err == nil { 523 n.NetworkID = ln.ID() 524 } 525 } 526 return n 527 } 528 529 func (c *containerConfig) virtualIP(networkID string) string { 530 if c.task.Endpoint == nil { 531 return "" 532 } 533 534 for _, eVip := range c.task.Endpoint.VirtualIPs { 535 // We only support IPv4 VIPs for now. 536 if eVip.NetworkID == networkID { 537 vip, _, err := net.ParseCIDR(eVip.Addr) 538 if err != nil { 539 return "" 540 } 541 542 return vip.String() 543 } 544 } 545 546 return "" 547 } 548 549 func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig { 550 if len(c.task.Networks) == 0 { 551 return nil 552 } 553 554 logrus.Debugf("Creating service config in agent for t = %+v", c.task) 555 svcCfg := &clustertypes.ServiceConfig{ 556 Name: c.task.ServiceAnnotations.Name, 557 Aliases: make(map[string][]string), 558 ID: c.task.ServiceID, 559 VirtualAddresses: make(map[string]*clustertypes.VirtualAddress), 560 } 561 562 for _, na := range c.task.Networks { 563 svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{ 564 // We support only IPv4 virtual IP for now. 565 IPv4: c.virtualIP(na.Network.ID), 566 } 567 if len(na.Aliases) > 0 { 568 svcCfg.Aliases[na.Network.ID] = na.Aliases 569 } 570 } 571 572 if c.task.Endpoint != nil { 573 for _, ePort := range c.task.Endpoint.Ports { 574 if ePort.PublishMode != api.PublishModeIngress { 575 continue 576 } 577 578 svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{ 579 Name: ePort.Name, 580 Protocol: int32(ePort.Protocol), 581 TargetPort: ePort.TargetPort, 582 PublishedPort: ePort.PublishedPort, 583 }) 584 } 585 } 586 587 return svcCfg 588 } 589 590 func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) { 591 na, ok := c.networksAttachments[name] 592 if !ok { 593 return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced") 594 } 595 596 options := types.NetworkCreate{ 597 // ID: na.Network.ID, 598 Labels: na.Network.Spec.Annotations.Labels, 599 Internal: na.Network.Spec.Internal, 600 Attachable: na.Network.Spec.Attachable, 601 Ingress: convert.IsIngressNetwork(na.Network), 602 EnableIPv6: na.Network.Spec.Ipv6Enabled, 603 CheckDuplicate: true, 604 Scope: netconst.SwarmScope, 605 } 606 607 if na.Network.Spec.GetNetwork() != "" { 608 options.ConfigFrom = &network.ConfigReference{ 609 Network: na.Network.Spec.GetNetwork(), 610 } 611 } 612 613 if na.Network.DriverState != nil { 614 options.Driver = na.Network.DriverState.Name 615 options.Options = na.Network.DriverState.Options 616 } 617 if na.Network.IPAM != nil { 618 options.IPAM = &network.IPAM{ 619 Driver: na.Network.IPAM.Driver.Name, 620 Options: na.Network.IPAM.Driver.Options, 621 } 622 for _, ic := range na.Network.IPAM.Configs { 623 c := network.IPAMConfig{ 624 Subnet: ic.Subnet, 625 IPRange: ic.Range, 626 Gateway: ic.Gateway, 627 } 628 options.IPAM.Config = append(options.IPAM.Config, c) 629 } 630 } 631 632 return clustertypes.NetworkCreateRequest{ 633 ID: na.Network.ID, 634 NetworkCreateRequest: types.NetworkCreateRequest{ 635 Name: name, 636 NetworkCreate: options, 637 }, 638 }, nil 639 } 640 641 func (c *containerConfig) applyPrivileges(hc *enginecontainer.HostConfig) { 642 privileges := c.spec().Privileges 643 if privileges == nil { 644 return 645 } 646 647 credentials := privileges.CredentialSpec 648 if credentials != nil { 649 switch credentials.Source.(type) { 650 case *api.Privileges_CredentialSpec_File: 651 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=file://"+credentials.GetFile()) 652 case *api.Privileges_CredentialSpec_Registry: 653 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=registry://"+credentials.GetRegistry()) 654 case *api.Privileges_CredentialSpec_Config: 655 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=config://"+credentials.GetConfig()) 656 } 657 } 658 659 selinux := privileges.SELinuxContext 660 if selinux != nil { 661 if selinux.Disable { 662 hc.SecurityOpt = append(hc.SecurityOpt, "label=disable") 663 } 664 if selinux.User != "" { 665 hc.SecurityOpt = append(hc.SecurityOpt, "label=user:"+selinux.User) 666 } 667 if selinux.Role != "" { 668 hc.SecurityOpt = append(hc.SecurityOpt, "label=role:"+selinux.Role) 669 } 670 if selinux.Level != "" { 671 hc.SecurityOpt = append(hc.SecurityOpt, "label=level:"+selinux.Level) 672 } 673 if selinux.Type != "" { 674 hc.SecurityOpt = append(hc.SecurityOpt, "label=type:"+selinux.Type) 675 } 676 } 677 } 678 679 func (c containerConfig) eventFilter() filters.Args { 680 filter := filters.NewArgs() 681 filter.Add("type", events.ContainerEventType) 682 filter.Add("name", c.name()) 683 filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID)) 684 return filter 685 }