github.com/docker/docker@v299999999.0.0-20200612211812-aaf470eca7b5+incompatible/daemon/cluster/executor/container/container.go (about) 1 package container // import "github.com/docker/docker/daemon/cluster/executor/container" 2 3 import ( 4 "errors" 5 "fmt" 6 "net" 7 "strconv" 8 "strings" 9 10 "github.com/sirupsen/logrus" 11 12 "github.com/docker/distribution/reference" 13 "github.com/docker/docker/api/types" 14 enginecontainer "github.com/docker/docker/api/types/container" 15 "github.com/docker/docker/api/types/events" 16 "github.com/docker/docker/api/types/filters" 17 enginemount "github.com/docker/docker/api/types/mount" 18 "github.com/docker/docker/api/types/network" 19 volumetypes "github.com/docker/docker/api/types/volume" 20 "github.com/docker/docker/daemon/cluster/convert" 21 executorpkg "github.com/docker/docker/daemon/cluster/executor" 22 clustertypes "github.com/docker/docker/daemon/cluster/provider" 23 "github.com/docker/go-connections/nat" 24 netconst "github.com/docker/libnetwork/datastore" 25 "github.com/docker/swarmkit/agent/exec" 26 "github.com/docker/swarmkit/api" 27 "github.com/docker/swarmkit/api/genericresource" 28 "github.com/docker/swarmkit/template" 29 gogotypes "github.com/gogo/protobuf/types" 30 ) 31 32 const ( 33 // systemLabelPrefix represents the reserved namespace for system labels. 34 systemLabelPrefix = "com.docker.swarm" 35 ) 36 37 // containerConfig converts task properties into docker container compatible 38 // components. 39 type containerConfig struct { 40 task *api.Task 41 networksAttachments map[string]*api.NetworkAttachment 42 } 43 44 // newContainerConfig returns a validated container config. No methods should 45 // return an error if this function returns without error. 46 func newContainerConfig(t *api.Task, node *api.NodeDescription) (*containerConfig, error) { 47 var c containerConfig 48 return &c, c.setTask(t, node) 49 } 50 51 func (c *containerConfig) setTask(t *api.Task, node *api.NodeDescription) error { 52 if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil { 53 return exec.ErrRuntimeUnsupported 54 } 55 56 container := t.Spec.GetContainer() 57 if container != nil { 58 if container.Image == "" { 59 return ErrImageRequired 60 } 61 62 if err := validateMounts(container.Mounts); err != nil { 63 return err 64 } 65 } 66 67 // index the networks by name 68 c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks)) 69 for _, attachment := range t.Networks { 70 c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment 71 } 72 73 c.task = t 74 75 if t.Spec.GetContainer() != nil { 76 preparedSpec, err := template.ExpandContainerSpec(node, t) 77 if err != nil { 78 return err 79 } 80 c.task.Spec.Runtime = &api.TaskSpec_Container{ 81 Container: preparedSpec, 82 } 83 } 84 85 return nil 86 } 87 88 func (c *containerConfig) networkAttachmentContainerID() string { 89 attachment := c.task.Spec.GetAttachment() 90 if attachment == nil { 91 return "" 92 } 93 94 return attachment.ContainerID 95 } 96 97 func (c *containerConfig) taskID() string { 98 return c.task.ID 99 } 100 101 func (c *containerConfig) spec() *api.ContainerSpec { 102 return c.task.Spec.GetContainer() 103 } 104 105 func (c *containerConfig) nameOrID() string { 106 if c.task.Spec.GetContainer() != nil { 107 return c.name() 108 } 109 110 return c.networkAttachmentContainerID() 111 } 112 113 func (c *containerConfig) name() string { 114 if c.task.Annotations.Name != "" { 115 // if set, use the container Annotations.Name field, set in the orchestrator. 116 return c.task.Annotations.Name 117 } 118 119 slot := fmt.Sprint(c.task.Slot) 120 if slot == "" || c.task.Slot == 0 { 121 slot = c.task.NodeID 122 } 123 124 // fallback to service.slot.id. 125 return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID) 126 } 127 128 func (c *containerConfig) image() string { 129 raw := c.spec().Image 130 ref, err := reference.ParseNormalizedNamed(raw) 131 if err != nil { 132 return raw 133 } 134 return reference.FamiliarString(reference.TagNameOnly(ref)) 135 } 136 137 func (c *containerConfig) portBindings() nat.PortMap { 138 portBindings := nat.PortMap{} 139 if c.task.Endpoint == nil { 140 return portBindings 141 } 142 143 for _, portConfig := range c.task.Endpoint.Ports { 144 if portConfig.PublishMode != api.PublishModeHost { 145 continue 146 } 147 148 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 149 binding := []nat.PortBinding{ 150 {}, 151 } 152 153 if portConfig.PublishedPort != 0 { 154 binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort)) 155 } 156 portBindings[port] = binding 157 } 158 159 return portBindings 160 } 161 162 func (c *containerConfig) isolation() enginecontainer.Isolation { 163 return convert.IsolationFromGRPC(c.spec().Isolation) 164 } 165 166 func (c *containerConfig) init() *bool { 167 if c.spec().Init == nil { 168 return nil 169 } 170 init := c.spec().Init.GetValue() 171 return &init 172 } 173 174 func (c *containerConfig) exposedPorts() map[nat.Port]struct{} { 175 exposedPorts := make(map[nat.Port]struct{}) 176 if c.task.Endpoint == nil { 177 return exposedPorts 178 } 179 180 for _, portConfig := range c.task.Endpoint.Ports { 181 if portConfig.PublishMode != api.PublishModeHost { 182 continue 183 } 184 185 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 186 exposedPorts[port] = struct{}{} 187 } 188 189 return exposedPorts 190 } 191 192 func (c *containerConfig) config() *enginecontainer.Config { 193 genericEnvs := genericresource.EnvFormat(c.task.AssignedGenericResources, "DOCKER_RESOURCE") 194 env := append(c.spec().Env, genericEnvs...) 195 196 config := &enginecontainer.Config{ 197 Labels: c.labels(), 198 StopSignal: c.spec().StopSignal, 199 Tty: c.spec().TTY, 200 OpenStdin: c.spec().OpenStdin, 201 User: c.spec().User, 202 Env: env, 203 Hostname: c.spec().Hostname, 204 WorkingDir: c.spec().Dir, 205 Image: c.image(), 206 ExposedPorts: c.exposedPorts(), 207 Healthcheck: c.healthcheck(), 208 } 209 210 if len(c.spec().Command) > 0 { 211 // If Command is provided, we replace the whole invocation with Command 212 // by replacing Entrypoint and specifying Cmd. Args is ignored in this 213 // case. 214 config.Entrypoint = append(config.Entrypoint, c.spec().Command...) 215 config.Cmd = append(config.Cmd, c.spec().Args...) 216 } else if len(c.spec().Args) > 0 { 217 // In this case, we assume the image has an Entrypoint and Args 218 // specifies the arguments for that entrypoint. 219 config.Cmd = c.spec().Args 220 } 221 222 return config 223 } 224 225 func (c *containerConfig) labels() map[string]string { 226 var ( 227 system = map[string]string{ 228 "task": "", // mark as cluster task 229 "task.id": c.task.ID, 230 "task.name": c.name(), 231 "node.id": c.task.NodeID, 232 "service.id": c.task.ServiceID, 233 "service.name": c.task.ServiceAnnotations.Name, 234 } 235 labels = make(map[string]string) 236 ) 237 238 // base labels are those defined in the spec. 239 for k, v := range c.spec().Labels { 240 labels[k] = v 241 } 242 243 // we then apply the overrides from the task, which may be set via the 244 // orchestrator. 245 for k, v := range c.task.Annotations.Labels { 246 labels[k] = v 247 } 248 249 // finally, we apply the system labels, which override all labels. 250 for k, v := range system { 251 labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v 252 } 253 254 return labels 255 } 256 257 func (c *containerConfig) mounts() []enginemount.Mount { 258 var r []enginemount.Mount 259 for _, mount := range c.spec().Mounts { 260 r = append(r, convertMount(mount)) 261 } 262 return r 263 } 264 265 func convertMount(m api.Mount) enginemount.Mount { 266 mount := enginemount.Mount{ 267 Source: m.Source, 268 Target: m.Target, 269 ReadOnly: m.ReadOnly, 270 } 271 272 switch m.Type { 273 case api.MountTypeBind: 274 mount.Type = enginemount.TypeBind 275 case api.MountTypeVolume: 276 mount.Type = enginemount.TypeVolume 277 case api.MountTypeTmpfs: 278 mount.Type = enginemount.TypeTmpfs 279 case api.MountTypeNamedPipe: 280 mount.Type = enginemount.TypeNamedPipe 281 } 282 283 if m.BindOptions != nil { 284 mount.BindOptions = &enginemount.BindOptions{ 285 NonRecursive: m.BindOptions.NonRecursive, 286 } 287 switch m.BindOptions.Propagation { 288 case api.MountPropagationRPrivate: 289 mount.BindOptions.Propagation = enginemount.PropagationRPrivate 290 case api.MountPropagationPrivate: 291 mount.BindOptions.Propagation = enginemount.PropagationPrivate 292 case api.MountPropagationRSlave: 293 mount.BindOptions.Propagation = enginemount.PropagationRSlave 294 case api.MountPropagationSlave: 295 mount.BindOptions.Propagation = enginemount.PropagationSlave 296 case api.MountPropagationRShared: 297 mount.BindOptions.Propagation = enginemount.PropagationRShared 298 case api.MountPropagationShared: 299 mount.BindOptions.Propagation = enginemount.PropagationShared 300 } 301 } 302 303 if m.VolumeOptions != nil { 304 mount.VolumeOptions = &enginemount.VolumeOptions{ 305 NoCopy: m.VolumeOptions.NoCopy, 306 } 307 if m.VolumeOptions.Labels != nil { 308 mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels)) 309 for k, v := range m.VolumeOptions.Labels { 310 mount.VolumeOptions.Labels[k] = v 311 } 312 } 313 if m.VolumeOptions.DriverConfig != nil { 314 mount.VolumeOptions.DriverConfig = &enginemount.Driver{ 315 Name: m.VolumeOptions.DriverConfig.Name, 316 } 317 if m.VolumeOptions.DriverConfig.Options != nil { 318 mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options)) 319 for k, v := range m.VolumeOptions.DriverConfig.Options { 320 mount.VolumeOptions.DriverConfig.Options[k] = v 321 } 322 } 323 } 324 } 325 326 if m.TmpfsOptions != nil { 327 mount.TmpfsOptions = &enginemount.TmpfsOptions{ 328 SizeBytes: m.TmpfsOptions.SizeBytes, 329 Mode: m.TmpfsOptions.Mode, 330 } 331 } 332 333 return mount 334 } 335 336 func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig { 337 hcSpec := c.spec().Healthcheck 338 if hcSpec == nil { 339 return nil 340 } 341 interval, _ := gogotypes.DurationFromProto(hcSpec.Interval) 342 timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout) 343 startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod) 344 return &enginecontainer.HealthConfig{ 345 Test: hcSpec.Test, 346 Interval: interval, 347 Timeout: timeout, 348 Retries: int(hcSpec.Retries), 349 StartPeriod: startPeriod, 350 } 351 } 352 353 func (c *containerConfig) hostConfig() *enginecontainer.HostConfig { 354 hc := &enginecontainer.HostConfig{ 355 Resources: c.resources(), 356 GroupAdd: c.spec().Groups, 357 PortBindings: c.portBindings(), 358 Mounts: c.mounts(), 359 ReadonlyRootfs: c.spec().ReadOnly, 360 Isolation: c.isolation(), 361 Init: c.init(), 362 Sysctls: c.spec().Sysctls, 363 Capabilities: c.spec().Capabilities, 364 } 365 366 if c.spec().DNSConfig != nil { 367 hc.DNS = c.spec().DNSConfig.Nameservers 368 hc.DNSSearch = c.spec().DNSConfig.Search 369 hc.DNSOptions = c.spec().DNSConfig.Options 370 } 371 372 c.applyPrivileges(hc) 373 374 // The format of extra hosts on swarmkit is specified in: 375 // http://man7.org/linux/man-pages/man5/hosts.5.html 376 // IP_address canonical_hostname [aliases...] 377 // However, the format of ExtraHosts in HostConfig is 378 // <host>:<ip> 379 // We need to do the conversion here 380 // (Alias is ignored for now) 381 for _, entry := range c.spec().Hosts { 382 parts := strings.Fields(entry) 383 if len(parts) > 1 { 384 hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0])) 385 } 386 } 387 388 if c.task.LogDriver != nil { 389 hc.LogConfig = enginecontainer.LogConfig{ 390 Type: c.task.LogDriver.Name, 391 Config: c.task.LogDriver.Options, 392 } 393 } 394 395 if len(c.task.Networks) > 0 { 396 labels := c.task.Networks[0].Network.Spec.Annotations.Labels 397 name := c.task.Networks[0].Network.Spec.Annotations.Name 398 if v, ok := labels["com.docker.swarm.predefined"]; ok && v == "true" { 399 hc.NetworkMode = enginecontainer.NetworkMode(name) 400 } 401 } 402 403 return hc 404 } 405 406 // This handles the case of volumes that are defined inside a service Mount 407 func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumeCreateBody { 408 var ( 409 driverName string 410 driverOpts map[string]string 411 labels map[string]string 412 ) 413 414 if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil { 415 driverName = mount.VolumeOptions.DriverConfig.Name 416 driverOpts = mount.VolumeOptions.DriverConfig.Options 417 labels = mount.VolumeOptions.Labels 418 } 419 420 if mount.VolumeOptions != nil { 421 return &volumetypes.VolumeCreateBody{ 422 Name: mount.Source, 423 Driver: driverName, 424 DriverOpts: driverOpts, 425 Labels: labels, 426 } 427 } 428 return nil 429 } 430 431 func (c *containerConfig) resources() enginecontainer.Resources { 432 resources := enginecontainer.Resources{} 433 434 // set pids limit 435 pidsLimit := c.spec().PidsLimit 436 if pidsLimit > 0 { 437 resources.PidsLimit = &pidsLimit 438 } 439 440 // If no limits are specified let the engine use its defaults. 441 // 442 // TODO(aluzzardi): We might want to set some limits anyway otherwise 443 // "unlimited" tasks will step over the reservation of other tasks. 444 r := c.task.Spec.Resources 445 if r == nil || r.Limits == nil { 446 return resources 447 } 448 449 if r.Limits.MemoryBytes > 0 { 450 resources.Memory = r.Limits.MemoryBytes 451 } 452 453 if r.Limits.NanoCPUs > 0 { 454 resources.NanoCPUs = r.Limits.NanoCPUs 455 } 456 457 return resources 458 } 459 460 // Docker daemon supports just 1 network during container create. 461 func (c *containerConfig) createNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig { 462 var networks []*api.NetworkAttachment 463 if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil { 464 networks = c.task.Networks 465 } 466 467 epConfig := make(map[string]*network.EndpointSettings) 468 if len(networks) > 0 { 469 epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0], b) 470 } 471 472 return &network.NetworkingConfig{EndpointsConfig: epConfig} 473 } 474 475 // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create 476 func (c *containerConfig) connectNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig { 477 var networks []*api.NetworkAttachment 478 if c.task.Spec.GetContainer() != nil { 479 networks = c.task.Networks 480 } 481 // First network is used during container create. Other networks are used in "docker network connect" 482 if len(networks) < 2 { 483 return nil 484 } 485 486 epConfig := make(map[string]*network.EndpointSettings) 487 for _, na := range networks[1:] { 488 epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na, b) 489 } 490 return &network.NetworkingConfig{EndpointsConfig: epConfig} 491 } 492 493 func getEndpointConfig(na *api.NetworkAttachment, b executorpkg.Backend) *network.EndpointSettings { 494 var ipv4, ipv6 string 495 for _, addr := range na.Addresses { 496 ip, _, err := net.ParseCIDR(addr) 497 if err != nil { 498 continue 499 } 500 501 if ip.To4() != nil { 502 ipv4 = ip.String() 503 continue 504 } 505 506 if ip.To16() != nil { 507 ipv6 = ip.String() 508 } 509 } 510 511 n := &network.EndpointSettings{ 512 NetworkID: na.Network.ID, 513 IPAMConfig: &network.EndpointIPAMConfig{ 514 IPv4Address: ipv4, 515 IPv6Address: ipv6, 516 }, 517 DriverOpts: na.DriverAttachmentOpts, 518 } 519 if v, ok := na.Network.Spec.Annotations.Labels["com.docker.swarm.predefined"]; ok && v == "true" { 520 if ln, err := b.FindNetwork(na.Network.Spec.Annotations.Name); err == nil { 521 n.NetworkID = ln.ID() 522 } 523 } 524 return n 525 } 526 527 func (c *containerConfig) virtualIP(networkID string) string { 528 if c.task.Endpoint == nil { 529 return "" 530 } 531 532 for _, eVip := range c.task.Endpoint.VirtualIPs { 533 // We only support IPv4 VIPs for now. 534 if eVip.NetworkID == networkID { 535 vip, _, err := net.ParseCIDR(eVip.Addr) 536 if err != nil { 537 return "" 538 } 539 540 return vip.String() 541 } 542 } 543 544 return "" 545 } 546 547 func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig { 548 if len(c.task.Networks) == 0 { 549 return nil 550 } 551 552 logrus.Debugf("Creating service config in agent for t = %+v", c.task) 553 svcCfg := &clustertypes.ServiceConfig{ 554 Name: c.task.ServiceAnnotations.Name, 555 Aliases: make(map[string][]string), 556 ID: c.task.ServiceID, 557 VirtualAddresses: make(map[string]*clustertypes.VirtualAddress), 558 } 559 560 for _, na := range c.task.Networks { 561 svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{ 562 // We support only IPv4 virtual IP for now. 563 IPv4: c.virtualIP(na.Network.ID), 564 } 565 if len(na.Aliases) > 0 { 566 svcCfg.Aliases[na.Network.ID] = na.Aliases 567 } 568 } 569 570 if c.task.Endpoint != nil { 571 for _, ePort := range c.task.Endpoint.Ports { 572 if ePort.PublishMode != api.PublishModeIngress { 573 continue 574 } 575 576 svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{ 577 Name: ePort.Name, 578 Protocol: int32(ePort.Protocol), 579 TargetPort: ePort.TargetPort, 580 PublishedPort: ePort.PublishedPort, 581 }) 582 } 583 } 584 585 return svcCfg 586 } 587 588 func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) { 589 na, ok := c.networksAttachments[name] 590 if !ok { 591 return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced") 592 } 593 594 options := types.NetworkCreate{ 595 // ID: na.Network.ID, 596 Labels: na.Network.Spec.Annotations.Labels, 597 Internal: na.Network.Spec.Internal, 598 Attachable: na.Network.Spec.Attachable, 599 Ingress: convert.IsIngressNetwork(na.Network), 600 EnableIPv6: na.Network.Spec.Ipv6Enabled, 601 CheckDuplicate: true, 602 Scope: netconst.SwarmScope, 603 } 604 605 if na.Network.Spec.GetNetwork() != "" { 606 options.ConfigFrom = &network.ConfigReference{ 607 Network: na.Network.Spec.GetNetwork(), 608 } 609 } 610 611 if na.Network.DriverState != nil { 612 options.Driver = na.Network.DriverState.Name 613 options.Options = na.Network.DriverState.Options 614 } 615 if na.Network.IPAM != nil { 616 options.IPAM = &network.IPAM{ 617 Driver: na.Network.IPAM.Driver.Name, 618 Options: na.Network.IPAM.Driver.Options, 619 } 620 for _, ic := range na.Network.IPAM.Configs { 621 c := network.IPAMConfig{ 622 Subnet: ic.Subnet, 623 IPRange: ic.Range, 624 Gateway: ic.Gateway, 625 } 626 options.IPAM.Config = append(options.IPAM.Config, c) 627 } 628 } 629 630 return clustertypes.NetworkCreateRequest{ 631 ID: na.Network.ID, 632 NetworkCreateRequest: types.NetworkCreateRequest{ 633 Name: name, 634 NetworkCreate: options, 635 }, 636 }, nil 637 } 638 639 func (c *containerConfig) applyPrivileges(hc *enginecontainer.HostConfig) { 640 privileges := c.spec().Privileges 641 if privileges == nil { 642 return 643 } 644 645 credentials := privileges.CredentialSpec 646 if credentials != nil { 647 switch credentials.Source.(type) { 648 case *api.Privileges_CredentialSpec_File: 649 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=file://"+credentials.GetFile()) 650 case *api.Privileges_CredentialSpec_Registry: 651 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=registry://"+credentials.GetRegistry()) 652 case *api.Privileges_CredentialSpec_Config: 653 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=config://"+credentials.GetConfig()) 654 } 655 } 656 657 selinux := privileges.SELinuxContext 658 if selinux != nil { 659 if selinux.Disable { 660 hc.SecurityOpt = append(hc.SecurityOpt, "label=disable") 661 } 662 if selinux.User != "" { 663 hc.SecurityOpt = append(hc.SecurityOpt, "label=user:"+selinux.User) 664 } 665 if selinux.Role != "" { 666 hc.SecurityOpt = append(hc.SecurityOpt, "label=role:"+selinux.Role) 667 } 668 if selinux.Level != "" { 669 hc.SecurityOpt = append(hc.SecurityOpt, "label=level:"+selinux.Level) 670 } 671 if selinux.Type != "" { 672 hc.SecurityOpt = append(hc.SecurityOpt, "label=type:"+selinux.Type) 673 } 674 } 675 } 676 677 func (c containerConfig) eventFilter() filters.Args { 678 filter := filters.NewArgs() 679 filter.Add("type", events.ContainerEventType) 680 filter.Add("name", c.name()) 681 filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID)) 682 return filter 683 }