package container // import "github.com/docker/docker/daemon/cluster/executor/container"

import (
	"errors"
	"fmt"
	"net"
	"strconv"
	"strings"

	"github.com/sirupsen/logrus"

	"github.com/docker/distribution/reference"
	"github.com/docker/docker/api/types"
	enginecontainer "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/events"
	"github.com/docker/docker/api/types/filters"
	enginemount "github.com/docker/docker/api/types/mount"
	"github.com/docker/docker/api/types/network"
	volumetypes "github.com/docker/docker/api/types/volume"
	"github.com/docker/docker/daemon/cluster/convert"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	clustertypes "github.com/docker/docker/daemon/cluster/provider"
	"github.com/docker/go-connections/nat"
	netconst "github.com/docker/libnetwork/datastore"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/api/genericresource"
	"github.com/docker/swarmkit/template"
	gogotypes "github.com/gogo/protobuf/types"
)

const (
	// systemLabelPrefix represents the reserved namespace for system labels.
	systemLabelPrefix = "com.docker.swarm"
)

// containerConfig converts task properties into docker container compatible
// components.
type containerConfig struct {
	task                *api.Task
	networksAttachments map[string]*api.NetworkAttachment // task networks indexed by network name
}

// newContainerConfig returns a validated container config. No methods should
// return an error if this function returns without error.
func newContainerConfig(t *api.Task, node *api.NodeDescription) (*containerConfig, error) {
	var c containerConfig
	return &c, c.setTask(t, node)
}

// setTask validates and stores the task, indexes its network attachments by
// network name, and expands any templated fields in the container spec.
func (c *containerConfig) setTask(t *api.Task, node *api.NodeDescription) error {
	if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil {
		return exec.ErrRuntimeUnsupported
	}

	container := t.Spec.GetContainer()
	if container != nil {
		if container.Image == "" {
			return ErrImageRequired
		}

		if err := validateMounts(container.Mounts); err != nil {
			return err
		}
	}

	// index the networks by name
	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
	for _, attachment := range t.Networks {
		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
	}

	c.task = t

	if t.Spec.GetContainer() != nil {
		// Expand templated placeholders (e.g. node/task variables) in the
		// container spec and store the prepared result back on the task.
		preparedSpec, err := template.ExpandContainerSpec(node, t)
		if err != nil {
			return err
		}
		c.task.Spec.Runtime = &api.TaskSpec_Container{
			Container: preparedSpec,
		}
	}

	return nil
}

// networkAttachmentContainerID returns the container ID of an attachment
// task's target container, or "" if this task is not an attachment.
func (c *containerConfig) networkAttachmentContainerID() string {
	attachment := c.task.Spec.GetAttachment()
	if attachment == nil {
		return ""
	}

	return attachment.ContainerID
}

// taskID returns the ID of the underlying swarmkit task.
func (c *containerConfig) taskID() string {
	return c.task.ID
}

// endpoint returns the task's endpoint (may be nil).
func (c *containerConfig) endpoint() *api.Endpoint {
	return c.task.Endpoint
}

// spec returns the task's container spec (nil for attachment tasks).
func (c *containerConfig) spec() *api.ContainerSpec {
	return c.task.Spec.GetContainer()
}

// nameOrID returns the generated container name for container tasks, or the
// target container ID for network-attachment tasks.
func (c *containerConfig) nameOrID() string {
	if c.task.Spec.GetContainer() != nil {
		return c.name()
	}

	return c.networkAttachmentContainerID()
}

// name derives the engine-side container name for this task.
func (c *containerConfig) name() string {
	if c.task.Annotations.Name != "" {
		// if set, use the container Annotations.Name field, set in the orchestrator.
		return c.task.Annotations.Name
	}

	slot := fmt.Sprint(c.task.Slot)
	if slot == "" || c.task.Slot == 0 {
		// global-service tasks have no slot; use the node ID instead.
		slot = c.task.NodeID
	}

	// fallback to service.slot.id.
	return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID)
}

// image returns the spec's image reference, normalized to its familiar form
// with an explicit tag; the raw string is returned if it does not parse.
func (c *containerConfig) image() string {
	raw := c.spec().Image
	ref, err := reference.ParseNormalizedNamed(raw)
	if err != nil {
		return raw
	}
	return reference.FamiliarString(reference.TagNameOnly(ref))
}

// portBindings builds host port bindings for ports published in host mode.
// Ingress-mode ports are handled by the routing mesh, not here.
func (c *containerConfig) portBindings() nat.PortMap {
	portBindings := nat.PortMap{}
	if c.task.Endpoint == nil {
		return portBindings
	}

	for _, portConfig := range c.task.Endpoint.Ports {
		if portConfig.PublishMode != api.PublishModeHost {
			continue
		}

		port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String())))
		binding := []nat.PortBinding{
			{},
		}

		// PublishedPort == 0 leaves HostPort empty so the engine picks a port.
		if portConfig.PublishedPort != 0 {
			binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort))
		}
		portBindings[port] = binding
	}

	return portBindings
}

// isolation converts the spec's isolation mode to the engine representation.
func (c *containerConfig) isolation() enginecontainer.Isolation {
	return convert.IsolationFromGRPC(c.spec().Isolation)
}

// init returns the spec's init setting, or nil when unset (engine default).
func (c *containerConfig) init() *bool {
	if c.spec().Init == nil {
		return nil
	}
	init := c.spec().Init.GetValue()
	return &init
}

// exposedPorts returns the set of host-mode published ports to expose.
func (c *containerConfig) exposedPorts() map[nat.Port]struct{} {
	exposedPorts := make(map[nat.Port]struct{})
	if c.task.Endpoint == nil {
		return exposedPorts
	}

	for _, portConfig := range c.task.Endpoint.Ports {
		if portConfig.PublishMode != api.PublishModeHost {
			continue
		}

		port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String())))
		exposedPorts[port] = struct{}{}
	}

	return exposedPorts
}

// config assembles the engine container.Config from the task spec, including
// generic-resource environment variables and command/entrypoint handling.
func (c *containerConfig) config() *enginecontainer.Config {
	genericEnvs := genericresource.EnvFormat(c.task.AssignedGenericResources, "DOCKER_RESOURCE")
	env := append(c.spec().Env, genericEnvs...)

	config := &enginecontainer.Config{
		Labels:       c.labels(),
		StopSignal:   c.spec().StopSignal,
		Tty:          c.spec().TTY,
		OpenStdin:    c.spec().OpenStdin,
		User:         c.spec().User,
		Env:          env,
		Hostname:     c.spec().Hostname,
		WorkingDir:   c.spec().Dir,
		Image:        c.image(),
		ExposedPorts: c.exposedPorts(),
		Healthcheck:  c.healthcheck(),
	}

	if len(c.spec().Command) > 0 {
		// If Command is provided, we replace the whole invocation with Command
		// by replacing Entrypoint and specifying Cmd. Args is ignored in this
		// case.
		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
		config.Cmd = append(config.Cmd, c.spec().Args...)
	} else if len(c.spec().Args) > 0 {
		// In this case, we assume the image has an Entrypoint and Args
		// specifies the arguments for that entrypoint.
		config.Cmd = c.spec().Args
	}

	return config
}

// labels merges spec labels, task annotation labels, and reserved system
// labels (in increasing precedence) into the final container label set.
func (c *containerConfig) labels() map[string]string {
	var (
		system = map[string]string{
			"task":         "", // mark as cluster task
			"task.id":      c.task.ID,
			"task.name":    c.name(),
			"node.id":      c.task.NodeID,
			"service.id":   c.task.ServiceID,
			"service.name": c.task.ServiceAnnotations.Name,
		}
		labels = make(map[string]string)
	)

	// base labels are those defined in the spec.
	for k, v := range c.spec().Labels {
		labels[k] = v
	}

	// we then apply the overrides from the task, which may be set via the
	// orchestrator.
	for k, v := range c.task.Annotations.Labels {
		labels[k] = v
	}

	// finally, we apply the system labels, which override all labels.
	for k, v := range system {
		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
	}

	return labels
}

// mounts converts the spec's mounts to the engine mount representation.
func (c *containerConfig) mounts() []enginemount.Mount {
	var r []enginemount.Mount
	for _, mount := range c.spec().Mounts {
		r = append(r, convertMount(mount))
	}
	return r
}

// convertMount translates a single swarmkit api.Mount into an engine
// mount.Mount, including bind propagation, volume, and tmpfs options.
func convertMount(m api.Mount) enginemount.Mount {
	mount := enginemount.Mount{
		Source:   m.Source,
		Target:   m.Target,
		ReadOnly: m.ReadOnly,
	}

	switch m.Type {
	case api.MountTypeBind:
		mount.Type = enginemount.TypeBind
	case api.MountTypeVolume:
		mount.Type = enginemount.TypeVolume
	case api.MountTypeTmpfs:
		mount.Type = enginemount.TypeTmpfs
	case api.MountTypeNamedPipe:
		mount.Type = enginemount.TypeNamedPipe
	}

	if m.BindOptions != nil {
		mount.BindOptions = &enginemount.BindOptions{}
		switch m.BindOptions.Propagation {
		case api.MountPropagationRPrivate:
			mount.BindOptions.Propagation = enginemount.PropagationRPrivate
		case api.MountPropagationPrivate:
			mount.BindOptions.Propagation = enginemount.PropagationPrivate
		case api.MountPropagationRSlave:
			mount.BindOptions.Propagation = enginemount.PropagationRSlave
		case api.MountPropagationSlave:
			mount.BindOptions.Propagation = enginemount.PropagationSlave
		case api.MountPropagationRShared:
			mount.BindOptions.Propagation = enginemount.PropagationRShared
		case api.MountPropagationShared:
			mount.BindOptions.Propagation = enginemount.PropagationShared
		}
	}

	if m.VolumeOptions != nil {
		mount.VolumeOptions = &enginemount.VolumeOptions{
			NoCopy: m.VolumeOptions.NoCopy,
		}
		// Copy label and driver-option maps so the engine mount does not
		// alias the task spec's maps.
		if m.VolumeOptions.Labels != nil {
			mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels))
			for k, v := range m.VolumeOptions.Labels {
				mount.VolumeOptions.Labels[k] = v
			}
		}
		if m.VolumeOptions.DriverConfig != nil {
			mount.VolumeOptions.DriverConfig = &enginemount.Driver{
				Name: m.VolumeOptions.DriverConfig.Name,
			}
			if m.VolumeOptions.DriverConfig.Options != nil {
				mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options))
				for k, v := range m.VolumeOptions.DriverConfig.Options {
					mount.VolumeOptions.DriverConfig.Options[k] = v
				}
			}
		}
	}

	if m.TmpfsOptions != nil {
		mount.TmpfsOptions = &enginemount.TmpfsOptions{
			SizeBytes: m.TmpfsOptions.SizeBytes,
			Mode:      m.TmpfsOptions.Mode,
		}
	}

	return mount
}

// healthcheck converts the spec's healthcheck (if any) to the engine's
// HealthConfig, translating protobuf durations to time.Duration.
func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig {
	hcSpec := c.spec().Healthcheck
	if hcSpec == nil {
		return nil
	}
	// Errors are ignored: a nil/invalid proto duration yields the zero
	// duration, which the engine treats as "use default".
	interval, _ := gogotypes.DurationFromProto(hcSpec.Interval)
	timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout)
	startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod)
	return &enginecontainer.HealthConfig{
		Test:        hcSpec.Test,
		Interval:    interval,
		Timeout:     timeout,
		Retries:     int(hcSpec.Retries),
		StartPeriod: startPeriod,
	}
}

// hostConfig assembles the engine HostConfig: resources, mounts, DNS,
// privileges, extra hosts, log driver, and (for predefined networks) the
// network mode.
func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
	hc := &enginecontainer.HostConfig{
		Resources:      c.resources(),
		GroupAdd:       c.spec().Groups,
		PortBindings:   c.portBindings(),
		Mounts:         c.mounts(),
		ReadonlyRootfs: c.spec().ReadOnly,
		Isolation:      c.isolation(),
		Init:           c.init(),
		Sysctls:        c.spec().Sysctls,
	}

	if c.spec().DNSConfig != nil {
		hc.DNS = c.spec().DNSConfig.Nameservers
		hc.DNSSearch = c.spec().DNSConfig.Search
		hc.DNSOptions = c.spec().DNSConfig.Options
	}

	c.applyPrivileges(hc)

	// The format of extra hosts on swarmkit is specified in:
	// http://man7.org/linux/man-pages/man5/hosts.5.html
	// IP_address canonical_hostname [aliases...]
	// However, the format of ExtraHosts in HostConfig is
	// <host>:<ip>
	// We need to do the conversion here
	// (Alias is ignored for now)
	for _, entry := range c.spec().Hosts {
		parts := strings.Fields(entry)
		if len(parts) > 1 {
			hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0]))
		}
	}

	if c.task.LogDriver != nil {
		hc.LogConfig = enginecontainer.LogConfig{
			Type:   c.task.LogDriver.Name,
			Config: c.task.LogDriver.Options,
		}
	}

	if len(c.task.Networks) > 0 {
		labels := c.task.Networks[0].Network.Spec.Annotations.Labels
		name := c.task.Networks[0].Network.Spec.Annotations.Name
		// Predefined networks (e.g. host/bridge) are selected via
		// NetworkMode rather than an endpoint attachment.
		if v, ok := labels["com.docker.swarm.predefined"]; ok && v == "true" {
			hc.NetworkMode = enginecontainer.NetworkMode(name)
		}
	}

	return hc
}

// This handles the case of volumes that are defined inside a service Mount
func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumeCreateBody {
	var (
		driverName string
		driverOpts map[string]string
		labels     map[string]string
	)

	if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
		driverName = mount.VolumeOptions.DriverConfig.Name
		driverOpts = mount.VolumeOptions.DriverConfig.Options
		labels = mount.VolumeOptions.Labels
	}

	// Returns nil when the mount carries no volume options; callers use that
	// to skip volume creation.
	if mount.VolumeOptions != nil {
		return &volumetypes.VolumeCreateBody{
			Name:       mount.Source,
			Driver:     driverName,
			DriverOpts: driverOpts,
			Labels:     labels,
		}
	}
	return nil
}

// resources converts the task's resource limits to engine Resources.
func (c *containerConfig) resources() enginecontainer.Resources {
	resources := enginecontainer.Resources{}

	// If no limits are specified let the engine use its defaults.
	//
	// TODO(aluzzardi): We might want to set some limits anyway otherwise
	// "unlimited" tasks will step over the reservation of other tasks.
	r := c.task.Spec.Resources
	if r == nil || r.Limits == nil {
		return resources
	}

	if r.Limits.MemoryBytes > 0 {
		resources.Memory = r.Limits.MemoryBytes
	}

	if r.Limits.NanoCPUs > 0 {
		resources.NanoCPUs = r.Limits.NanoCPUs
	}

	return resources
}

// Docker daemon supports just 1 network during container create.
func (c *containerConfig) createNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil {
		networks = c.task.Networks
	}

	epConfig := make(map[string]*network.EndpointSettings)
	if len(networks) > 0 {
		epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0], b)
	}

	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

// TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create
func (c *containerConfig) connectNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil {
		networks = c.task.Networks
	}
	// First network is used during container create. Other networks are used in "docker network connect"
	if len(networks) < 2 {
		return nil
	}

	epConfig := make(map[string]*network.EndpointSettings)
	for _, na := range networks[1:] {
		epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na, b)
	}
	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

// getEndpointConfig builds the endpoint settings for one network attachment,
// splitting its CIDR addresses into IPv4/IPv6 and resolving predefined
// networks to their local engine network ID.
func getEndpointConfig(na *api.NetworkAttachment, b executorpkg.Backend) *network.EndpointSettings {
	var ipv4, ipv6 string
	for _, addr := range na.Addresses {
		ip, _, err := net.ParseCIDR(addr)
		if err != nil {
			continue
		}

		if ip.To4() != nil {
			ipv4 = ip.String()
			continue
		}

		if ip.To16() != nil {
			ipv6 = ip.String()
		}
	}

	n := &network.EndpointSettings{
		NetworkID: na.Network.ID,
		IPAMConfig: &network.EndpointIPAMConfig{
			IPv4Address: ipv4,
			IPv6Address: ipv6,
		},
		DriverOpts: na.DriverAttachmentOpts,
	}
	if v, ok := na.Network.Spec.Annotations.Labels["com.docker.swarm.predefined"]; ok && v == "true" {
		if ln, err := b.FindNetwork(na.Network.Spec.Annotations.Name); err == nil {
			n.NetworkID = ln.ID()
		}
	}
	return n
}

// virtualIP returns the task endpoint's virtual IP on the given network,
// or "" if there is none (or it fails to parse).
func (c *containerConfig) virtualIP(networkID string) string {
	if c.task.Endpoint == nil {
		return ""
	}

	for _, eVip := range c.task.Endpoint.VirtualIPs {
		// We only support IPv4 VIPs for now.
		if eVip.NetworkID == networkID {
			vip, _, err := net.ParseCIDR(eVip.Addr)
			if err != nil {
				return ""
			}

			return vip.String()
		}
	}

	return ""
}

// serviceConfig builds the cluster service config (VIPs, aliases, ingress
// ports) for this task, or nil if the task has no networks.
func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
	if len(c.task.Networks) == 0 {
		return nil
	}

	logrus.Debugf("Creating service config in agent for t = %+v", c.task)
	svcCfg := &clustertypes.ServiceConfig{
		Name:             c.task.ServiceAnnotations.Name,
		Aliases:          make(map[string][]string),
		ID:               c.task.ServiceID,
		VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
	}

	for _, na := range c.task.Networks {
		svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
			// We support only IPv4 virtual IP for now.
			IPv4: c.virtualIP(na.Network.ID),
		}
		if len(na.Aliases) > 0 {
			svcCfg.Aliases[na.Network.ID] = na.Aliases
		}
	}

	if c.task.Endpoint != nil {
		for _, ePort := range c.task.Endpoint.Ports {
			// Only ingress-mode ports are exposed through the cluster config;
			// host-mode ports are handled by portBindings/exposedPorts.
			if ePort.PublishMode != api.PublishModeIngress {
				continue
			}

			svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
				Name:          ePort.Name,
				Protocol:      int32(ePort.Protocol),
				TargetPort:    ePort.TargetPort,
				PublishedPort: ePort.PublishedPort,
			})
		}
	}

	return svcCfg
}

// networkCreateRequest translates the named network attachment into a
// cluster network-create request; errors if the name is not attached.
func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
	na, ok := c.networksAttachments[name]
	if !ok {
		return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
	}

	options := types.NetworkCreate{
		// ID:     na.Network.ID,
		Labels:         na.Network.Spec.Annotations.Labels,
		Internal:       na.Network.Spec.Internal,
		Attachable:     na.Network.Spec.Attachable,
		Ingress:        convert.IsIngressNetwork(na.Network),
		EnableIPv6:     na.Network.Spec.Ipv6Enabled,
		CheckDuplicate: true,
		Scope:          netconst.SwarmScope,
	}

	if na.Network.Spec.GetNetwork() != "" {
		options.ConfigFrom = &network.ConfigReference{
			Network: na.Network.Spec.GetNetwork(),
		}
	}

	if na.Network.DriverState != nil {
		options.Driver = na.Network.DriverState.Name
		options.Options = na.Network.DriverState.Options
	}
	if na.Network.IPAM != nil {
		options.IPAM = &network.IPAM{
			Driver:  na.Network.IPAM.Driver.Name,
			Options: na.Network.IPAM.Driver.Options,
		}
		for _, ic := range na.Network.IPAM.Configs {
			c := network.IPAMConfig{
				Subnet:  ic.Subnet,
				IPRange: ic.Range,
				Gateway: ic.Gateway,
			}
			options.IPAM.Config = append(options.IPAM.Config, c)
		}
	}

	return clustertypes.NetworkCreateRequest{
		ID: na.Network.ID,
		NetworkCreateRequest: types.NetworkCreateRequest{
			Name:          name,
			NetworkCreate: options,
		},
	}, nil
}

// applyPrivileges translates the spec's privileges (credential spec and
// SELinux context) into engine SecurityOpt entries on the host config.
func (c *containerConfig) applyPrivileges(hc *enginecontainer.HostConfig) {
	privileges := c.spec().Privileges
	if privileges == nil {
		return
	}

	credentials := privileges.CredentialSpec
	if credentials != nil {
		switch credentials.Source.(type) {
		case *api.Privileges_CredentialSpec_File:
			hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=file://"+credentials.GetFile())
		case *api.Privileges_CredentialSpec_Registry:
			hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=registry://"+credentials.GetRegistry())
		case *api.Privileges_CredentialSpec_Config:
			hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=config://"+credentials.GetConfig())
		}
	}

	selinux := privileges.SELinuxContext
	if selinux != nil {
		if selinux.Disable {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=disable")
		}
		if selinux.User != "" {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=user:"+selinux.User)
		}
		if selinux.Role != "" {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=role:"+selinux.Role)
		}
		if selinux.Level != "" {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=level:"+selinux.Level)
		}
		if selinux.Type != "" {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=type:"+selinux.Type)
		}
	}
}

// eventFilter builds the engine event filter matching this task's container
// by type, name, and the system task-id label.
func (c containerConfig) eventFilter() filters.Args {
	filter := filters.NewArgs()
	filter.Add("type", events.ContainerEventType)
	filter.Add("name", c.name())
	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
	return filter
}