github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/engine/daemon/cluster/executor/container/container.go (about) 1 package container // import "github.com/docker/docker/daemon/cluster/executor/container" 2 3 import ( 4 "errors" 5 "fmt" 6 "net" 7 "strconv" 8 "strings" 9 10 "github.com/sirupsen/logrus" 11 12 "github.com/docker/distribution/reference" 13 "github.com/docker/docker/api/types" 14 enginecontainer "github.com/docker/docker/api/types/container" 15 "github.com/docker/docker/api/types/events" 16 "github.com/docker/docker/api/types/filters" 17 enginemount "github.com/docker/docker/api/types/mount" 18 "github.com/docker/docker/api/types/network" 19 volumetypes "github.com/docker/docker/api/types/volume" 20 "github.com/docker/docker/daemon/cluster/convert" 21 executorpkg "github.com/docker/docker/daemon/cluster/executor" 22 clustertypes "github.com/docker/docker/daemon/cluster/provider" 23 "github.com/docker/go-connections/nat" 24 "github.com/docker/go-units" 25 netconst "github.com/docker/libnetwork/datastore" 26 "github.com/docker/swarmkit/agent/exec" 27 "github.com/docker/swarmkit/api" 28 "github.com/docker/swarmkit/api/genericresource" 29 "github.com/docker/swarmkit/template" 30 gogotypes "github.com/gogo/protobuf/types" 31 ) 32 33 const ( 34 // systemLabelPrefix represents the reserved namespace for system labels. 35 systemLabelPrefix = "com.docker.swarm" 36 ) 37 38 // containerConfig converts task properties into docker container compatible 39 // components. 40 type containerConfig struct { 41 task *api.Task 42 networksAttachments map[string]*api.NetworkAttachment 43 } 44 45 // newContainerConfig returns a validated container config. No methods should 46 // return an error if this function returns without error. 47 func newContainerConfig(t *api.Task, node *api.NodeDescription) (*containerConfig, error) { 48 var c containerConfig 49 return &c, c.setTask(t, node) 50 } 51 52 func (c *containerConfig) setTask(t *api.Task, node *api.NodeDescription) error { 53 if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil { 54 return exec.ErrRuntimeUnsupported 55 } 56 57 container := t.Spec.GetContainer() 58 if container != nil { 59 if container.Image == "" { 60 return ErrImageRequired 61 } 62 63 if err := validateMounts(container.Mounts); err != nil { 64 return err 65 } 66 } 67 68 // index the networks by name 69 c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks)) 70 for _, attachment := range t.Networks { 71 c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment 72 } 73 74 c.task = t 75 76 if t.Spec.GetContainer() != nil { 77 preparedSpec, err := template.ExpandContainerSpec(node, t) 78 if err != nil { 79 return err 80 } 81 c.task.Spec.Runtime = &api.TaskSpec_Container{ 82 Container: preparedSpec, 83 } 84 } 85 86 return nil 87 } 88 89 func (c *containerConfig) networkAttachmentContainerID() string { 90 attachment := c.task.Spec.GetAttachment() 91 if attachment == nil { 92 return "" 93 } 94 95 return attachment.ContainerID 96 } 97 98 func (c *containerConfig) taskID() string { 99 return c.task.ID 100 } 101 102 func (c *containerConfig) spec() *api.ContainerSpec { 103 return c.task.Spec.GetContainer() 104 } 105 106 func (c *containerConfig) nameOrID() string { 107 if c.task.Spec.GetContainer() != nil { 108 return c.name() 109 } 110 111 return c.networkAttachmentContainerID() 112 } 113 114 func (c *containerConfig) name() string { 115 if c.task.Annotations.Name != "" { 116 // if set, use the container Annotations.Name field, set in the orchestrator. 117 return c.task.Annotations.Name 118 } 119 120 slot := fmt.Sprint(c.task.Slot) 121 if slot == "" || c.task.Slot == 0 { 122 slot = c.task.NodeID 123 } 124 125 // fallback to service.slot.id. 126 return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID) 127 } 128 129 func (c *containerConfig) image() string { 130 raw := c.spec().Image 131 ref, err := reference.ParseNormalizedNamed(raw) 132 if err != nil { 133 return raw 134 } 135 return reference.FamiliarString(reference.TagNameOnly(ref)) 136 } 137 138 func (c *containerConfig) portBindings() nat.PortMap { 139 portBindings := nat.PortMap{} 140 if c.task.Endpoint == nil { 141 return portBindings 142 } 143 144 for _, portConfig := range c.task.Endpoint.Ports { 145 if portConfig.PublishMode != api.PublishModeHost { 146 continue 147 } 148 149 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 150 binding := []nat.PortBinding{ 151 {}, 152 } 153 154 if portConfig.PublishedPort != 0 { 155 binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort)) 156 } 157 portBindings[port] = binding 158 } 159 160 return portBindings 161 } 162 163 func (c *containerConfig) isolation() enginecontainer.Isolation { 164 return convert.IsolationFromGRPC(c.spec().Isolation) 165 } 166 167 func (c *containerConfig) init() *bool { 168 if c.spec().Init == nil { 169 return nil 170 } 171 init := c.spec().Init.GetValue() 172 return &init 173 } 174 175 func (c *containerConfig) exposedPorts() map[nat.Port]struct{} { 176 exposedPorts := make(map[nat.Port]struct{}) 177 if c.task.Endpoint == nil { 178 return exposedPorts 179 } 180 181 for _, portConfig := range c.task.Endpoint.Ports { 182 if portConfig.PublishMode != api.PublishModeHost { 183 continue 184 } 185 186 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 187 exposedPorts[port] = struct{}{} 188 } 189 190 return exposedPorts 191 } 192 193 func (c *containerConfig) config() *enginecontainer.Config { 194 genericEnvs := genericresource.EnvFormat(c.task.AssignedGenericResources, "DOCKER_RESOURCE") 195 env := append(c.spec().Env, genericEnvs...) 196 197 config := &enginecontainer.Config{ 198 Labels: c.labels(), 199 StopSignal: c.spec().StopSignal, 200 Tty: c.spec().TTY, 201 OpenStdin: c.spec().OpenStdin, 202 User: c.spec().User, 203 Env: env, 204 Hostname: c.spec().Hostname, 205 WorkingDir: c.spec().Dir, 206 Image: c.image(), 207 ExposedPorts: c.exposedPorts(), 208 Healthcheck: c.healthcheck(), 209 } 210 211 if len(c.spec().Command) > 0 { 212 // If Command is provided, we replace the whole invocation with Command 213 // by replacing Entrypoint and specifying Cmd. Args is ignored in this 214 // case. 215 config.Entrypoint = append(config.Entrypoint, c.spec().Command...) 216 config.Cmd = append(config.Cmd, c.spec().Args...) 217 } else if len(c.spec().Args) > 0 { 218 // In this case, we assume the image has an Entrypoint and Args 219 // specifies the arguments for that entrypoint. 220 config.Cmd = c.spec().Args 221 } 222 223 return config 224 } 225 226 func (c *containerConfig) labels() map[string]string { 227 var ( 228 system = map[string]string{ 229 "task": "", // mark as cluster task 230 "task.id": c.task.ID, 231 "task.name": c.name(), 232 "node.id": c.task.NodeID, 233 "service.id": c.task.ServiceID, 234 "service.name": c.task.ServiceAnnotations.Name, 235 } 236 labels = make(map[string]string) 237 ) 238 239 // base labels are those defined in the spec. 240 for k, v := range c.spec().Labels { 241 labels[k] = v 242 } 243 244 // we then apply the overrides from the task, which may be set via the 245 // orchestrator. 246 for k, v := range c.task.Annotations.Labels { 247 labels[k] = v 248 } 249 250 // finally, we apply the system labels, which override all labels. 251 for k, v := range system { 252 labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v 253 } 254 255 return labels 256 } 257 258 func (c *containerConfig) mounts() []enginemount.Mount { 259 var r []enginemount.Mount 260 for _, mount := range c.spec().Mounts { 261 r = append(r, convertMount(mount)) 262 } 263 return r 264 } 265 266 func convertMount(m api.Mount) enginemount.Mount { 267 mount := enginemount.Mount{ 268 Source: m.Source, 269 Target: m.Target, 270 ReadOnly: m.ReadOnly, 271 } 272 273 switch m.Type { 274 case api.MountTypeBind: 275 mount.Type = enginemount.TypeBind 276 case api.MountTypeVolume: 277 mount.Type = enginemount.TypeVolume 278 case api.MountTypeTmpfs: 279 mount.Type = enginemount.TypeTmpfs 280 case api.MountTypeNamedPipe: 281 mount.Type = enginemount.TypeNamedPipe 282 } 283 284 if m.BindOptions != nil { 285 mount.BindOptions = &enginemount.BindOptions{ 286 NonRecursive: m.BindOptions.NonRecursive, 287 } 288 switch m.BindOptions.Propagation { 289 case api.MountPropagationRPrivate: 290 mount.BindOptions.Propagation = enginemount.PropagationRPrivate 291 case api.MountPropagationPrivate: 292 mount.BindOptions.Propagation = enginemount.PropagationPrivate 293 case api.MountPropagationRSlave: 294 mount.BindOptions.Propagation = enginemount.PropagationRSlave 295 case api.MountPropagationSlave: 296 mount.BindOptions.Propagation = enginemount.PropagationSlave 297 case api.MountPropagationRShared: 298 mount.BindOptions.Propagation = enginemount.PropagationRShared 299 case api.MountPropagationShared: 300 mount.BindOptions.Propagation = enginemount.PropagationShared 301 } 302 } 303 304 if m.VolumeOptions != nil { 305 mount.VolumeOptions = &enginemount.VolumeOptions{ 306 NoCopy: m.VolumeOptions.NoCopy, 307 } 308 if m.VolumeOptions.Labels != nil { 309 mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels)) 310 for k, v := range m.VolumeOptions.Labels { 311 mount.VolumeOptions.Labels[k] = v 312 } 313 } 314 if m.VolumeOptions.DriverConfig != nil { 315 mount.VolumeOptions.DriverConfig = &enginemount.Driver{ 316 Name: m.VolumeOptions.DriverConfig.Name, 317 } 318 if m.VolumeOptions.DriverConfig.Options != nil { 319 mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options)) 320 for k, v := range m.VolumeOptions.DriverConfig.Options { 321 mount.VolumeOptions.DriverConfig.Options[k] = v 322 } 323 } 324 } 325 } 326 327 if m.TmpfsOptions != nil { 328 mount.TmpfsOptions = &enginemount.TmpfsOptions{ 329 SizeBytes: m.TmpfsOptions.SizeBytes, 330 Mode: m.TmpfsOptions.Mode, 331 } 332 } 333 334 return mount 335 } 336 337 func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig { 338 hcSpec := c.spec().Healthcheck 339 if hcSpec == nil { 340 return nil 341 } 342 interval, _ := gogotypes.DurationFromProto(hcSpec.Interval) 343 timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout) 344 startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod) 345 return &enginecontainer.HealthConfig{ 346 Test: hcSpec.Test, 347 Interval: interval, 348 Timeout: timeout, 349 Retries: int(hcSpec.Retries), 350 StartPeriod: startPeriod, 351 } 352 } 353 354 func (c *containerConfig) hostConfig() *enginecontainer.HostConfig { 355 hc := &enginecontainer.HostConfig{ 356 Resources: c.resources(), 357 GroupAdd: c.spec().Groups, 358 PortBindings: c.portBindings(), 359 Mounts: c.mounts(), 360 ReadonlyRootfs: c.spec().ReadOnly, 361 Isolation: c.isolation(), 362 Init: c.init(), 363 Sysctls: c.spec().Sysctls, 364 CapAdd: c.spec().CapabilityAdd, 365 CapDrop: c.spec().CapabilityDrop, 366 } 367 368 if c.spec().DNSConfig != nil { 369 hc.DNS = c.spec().DNSConfig.Nameservers 370 hc.DNSSearch = c.spec().DNSConfig.Search 371 hc.DNSOptions = c.spec().DNSConfig.Options 372 } 373 374 c.applyPrivileges(hc) 375 376 // The format of extra hosts on swarmkit is specified in: 377 // http://man7.org/linux/man-pages/man5/hosts.5.html 378 // IP_address canonical_hostname [aliases...] 379 // However, the format of ExtraHosts in HostConfig is 380 // <host>:<ip> 381 // We need to do the conversion here 382 // (Alias is ignored for now) 383 for _, entry := range c.spec().Hosts { 384 parts := strings.Fields(entry) 385 if len(parts) > 1 { 386 hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0])) 387 } 388 } 389 390 if c.task.LogDriver != nil { 391 hc.LogConfig = enginecontainer.LogConfig{ 392 Type: c.task.LogDriver.Name, 393 Config: c.task.LogDriver.Options, 394 } 395 } 396 397 if len(c.task.Networks) > 0 { 398 labels := c.task.Networks[0].Network.Spec.Annotations.Labels 399 name := c.task.Networks[0].Network.Spec.Annotations.Name 400 if v, ok := labels["com.docker.swarm.predefined"]; ok && v == "true" { 401 hc.NetworkMode = enginecontainer.NetworkMode(name) 402 } 403 } 404 405 return hc 406 } 407 408 // This handles the case of volumes that are defined inside a service Mount 409 func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumeCreateBody { 410 var ( 411 driverName string 412 driverOpts map[string]string 413 labels map[string]string 414 ) 415 416 if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil { 417 driverName = mount.VolumeOptions.DriverConfig.Name 418 driverOpts = mount.VolumeOptions.DriverConfig.Options 419 labels = mount.VolumeOptions.Labels 420 } 421 422 if mount.VolumeOptions != nil { 423 return &volumetypes.VolumeCreateBody{ 424 Name: mount.Source, 425 Driver: driverName, 426 DriverOpts: driverOpts, 427 Labels: labels, 428 } 429 } 430 return nil 431 } 432 433 func (c *containerConfig) resources() enginecontainer.Resources { 434 resources := enginecontainer.Resources{} 435 436 // set pids limit 437 pidsLimit := c.spec().PidsLimit 438 if pidsLimit > 0 { 439 resources.PidsLimit = &pidsLimit 440 } 441 442 resources.Ulimits = make([]*units.Ulimit, len(c.spec().Ulimits)) 443 for i, ulimit := range c.spec().Ulimits { 444 resources.Ulimits[i] = &units.Ulimit{ 445 Name: ulimit.Name, 446 Soft: ulimit.Soft, 447 Hard: ulimit.Hard, 448 } 449 } 450 451 // If no limits are specified let the engine use its defaults. 452 // 453 // TODO(aluzzardi): We might want to set some limits anyway otherwise 454 // "unlimited" tasks will step over the reservation of other tasks. 455 r := c.task.Spec.Resources 456 if r == nil || r.Limits == nil { 457 return resources 458 } 459 460 if r.Limits.MemoryBytes > 0 { 461 resources.Memory = r.Limits.MemoryBytes 462 } 463 464 if r.Limits.NanoCPUs > 0 { 465 resources.NanoCPUs = r.Limits.NanoCPUs 466 } 467 468 return resources 469 } 470 471 // Docker daemon supports just 1 network during container create. 472 func (c *containerConfig) createNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig { 473 var networks []*api.NetworkAttachment 474 if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil { 475 networks = c.task.Networks 476 } 477 478 epConfig := make(map[string]*network.EndpointSettings) 479 if len(networks) > 0 { 480 epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0], b) 481 } 482 483 return &network.NetworkingConfig{EndpointsConfig: epConfig} 484 } 485 486 // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create 487 func (c *containerConfig) connectNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig { 488 var networks []*api.NetworkAttachment 489 if c.task.Spec.GetContainer() != nil { 490 networks = c.task.Networks 491 } 492 // First network is used during container create. Other networks are used in "docker network connect" 493 if len(networks) < 2 { 494 return nil 495 } 496 497 epConfig := make(map[string]*network.EndpointSettings) 498 for _, na := range networks[1:] { 499 epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na, b) 500 } 501 return &network.NetworkingConfig{EndpointsConfig: epConfig} 502 } 503 504 func getEndpointConfig(na *api.NetworkAttachment, b executorpkg.Backend) *network.EndpointSettings { 505 var ipv4, ipv6 string 506 for _, addr := range na.Addresses { 507 ip, _, err := net.ParseCIDR(addr) 508 if err != nil { 509 continue 510 } 511 512 if ip.To4() != nil { 513 ipv4 = ip.String() 514 continue 515 } 516 517 if ip.To16() != nil { 518 ipv6 = ip.String() 519 } 520 } 521 522 n := &network.EndpointSettings{ 523 NetworkID: na.Network.ID, 524 IPAMConfig: &network.EndpointIPAMConfig{ 525 IPv4Address: ipv4, 526 IPv6Address: ipv6, 527 }, 528 DriverOpts: na.DriverAttachmentOpts, 529 } 530 if v, ok := na.Network.Spec.Annotations.Labels["com.docker.swarm.predefined"]; ok && v == "true" { 531 if ln, err := b.FindNetwork(na.Network.Spec.Annotations.Name); err == nil { 532 n.NetworkID = ln.ID() 533 } 534 } 535 return n 536 } 537 538 func (c *containerConfig) virtualIP(networkID string) string { 539 if c.task.Endpoint == nil { 540 return "" 541 } 542 543 for _, eVip := range c.task.Endpoint.VirtualIPs { 544 // We only support IPv4 VIPs for now. 545 if eVip.NetworkID == networkID { 546 vip, _, err := net.ParseCIDR(eVip.Addr) 547 if err != nil { 548 return "" 549 } 550 551 return vip.String() 552 } 553 } 554 555 return "" 556 } 557 558 func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig { 559 if len(c.task.Networks) == 0 { 560 return nil 561 } 562 563 logrus.Debugf("Creating service config in agent for t = %+v", c.task) 564 svcCfg := &clustertypes.ServiceConfig{ 565 Name: c.task.ServiceAnnotations.Name, 566 Aliases: make(map[string][]string), 567 ID: c.task.ServiceID, 568 VirtualAddresses: make(map[string]*clustertypes.VirtualAddress), 569 } 570 571 for _, na := range c.task.Networks { 572 svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{ 573 // We support only IPv4 virtual IP for now. 574 IPv4: c.virtualIP(na.Network.ID), 575 } 576 if len(na.Aliases) > 0 { 577 svcCfg.Aliases[na.Network.ID] = na.Aliases 578 } 579 } 580 581 if c.task.Endpoint != nil { 582 for _, ePort := range c.task.Endpoint.Ports { 583 if ePort.PublishMode != api.PublishModeIngress { 584 continue 585 } 586 587 svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{ 588 Name: ePort.Name, 589 Protocol: int32(ePort.Protocol), 590 TargetPort: ePort.TargetPort, 591 PublishedPort: ePort.PublishedPort, 592 }) 593 } 594 } 595 596 return svcCfg 597 } 598 599 func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) { 600 na, ok := c.networksAttachments[name] 601 if !ok { 602 return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced") 603 } 604 605 options := types.NetworkCreate{ 606 // ID: na.Network.ID, 607 Labels: na.Network.Spec.Annotations.Labels, 608 Internal: na.Network.Spec.Internal, 609 Attachable: na.Network.Spec.Attachable, 610 Ingress: convert.IsIngressNetwork(na.Network), 611 EnableIPv6: na.Network.Spec.Ipv6Enabled, 612 CheckDuplicate: true, 613 Scope: netconst.SwarmScope, 614 } 615 616 if na.Network.Spec.GetNetwork() != "" { 617 options.ConfigFrom = &network.ConfigReference{ 618 Network: na.Network.Spec.GetNetwork(), 619 } 620 } 621 622 if na.Network.DriverState != nil { 623 options.Driver = na.Network.DriverState.Name 624 options.Options = na.Network.DriverState.Options 625 } 626 if na.Network.IPAM != nil { 627 options.IPAM = &network.IPAM{ 628 Driver: na.Network.IPAM.Driver.Name, 629 Options: na.Network.IPAM.Driver.Options, 630 } 631 for _, ic := range na.Network.IPAM.Configs { 632 c := network.IPAMConfig{ 633 Subnet: ic.Subnet, 634 IPRange: ic.Range, 635 Gateway: ic.Gateway, 636 } 637 options.IPAM.Config = append(options.IPAM.Config, c) 638 } 639 } 640 641 return clustertypes.NetworkCreateRequest{ 642 ID: na.Network.ID, 643 NetworkCreateRequest: types.NetworkCreateRequest{ 644 Name: name, 645 NetworkCreate: options, 646 }, 647 }, nil 648 } 649 650 func (c *containerConfig) applyPrivileges(hc *enginecontainer.HostConfig) { 651 privileges := c.spec().Privileges 652 if privileges == nil { 653 return 654 } 655 656 credentials := privileges.CredentialSpec 657 if credentials != nil { 658 switch credentials.Source.(type) { 659 case *api.Privileges_CredentialSpec_File: 660 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=file://"+credentials.GetFile()) 661 case *api.Privileges_CredentialSpec_Registry: 662 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=registry://"+credentials.GetRegistry()) 663 case *api.Privileges_CredentialSpec_Config: 664 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=config://"+credentials.GetConfig()) 665 } 666 } 667 668 selinux := privileges.SELinuxContext 669 if selinux != nil { 670 if selinux.Disable { 671 hc.SecurityOpt = append(hc.SecurityOpt, "label=disable") 672 } 673 if selinux.User != "" { 674 hc.SecurityOpt = append(hc.SecurityOpt, "label=user:"+selinux.User) 675 } 676 if selinux.Role != "" { 677 hc.SecurityOpt = append(hc.SecurityOpt, "label=role:"+selinux.Role) 678 } 679 if selinux.Level != "" { 680 hc.SecurityOpt = append(hc.SecurityOpt, "label=level:"+selinux.Level) 681 } 682 if selinux.Type != "" { 683 hc.SecurityOpt = append(hc.SecurityOpt, "label=type:"+selinux.Type) 684 } 685 } 686 } 687 688 func (c containerConfig) eventFilter() filters.Args { 689 filter := filters.NewArgs() 690 filter.Add("type", events.ContainerEventType) 691 filter.Add("name", c.name()) 692 filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID)) 693 return filter 694 }