package container // import "github.com/docker/docker/daemon/cluster/executor/container"

import (
	"errors"
	"fmt"
	"net"
	"strconv"
	"strings"

	"github.com/sirupsen/logrus"

	"github.com/docker/distribution/reference"
	"github.com/docker/docker/api/types"
	enginecontainer "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/events"
	"github.com/docker/docker/api/types/filters"
	enginemount "github.com/docker/docker/api/types/mount"
	"github.com/docker/docker/api/types/network"
	volumetypes "github.com/docker/docker/api/types/volume"
	"github.com/docker/docker/daemon/cluster/convert"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	clustertypes "github.com/docker/docker/daemon/cluster/provider"
	"github.com/docker/go-connections/nat"
	netconst "github.com/docker/libnetwork/datastore"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/api/genericresource"
	"github.com/docker/swarmkit/template"
	gogotypes "github.com/gogo/protobuf/types"
)

const (
	// systemLabelPrefix represents the reserved namespace for system labels.
	// All labels written by labels() under this prefix override user labels.
	systemLabelPrefix = "com.docker.swarm"
)

// containerConfig converts task properties into docker container compatible
// components.
type containerConfig struct {
	// task is the swarmkit task this config was built from; setTask may
	// replace task.Spec.Runtime with a template-expanded container spec.
	task *api.Task
	// networksAttachments indexes the task's network attachments by
	// network name (see setTask).
	networksAttachments map[string]*api.NetworkAttachment
}

// newContainerConfig returns a validated container config. No methods should
// return an error if this function returns without error.
func newContainerConfig(t *api.Task, node *api.NodeDescription) (*containerConfig, error) {
	var c containerConfig
	return &c, c.setTask(t, node)
}

// setTask validates and stores the task. It rejects tasks that are neither
// container nor network-attachment runtimes, requires an image and valid
// mounts for container tasks, indexes network attachments by name, and
// expands templated placeholders in the container spec (mutating
// t.Spec.Runtime in place).
func (c *containerConfig) setTask(t *api.Task, node *api.NodeDescription) error {
	if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil {
		return exec.ErrRuntimeUnsupported
	}

	container := t.Spec.GetContainer()
	if container != nil {
		if container.Image == "" {
			return ErrImageRequired
		}

		if err := validateMounts(container.Mounts); err != nil {
			return err
		}
	}

	// index the networks by name
	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
	for _, attachment := range t.Networks {
		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
	}

	c.task = t

	if t.Spec.GetContainer() != nil {
		preparedSpec, err := template.ExpandContainerSpec(node, t)
		if err != nil {
			return err
		}
		c.task.Spec.Runtime = &api.TaskSpec_Container{
			Container: preparedSpec,
		}
	}

	return nil
}

// networkAttachmentContainerID returns the target container ID for a
// network-attachment task, or "" for a regular container task.
func (c *containerConfig) networkAttachmentContainerID() string {
	attachment := c.task.Spec.GetAttachment()
	if attachment == nil {
		return ""
	}

	return attachment.ContainerID
}

// taskID returns the swarmkit task ID.
func (c *containerConfig) taskID() string {
	return c.task.ID
}

// endpoint returns the task's endpoint (may be nil).
func (c *containerConfig) endpoint() *api.Endpoint {
	return c.task.Endpoint
}

// spec returns the container spec, or nil for attachment tasks.
func (c *containerConfig) spec() *api.ContainerSpec {
	return c.task.Spec.GetContainer()
}

// nameOrID returns the generated container name for container tasks, or the
// attachment's container ID otherwise.
func (c *containerConfig) nameOrID() string {
	if c.task.Spec.GetContainer() != nil {
		return c.name()
	}

	return c.networkAttachmentContainerID()
}

// name builds the container name: the orchestrator-set annotation name if
// present, otherwise "<service>.<slot>.<taskID>" (node ID stands in for the
// slot on slot-less, i.e. global, tasks).
func (c *containerConfig) name() string {
	if c.task.Annotations.Name != "" {
		// if set, use the container Annotations.Name field, set in the orchestrator.
		return c.task.Annotations.Name
	}

	slot := fmt.Sprint(c.task.Slot)
	if slot == "" || c.task.Slot == 0 {
		slot = c.task.NodeID
	}

	// fallback to service.slot.id.
	return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID)
}

// image returns the spec image normalized to its familiar tagged form; on a
// parse failure the raw string is returned unchanged.
func (c *containerConfig) image() string {
	raw := c.spec().Image
	ref, err := reference.ParseNormalizedNamed(raw)
	if err != nil {
		return raw
	}
	return reference.FamiliarString(reference.TagNameOnly(ref))
}

// portBindings builds host-mode port bindings only; ingress-mode ports are
// handled by the cluster routing mesh, not the engine.
func (c *containerConfig) portBindings() nat.PortMap {
	portBindings := nat.PortMap{}
	if c.task.Endpoint == nil {
		return portBindings
	}

	for _, portConfig := range c.task.Endpoint.Ports {
		if portConfig.PublishMode != api.PublishModeHost {
			continue
		}

		port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String())))
		binding := []nat.PortBinding{
			{},
		}

		// PublishedPort == 0 leaves HostPort empty so the engine picks a port.
		if portConfig.PublishedPort != 0 {
			binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort))
		}
		portBindings[port] = binding
	}

	return portBindings
}

// isolation converts the spec's isolation mode to the engine type.
func (c *containerConfig) isolation() enginecontainer.Isolation {
	return convert.IsolationFromGRPC(c.spec().Isolation)
}

// init returns the tri-state init setting: nil means "use the daemon
// default", otherwise a pointer to the explicit true/false value.
func (c *containerConfig) init() *bool {
	if c.spec().Init == nil {
		return nil
	}
	init := c.spec().Init.GetValue()
	return &init
}

// exposedPorts returns the set of host-mode published ports, keyed in
// "port/proto" form as used by the engine config.
func (c *containerConfig) exposedPorts() map[nat.Port]struct{} {
	exposedPorts := make(map[nat.Port]struct{})
	if c.task.Endpoint == nil {
		return exposedPorts
	}

	for _, portConfig := range c.task.Endpoint.Ports {
		if portConfig.PublishMode != api.PublishModeHost {
			continue
		}

		port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String())))
		exposedPorts[port] = struct{}{}
	}

	return exposedPorts
}

// config assembles the engine container.Config from the task spec, appending
// generic-resource environment variables (DOCKER_RESOURCE_*) to the spec env.
func (c *containerConfig) config() *enginecontainer.Config {
	genericEnvs := genericresource.EnvFormat(c.task.AssignedGenericResources, "DOCKER_RESOURCE")
	env := append(c.spec().Env, genericEnvs...)

	config := &enginecontainer.Config{
		Labels:       c.labels(),
		StopSignal:   c.spec().StopSignal,
		Tty:          c.spec().TTY,
		OpenStdin:    c.spec().OpenStdin,
		User:         c.spec().User,
		Env:          env,
		Hostname:     c.spec().Hostname,
		WorkingDir:   c.spec().Dir,
		Image:        c.image(),
		ExposedPorts: c.exposedPorts(),
		Healthcheck:  c.healthcheck(),
	}

	if len(c.spec().Command) > 0 {
		// If Command is provided, we replace the whole invocation with Command
		// by replacing Entrypoint and specifying Cmd. Args is ignored in this
		// case.
		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
		config.Cmd = append(config.Cmd, c.spec().Args...)
	} else if len(c.spec().Args) > 0 {
		// In this case, we assume the image has an Entrypoint and Args
		// specifies the arguments for that entrypoint.
		config.Cmd = c.spec().Args
	}

	return config
}

// labels merges labels with increasing precedence: spec labels, then task
// annotation labels, then the com.docker.swarm.* system labels which always
// win.
func (c *containerConfig) labels() map[string]string {
	var (
		system = map[string]string{
			"task":         "", // mark as cluster task
			"task.id":      c.task.ID,
			"task.name":    c.name(),
			"node.id":      c.task.NodeID,
			"service.id":   c.task.ServiceID,
			"service.name": c.task.ServiceAnnotations.Name,
		}
		labels = make(map[string]string)
	)

	// base labels are those defined in the spec.
	for k, v := range c.spec().Labels {
		labels[k] = v
	}

	// we then apply the overrides from the task, which may be set via the
	// orchestrator.
	for k, v := range c.task.Annotations.Labels {
		labels[k] = v
	}

	// finally, we apply the system labels, which override all labels.
	for k, v := range system {
		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
	}

	return labels
}

// mounts converts the spec's swarmkit mounts into engine mounts.
func (c *containerConfig) mounts() []enginemount.Mount {
	var r []enginemount.Mount
	for _, mount := range c.spec().Mounts {
		r = append(r, convertMount(mount))
	}
	return r
}

// convertMount maps a swarmkit api.Mount onto the engine mount type,
// including bind propagation, volume driver/labels, and tmpfs options.
func convertMount(m api.Mount) enginemount.Mount {
	mount := enginemount.Mount{
		Source:   m.Source,
		Target:   m.Target,
		ReadOnly: m.ReadOnly,
	}

	switch m.Type {
	case api.MountTypeBind:
		mount.Type = enginemount.TypeBind
	case api.MountTypeVolume:
		mount.Type = enginemount.TypeVolume
	case api.MountTypeTmpfs:
		mount.Type = enginemount.TypeTmpfs
	}

	if m.BindOptions != nil {
		mount.BindOptions = &enginemount.BindOptions{}
		switch m.BindOptions.Propagation {
		case api.MountPropagationRPrivate:
			mount.BindOptions.Propagation = enginemount.PropagationRPrivate
		case api.MountPropagationPrivate:
			mount.BindOptions.Propagation = enginemount.PropagationPrivate
		case api.MountPropagationRSlave:
			mount.BindOptions.Propagation = enginemount.PropagationRSlave
		case api.MountPropagationSlave:
			mount.BindOptions.Propagation = enginemount.PropagationSlave
		case api.MountPropagationRShared:
			mount.BindOptions.Propagation = enginemount.PropagationRShared
		case api.MountPropagationShared:
			mount.BindOptions.Propagation = enginemount.PropagationShared
		}
	}

	if m.VolumeOptions != nil {
		mount.VolumeOptions = &enginemount.VolumeOptions{
			NoCopy: m.VolumeOptions.NoCopy,
		}
		// copy label and driver-option maps so the engine mount does not
		// alias the swarmkit spec's maps.
		if m.VolumeOptions.Labels != nil {
			mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels))
			for k, v := range m.VolumeOptions.Labels {
				mount.VolumeOptions.Labels[k] = v
			}
		}
		if m.VolumeOptions.DriverConfig != nil {
			mount.VolumeOptions.DriverConfig = &enginemount.Driver{
				Name: m.VolumeOptions.DriverConfig.Name,
			}
			if m.VolumeOptions.DriverConfig.Options != nil {
				mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options))
				for k, v := range m.VolumeOptions.DriverConfig.Options {
					mount.VolumeOptions.DriverConfig.Options[k] = v
				}
			}
		}
	}

	if m.TmpfsOptions != nil {
		mount.TmpfsOptions = &enginemount.TmpfsOptions{
			SizeBytes: m.TmpfsOptions.SizeBytes,
			Mode:      m.TmpfsOptions.Mode,
		}
	}

	return mount
}

// healthcheck converts the spec's health config to the engine type, or nil
// when no healthcheck is configured. Duration conversion errors are ignored,
// leaving zero values; presumably the spec was validated upstream — the
// original code makes the same assumption.
func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig {
	hcSpec := c.spec().Healthcheck
	if hcSpec == nil {
		return nil
	}
	interval, _ := gogotypes.DurationFromProto(hcSpec.Interval)
	timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout)
	startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod)
	return &enginecontainer.HealthConfig{
		Test:        hcSpec.Test,
		Interval:    interval,
		Timeout:     timeout,
		Retries:     int(hcSpec.Retries),
		StartPeriod: startPeriod,
	}
}

// hostConfig assembles the engine HostConfig: resources, mounts, DNS,
// privileges, extra hosts, log driver, and — for predefined networks such as
// "host"/"bridge" — the network mode.
func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
	hc := &enginecontainer.HostConfig{
		Resources:      c.resources(),
		GroupAdd:       c.spec().Groups,
		PortBindings:   c.portBindings(),
		Mounts:         c.mounts(),
		ReadonlyRootfs: c.spec().ReadOnly,
		Isolation:      c.isolation(),
		Init:           c.init(),
	}

	if c.spec().DNSConfig != nil {
		hc.DNS = c.spec().DNSConfig.Nameservers
		hc.DNSSearch = c.spec().DNSConfig.Search
		hc.DNSOptions = c.spec().DNSConfig.Options
	}

	c.applyPrivileges(hc)

	// The format of extra hosts on swarmkit is specified in:
	// http://man7.org/linux/man-pages/man5/hosts.5.html
	//    IP_address canonical_hostname [aliases...]
	// However, the format of ExtraHosts in HostConfig is
	//    <host>:<ip>
	// We need to do the conversion here
	// (Alias is ignored for now)
	for _, entry := range c.spec().Hosts {
		parts := strings.Fields(entry)
		if len(parts) > 1 {
			hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0]))
		}
	}

	if c.task.LogDriver != nil {
		hc.LogConfig = enginecontainer.LogConfig{
			Type:   c.task.LogDriver.Name,
			Config: c.task.LogDriver.Options,
		}
	}

	// A network labeled com.docker.swarm.predefined=true (e.g. host/bridge)
	// is selected via NetworkMode rather than endpoint configuration.
	if len(c.task.Networks) > 0 {
		labels := c.task.Networks[0].Network.Spec.Annotations.Labels
		name := c.task.Networks[0].Network.Spec.Annotations.Name
		if v, ok := labels["com.docker.swarm.predefined"]; ok && v == "true" {
			hc.NetworkMode = enginecontainer.NetworkMode(name)
		}
	}

	return hc
}

// This handles the case of volumes that are defined inside a service Mount.
// It returns nil when the mount carries no volume options.
func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumeCreateBody {
	var (
		driverName string
		driverOpts map[string]string
		labels     map[string]string
	)

	if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
		driverName = mount.VolumeOptions.DriverConfig.Name
		driverOpts = mount.VolumeOptions.DriverConfig.Options
		labels = mount.VolumeOptions.Labels
	}

	if mount.VolumeOptions != nil {
		return &volumetypes.VolumeCreateBody{
			Name:       mount.Source,
			Driver:     driverName,
			DriverOpts: driverOpts,
			Labels:     labels,
		}
	}
	return nil
}

// resources translates the task's resource limits (memory bytes, nano CPUs)
// into engine resources; reservations are not applied here.
func (c *containerConfig) resources() enginecontainer.Resources {
	resources := enginecontainer.Resources{}

	// If no limits are specified let the engine use its defaults.
	//
	// TODO(aluzzardi): We might want to set some limits anyway otherwise
	// "unlimited" tasks will step over the reservation of other tasks.
	r := c.task.Spec.Resources
	if r == nil || r.Limits == nil {
		return resources
	}

	if r.Limits.MemoryBytes > 0 {
		resources.Memory = r.Limits.MemoryBytes
	}

	if r.Limits.NanoCPUs > 0 {
		resources.NanoCPUs = r.Limits.NanoCPUs
	}

	return resources
}

// Docker daemon supports just 1 network during container create.
// createNetworkingConfig therefore wires up only the first attachment;
// connectNetworkingConfig handles the rest post-create.
func (c *containerConfig) createNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil {
		networks = c.task.Networks
	}

	epConfig := make(map[string]*network.EndpointSettings)
	if len(networks) > 0 {
		epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0], b)
	}

	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

// TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create
// connectNetworkingConfig returns endpoint settings for every network after
// the first, or nil when there is at most one network.
func (c *containerConfig) connectNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil {
		networks = c.task.Networks
	}
	// First network is used during container create. Other networks are used in "docker network connect"
	if len(networks) < 2 {
		return nil
	}

	epConfig := make(map[string]*network.EndpointSettings)
	for _, na := range networks[1:] {
		epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na, b)
	}
	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

// getEndpointConfig builds endpoint settings from an attachment: the last
// parseable IPv4 and IPv6 addresses become the static IPAM config, and for
// predefined networks the local engine network ID is substituted for the
// cluster-scoped one.
func getEndpointConfig(na *api.NetworkAttachment, b executorpkg.Backend) *network.EndpointSettings {
	var ipv4, ipv6 string
	for _, addr := range na.Addresses {
		ip, _, err := net.ParseCIDR(addr)
		if err != nil {
			continue
		}

		if ip.To4() != nil {
			ipv4 = ip.String()
			continue
		}

		if ip.To16() != nil {
			ipv6 = ip.String()
		}
	}

	n := &network.EndpointSettings{
		NetworkID: na.Network.ID,
		IPAMConfig: &network.EndpointIPAMConfig{
			IPv4Address: ipv4,
			IPv6Address: ipv6,
		},
		DriverOpts: na.DriverAttachmentOpts,
	}
	if v, ok := na.Network.Spec.Annotations.Labels["com.docker.swarm.predefined"]; ok && v == "true" {
		if ln, err := b.FindNetwork(na.Network.Spec.Annotations.Name); err == nil {
			n.NetworkID = ln.ID()
		}
	}
	return n
}

// virtualIP returns the task's virtual IP on the given network (without the
// CIDR suffix), or "" when absent or unparseable.
func (c *containerConfig) virtualIP(networkID string) string {
	if c.task.Endpoint == nil {
		return ""
	}

	for _, eVip := range c.task.Endpoint.VirtualIPs {
		// We only support IPv4 VIPs for now.
		if eVip.NetworkID == networkID {
			vip, _, err := net.ParseCIDR(eVip.Addr)
			if err != nil {
				return ""
			}

			return vip.String()
		}
	}

	return ""
}

// serviceConfig builds the cluster service config (VIPs, aliases, ingress
// ports) consumed by the agent; returns nil when the task joins no networks.
func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
	if len(c.task.Networks) == 0 {
		return nil
	}

	logrus.Debugf("Creating service config in agent for t = %+v", c.task)
	svcCfg := &clustertypes.ServiceConfig{
		Name:             c.task.ServiceAnnotations.Name,
		Aliases:          make(map[string][]string),
		ID:               c.task.ServiceID,
		VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
	}

	for _, na := range c.task.Networks {
		svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
			// We support only IPv4 virtual IP for now.
			IPv4: c.virtualIP(na.Network.ID),
		}
		if len(na.Aliases) > 0 {
			svcCfg.Aliases[na.Network.ID] = na.Aliases
		}
	}

	// Only ingress-mode ports are exposed through the routing mesh; host-mode
	// ports were already handled via portBindings.
	if c.task.Endpoint != nil {
		for _, ePort := range c.task.Endpoint.Ports {
			if ePort.PublishMode != api.PublishModeIngress {
				continue
			}

			svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
				Name:          ePort.Name,
				Protocol:      int32(ePort.Protocol),
				TargetPort:    ePort.TargetPort,
				PublishedPort: ePort.PublishedPort,
			})
		}
	}

	return svcCfg
}

// networkCreateRequest translates a named attachment's swarmkit network spec
// into an engine network-create request (driver, IPAM, config-from, flags).
// It errors if the name was not indexed by setTask.
func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
	na, ok := c.networksAttachments[name]
	if !ok {
		return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
	}

	options := types.NetworkCreate{
		// ID:     na.Network.ID,
		Labels:         na.Network.Spec.Annotations.Labels,
		Internal:       na.Network.Spec.Internal,
		Attachable:     na.Network.Spec.Attachable,
		Ingress:        convert.IsIngressNetwork(na.Network),
		EnableIPv6:     na.Network.Spec.Ipv6Enabled,
		CheckDuplicate: true,
		Scope:          netconst.SwarmScope,
	}

	if na.Network.Spec.GetNetwork() != "" {
		options.ConfigFrom = &network.ConfigReference{
			Network: na.Network.Spec.GetNetwork(),
		}
	}

	if na.Network.DriverState != nil {
		options.Driver = na.Network.DriverState.Name
		options.Options = na.Network.DriverState.Options
	}
	if na.Network.IPAM != nil {
		options.IPAM = &network.IPAM{
			Driver:  na.Network.IPAM.Driver.Name,
			Options: na.Network.IPAM.Driver.Options,
		}
		for _, ic := range na.Network.IPAM.Configs {
			c := network.IPAMConfig{
				Subnet:  ic.Subnet,
				IPRange: ic.Range,
				Gateway: ic.Gateway,
			}
			options.IPAM.Config = append(options.IPAM.Config, c)
		}
	}

	return clustertypes.NetworkCreateRequest{
		ID: na.Network.ID,
		NetworkCreateRequest: types.NetworkCreateRequest{
			Name:          name,
			NetworkCreate: options,
		},
	}, nil
}

// applyPrivileges appends SecurityOpt entries for credential specs (Windows)
// and SELinux context fields (Linux) from the spec's privileges.
func (c *containerConfig) applyPrivileges(hc *enginecontainer.HostConfig) {
	privileges := c.spec().Privileges
	if privileges == nil {
		return
	}

	credentials := privileges.CredentialSpec
	if credentials != nil {
		switch credentials.Source.(type) {
		case *api.Privileges_CredentialSpec_File:
			hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=file://"+credentials.GetFile())
		case *api.Privileges_CredentialSpec_Registry:
			hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=registry://"+credentials.GetRegistry())
		}
	}

	selinux := privileges.SELinuxContext
	if selinux != nil {
		if selinux.Disable {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=disable")
		}
		if selinux.User != "" {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=user:"+selinux.User)
		}
		if selinux.Role != "" {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=role:"+selinux.Role)
		}
		if selinux.Level != "" {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=level:"+selinux.Level)
		}
		if selinux.Type != "" {
			hc.SecurityOpt = append(hc.SecurityOpt, "label=type:"+selinux.Type)
		}
	}
}

// eventFilter builds the engine event filter that selects container events
// for exactly this task, matched by name and the system task.id label.
func (c containerConfig) eventFilter() filters.Args {
	filter := filters.NewArgs()
	filter.Add("type", events.ContainerEventType)
	filter.Add("name", c.name())
	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
	return filter
}