github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/daemon/cluster/executor/container/container.go (about) 1 package container 2 3 import ( 4 "errors" 5 "fmt" 6 "net" 7 "strconv" 8 "strings" 9 "time" 10 11 "github.com/Sirupsen/logrus" 12 13 "github.com/docker/distribution/reference" 14 "github.com/docker/docker/api/types" 15 enginecontainer "github.com/docker/docker/api/types/container" 16 "github.com/docker/docker/api/types/events" 17 "github.com/docker/docker/api/types/filters" 18 enginemount "github.com/docker/docker/api/types/mount" 19 "github.com/docker/docker/api/types/network" 20 volumetypes "github.com/docker/docker/api/types/volume" 21 "github.com/docker/docker/daemon/cluster/convert" 22 executorpkg "github.com/docker/docker/daemon/cluster/executor" 23 clustertypes "github.com/docker/docker/daemon/cluster/provider" 24 "github.com/docker/go-connections/nat" 25 netconst "github.com/docker/libnetwork/datastore" 26 "github.com/docker/swarmkit/agent/exec" 27 "github.com/docker/swarmkit/api" 28 "github.com/docker/swarmkit/template" 29 gogotypes "github.com/gogo/protobuf/types" 30 ) 31 32 const ( 33 // Explicitly use the kernel's default setting for CPU quota of 100ms. 34 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 35 cpuQuotaPeriod = 100 * time.Millisecond 36 37 // systemLabelPrefix represents the reserved namespace for system labels. 38 systemLabelPrefix = "com.docker.swarm" 39 ) 40 41 // containerConfig converts task properties into docker container compatible 42 // components. 43 type containerConfig struct { 44 task *api.Task 45 networksAttachments map[string]*api.NetworkAttachment 46 } 47 48 // newContainerConfig returns a validated container config. No methods should 49 // return an error if this function returns without error. 50 func newContainerConfig(t *api.Task) (*containerConfig, error) { 51 var c containerConfig 52 return &c, c.setTask(t) 53 } 54 55 func (c *containerConfig) setTask(t *api.Task) error { 56 if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil { 57 return exec.ErrRuntimeUnsupported 58 } 59 60 container := t.Spec.GetContainer() 61 if container != nil { 62 if container.Image == "" { 63 return ErrImageRequired 64 } 65 66 if err := validateMounts(container.Mounts); err != nil { 67 return err 68 } 69 } 70 71 // index the networks by name 72 c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks)) 73 for _, attachment := range t.Networks { 74 c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment 75 } 76 77 c.task = t 78 79 if t.Spec.GetContainer() != nil { 80 preparedSpec, err := template.ExpandContainerSpec(t) 81 if err != nil { 82 return err 83 } 84 c.task.Spec.Runtime = &api.TaskSpec_Container{ 85 Container: preparedSpec, 86 } 87 } 88 89 return nil 90 } 91 92 func (c *containerConfig) id() string { 93 attachment := c.task.Spec.GetAttachment() 94 if attachment == nil { 95 return "" 96 } 97 98 return attachment.ContainerID 99 } 100 101 func (c *containerConfig) taskID() string { 102 return c.task.ID 103 } 104 105 func (c *containerConfig) endpoint() *api.Endpoint { 106 return c.task.Endpoint 107 } 108 109 func (c *containerConfig) spec() *api.ContainerSpec { 110 return c.task.Spec.GetContainer() 111 } 112 113 func (c *containerConfig) nameOrID() string { 114 if c.task.Spec.GetContainer() != nil { 115 return c.name() 116 } 117 118 return c.id() 119 } 120 121 func (c *containerConfig) name() string { 122 if c.task.Annotations.Name != "" { 123 // if set, use the container Annotations.Name field, set in the orchestrator. 124 return c.task.Annotations.Name 125 } 126 127 slot := fmt.Sprint(c.task.Slot) 128 if slot == "" || c.task.Slot == 0 { 129 slot = c.task.NodeID 130 } 131 132 // fallback to service.slot.id. 133 return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID) 134 } 135 136 func (c *containerConfig) image() string { 137 raw := c.spec().Image 138 ref, err := reference.ParseNormalizedNamed(raw) 139 if err != nil { 140 return raw 141 } 142 return reference.FamiliarString(reference.TagNameOnly(ref)) 143 } 144 145 func (c *containerConfig) portBindings() nat.PortMap { 146 portBindings := nat.PortMap{} 147 if c.task.Endpoint == nil { 148 return portBindings 149 } 150 151 for _, portConfig := range c.task.Endpoint.Ports { 152 if portConfig.PublishMode != api.PublishModeHost { 153 continue 154 } 155 156 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 157 binding := []nat.PortBinding{ 158 {}, 159 } 160 161 if portConfig.PublishedPort != 0 { 162 binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort)) 163 } 164 portBindings[port] = binding 165 } 166 167 return portBindings 168 } 169 170 func (c *containerConfig) exposedPorts() map[nat.Port]struct{} { 171 exposedPorts := make(map[nat.Port]struct{}) 172 if c.task.Endpoint == nil { 173 return exposedPorts 174 } 175 176 for _, portConfig := range c.task.Endpoint.Ports { 177 if portConfig.PublishMode != api.PublishModeHost { 178 continue 179 } 180 181 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 182 exposedPorts[port] = struct{}{} 183 } 184 185 return exposedPorts 186 } 187 188 func (c *containerConfig) config() *enginecontainer.Config { 189 config := &enginecontainer.Config{ 190 Labels: c.labels(), 191 StopSignal: c.spec().StopSignal, 192 Tty: c.spec().TTY, 193 OpenStdin: c.spec().OpenStdin, 194 User: c.spec().User, 195 Env: c.spec().Env, 196 Hostname: c.spec().Hostname, 197 WorkingDir: c.spec().Dir, 198 Image: c.image(), 199 ExposedPorts: c.exposedPorts(), 200 Healthcheck: c.healthcheck(), 201 } 202 203 if len(c.spec().Command) > 0 { 204 // If Command is provided, we replace the whole invocation with Command 205 // by replacing Entrypoint and specifying Cmd. Args is ignored in this 206 // case. 207 config.Entrypoint = append(config.Entrypoint, c.spec().Command...) 208 config.Cmd = append(config.Cmd, c.spec().Args...) 209 } else if len(c.spec().Args) > 0 { 210 // In this case, we assume the image has an Entrypoint and Args 211 // specifies the arguments for that entrypoint. 212 config.Cmd = c.spec().Args 213 } 214 215 return config 216 } 217 218 func (c *containerConfig) labels() map[string]string { 219 var ( 220 system = map[string]string{ 221 "task": "", // mark as cluster task 222 "task.id": c.task.ID, 223 "task.name": c.name(), 224 "node.id": c.task.NodeID, 225 "service.id": c.task.ServiceID, 226 "service.name": c.task.ServiceAnnotations.Name, 227 } 228 labels = make(map[string]string) 229 ) 230 231 // base labels are those defined in the spec. 232 for k, v := range c.spec().Labels { 233 labels[k] = v 234 } 235 236 // we then apply the overrides from the task, which may be set via the 237 // orchestrator. 238 for k, v := range c.task.Annotations.Labels { 239 labels[k] = v 240 } 241 242 // finally, we apply the system labels, which override all labels. 243 for k, v := range system { 244 labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v 245 } 246 247 return labels 248 } 249 250 func (c *containerConfig) mounts() []enginemount.Mount { 251 var r []enginemount.Mount 252 for _, mount := range c.spec().Mounts { 253 r = append(r, convertMount(mount)) 254 } 255 return r 256 } 257 258 func convertMount(m api.Mount) enginemount.Mount { 259 mount := enginemount.Mount{ 260 Source: m.Source, 261 Target: m.Target, 262 ReadOnly: m.ReadOnly, 263 } 264 265 switch m.Type { 266 case api.MountTypeBind: 267 mount.Type = enginemount.TypeBind 268 case api.MountTypeVolume: 269 mount.Type = enginemount.TypeVolume 270 case api.MountTypeTmpfs: 271 mount.Type = enginemount.TypeTmpfs 272 } 273 274 if m.BindOptions != nil { 275 mount.BindOptions = &enginemount.BindOptions{} 276 switch m.BindOptions.Propagation { 277 case api.MountPropagationRPrivate: 278 mount.BindOptions.Propagation = enginemount.PropagationRPrivate 279 case api.MountPropagationPrivate: 280 mount.BindOptions.Propagation = enginemount.PropagationPrivate 281 case api.MountPropagationRSlave: 282 mount.BindOptions.Propagation = enginemount.PropagationRSlave 283 case api.MountPropagationSlave: 284 mount.BindOptions.Propagation = enginemount.PropagationSlave 285 case api.MountPropagationRShared: 286 mount.BindOptions.Propagation = enginemount.PropagationRShared 287 case api.MountPropagationShared: 288 mount.BindOptions.Propagation = enginemount.PropagationShared 289 } 290 } 291 292 if m.VolumeOptions != nil { 293 mount.VolumeOptions = &enginemount.VolumeOptions{ 294 NoCopy: m.VolumeOptions.NoCopy, 295 } 296 if m.VolumeOptions.Labels != nil { 297 mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels)) 298 for k, v := range m.VolumeOptions.Labels { 299 mount.VolumeOptions.Labels[k] = v 300 } 301 } 302 if m.VolumeOptions.DriverConfig != nil { 303 mount.VolumeOptions.DriverConfig = &enginemount.Driver{ 304 Name: m.VolumeOptions.DriverConfig.Name, 305 } 306 if m.VolumeOptions.DriverConfig.Options != nil { 307 mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options)) 308 for k, v := range m.VolumeOptions.DriverConfig.Options { 309 mount.VolumeOptions.DriverConfig.Options[k] = v 310 } 311 } 312 } 313 } 314 315 if m.TmpfsOptions != nil { 316 mount.TmpfsOptions = &enginemount.TmpfsOptions{ 317 SizeBytes: m.TmpfsOptions.SizeBytes, 318 Mode: m.TmpfsOptions.Mode, 319 } 320 } 321 322 return mount 323 } 324 325 func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig { 326 hcSpec := c.spec().Healthcheck 327 if hcSpec == nil { 328 return nil 329 } 330 interval, _ := gogotypes.DurationFromProto(hcSpec.Interval) 331 timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout) 332 startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod) 333 return &enginecontainer.HealthConfig{ 334 Test: hcSpec.Test, 335 Interval: interval, 336 Timeout: timeout, 337 Retries: int(hcSpec.Retries), 338 StartPeriod: startPeriod, 339 } 340 } 341 342 func (c *containerConfig) hostConfig() *enginecontainer.HostConfig { 343 hc := &enginecontainer.HostConfig{ 344 Resources: c.resources(), 345 GroupAdd: c.spec().Groups, 346 PortBindings: c.portBindings(), 347 Mounts: c.mounts(), 348 ReadonlyRootfs: c.spec().ReadOnly, 349 } 350 351 if c.spec().DNSConfig != nil { 352 hc.DNS = c.spec().DNSConfig.Nameservers 353 hc.DNSSearch = c.spec().DNSConfig.Search 354 hc.DNSOptions = c.spec().DNSConfig.Options 355 } 356 357 c.applyPrivileges(hc) 358 359 // The format of extra hosts on swarmkit is specified in: 360 // http://man7.org/linux/man-pages/man5/hosts.5.html 361 // IP_address canonical_hostname [aliases...] 362 // However, the format of ExtraHosts in HostConfig is 363 // <host>:<ip> 364 // We need to do the conversion here 365 // (Alias is ignored for now) 366 for _, entry := range c.spec().Hosts { 367 parts := strings.Fields(entry) 368 if len(parts) > 1 { 369 hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0])) 370 } 371 } 372 373 if c.task.LogDriver != nil { 374 hc.LogConfig = enginecontainer.LogConfig{ 375 Type: c.task.LogDriver.Name, 376 Config: c.task.LogDriver.Options, 377 } 378 } 379 380 if len(c.task.Networks) > 0 { 381 labels := c.task.Networks[0].Network.Spec.Annotations.Labels 382 name := c.task.Networks[0].Network.Spec.Annotations.Name 383 if v, ok := labels["com.docker.swarm.predefined"]; ok && v == "true" { 384 hc.NetworkMode = enginecontainer.NetworkMode(name) 385 } 386 } 387 388 return hc 389 } 390 391 // This handles the case of volumes that are defined inside a service Mount 392 func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumesCreateBody { 393 var ( 394 driverName string 395 driverOpts map[string]string 396 labels map[string]string 397 ) 398 399 if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil { 400 driverName = mount.VolumeOptions.DriverConfig.Name 401 driverOpts = mount.VolumeOptions.DriverConfig.Options 402 labels = mount.VolumeOptions.Labels 403 } 404 405 if mount.VolumeOptions != nil { 406 return &volumetypes.VolumesCreateBody{ 407 Name: mount.Source, 408 Driver: driverName, 409 DriverOpts: driverOpts, 410 Labels: labels, 411 } 412 } 413 return nil 414 } 415 416 func (c *containerConfig) resources() enginecontainer.Resources { 417 resources := enginecontainer.Resources{} 418 419 // If no limits are specified let the engine use its defaults. 420 // 421 // TODO(aluzzardi): We might want to set some limits anyway otherwise 422 // "unlimited" tasks will step over the reservation of other tasks. 423 r := c.task.Spec.Resources 424 if r == nil || r.Limits == nil { 425 return resources 426 } 427 428 if r.Limits.MemoryBytes > 0 { 429 resources.Memory = r.Limits.MemoryBytes 430 } 431 432 if r.Limits.NanoCPUs > 0 { 433 // CPU Period must be set in microseconds. 434 resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond) 435 resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9 436 } 437 438 return resources 439 } 440 441 // Docker daemon supports just 1 network during container create. 442 func (c *containerConfig) createNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig { 443 var networks []*api.NetworkAttachment 444 if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil { 445 networks = c.task.Networks 446 } 447 448 epConfig := make(map[string]*network.EndpointSettings) 449 if len(networks) > 0 { 450 epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0], b) 451 } 452 453 return &network.NetworkingConfig{EndpointsConfig: epConfig} 454 } 455 456 // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create 457 func (c *containerConfig) connectNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig { 458 var networks []*api.NetworkAttachment 459 if c.task.Spec.GetContainer() != nil { 460 networks = c.task.Networks 461 } 462 // First network is used during container create. Other networks are used in "docker network connect" 463 if len(networks) < 2 { 464 return nil 465 } 466 467 epConfig := make(map[string]*network.EndpointSettings) 468 for _, na := range networks[1:] { 469 epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na, b) 470 } 471 return &network.NetworkingConfig{EndpointsConfig: epConfig} 472 } 473 474 func getEndpointConfig(na *api.NetworkAttachment, b executorpkg.Backend) *network.EndpointSettings { 475 var ipv4, ipv6 string 476 for _, addr := range na.Addresses { 477 ip, _, err := net.ParseCIDR(addr) 478 if err != nil { 479 continue 480 } 481 482 if ip.To4() != nil { 483 ipv4 = ip.String() 484 continue 485 } 486 487 if ip.To16() != nil { 488 ipv6 = ip.String() 489 } 490 } 491 492 n := &network.EndpointSettings{ 493 NetworkID: na.Network.ID, 494 IPAMConfig: &network.EndpointIPAMConfig{ 495 IPv4Address: ipv4, 496 IPv6Address: ipv6, 497 }, 498 DriverOpts: na.DriverAttachmentOpts, 499 } 500 if v, ok := na.Network.Spec.Annotations.Labels["com.docker.swarm.predefined"]; ok && v == "true" { 501 if ln, err := b.FindNetwork(na.Network.Spec.Annotations.Name); err == nil { 502 n.NetworkID = ln.ID() 503 } 504 } 505 return n 506 } 507 508 func (c *containerConfig) virtualIP(networkID string) string { 509 if c.task.Endpoint == nil { 510 return "" 511 } 512 513 for _, eVip := range c.task.Endpoint.VirtualIPs { 514 // We only support IPv4 VIPs for now. 515 if eVip.NetworkID == networkID { 516 vip, _, err := net.ParseCIDR(eVip.Addr) 517 if err != nil { 518 return "" 519 } 520 521 return vip.String() 522 } 523 } 524 525 return "" 526 } 527 528 func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig { 529 if len(c.task.Networks) == 0 { 530 return nil 531 } 532 533 logrus.Debugf("Creating service config in agent for t = %+v", c.task) 534 svcCfg := &clustertypes.ServiceConfig{ 535 Name: c.task.ServiceAnnotations.Name, 536 Aliases: make(map[string][]string), 537 ID: c.task.ServiceID, 538 VirtualAddresses: make(map[string]*clustertypes.VirtualAddress), 539 } 540 541 for _, na := range c.task.Networks { 542 svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{ 543 // We support only IPv4 virtual IP for now. 544 IPv4: c.virtualIP(na.Network.ID), 545 } 546 if len(na.Aliases) > 0 { 547 svcCfg.Aliases[na.Network.ID] = na.Aliases 548 } 549 } 550 551 if c.task.Endpoint != nil { 552 for _, ePort := range c.task.Endpoint.Ports { 553 if ePort.PublishMode != api.PublishModeIngress { 554 continue 555 } 556 557 svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{ 558 Name: ePort.Name, 559 Protocol: int32(ePort.Protocol), 560 TargetPort: ePort.TargetPort, 561 PublishedPort: ePort.PublishedPort, 562 }) 563 } 564 } 565 566 return svcCfg 567 } 568 569 // networks returns a list of network names attached to the container. The 570 // returned name can be used to lookup the corresponding network create 571 // options. 572 func (c *containerConfig) networks() []string { 573 var networks []string 574 575 for name := range c.networksAttachments { 576 networks = append(networks, name) 577 } 578 579 return networks 580 } 581 582 func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) { 583 na, ok := c.networksAttachments[name] 584 if !ok { 585 return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced") 586 } 587 588 options := types.NetworkCreate{ 589 // ID: na.Network.ID, 590 Labels: na.Network.Spec.Annotations.Labels, 591 Internal: na.Network.Spec.Internal, 592 Attachable: na.Network.Spec.Attachable, 593 Ingress: convert.IsIngressNetwork(na.Network), 594 EnableIPv6: na.Network.Spec.Ipv6Enabled, 595 CheckDuplicate: true, 596 Scope: netconst.SwarmScope, 597 } 598 599 if na.Network.Spec.GetNetwork() != "" { 600 options.ConfigFrom = &network.ConfigReference{ 601 Network: na.Network.Spec.GetNetwork(), 602 } 603 } 604 605 if na.Network.DriverState != nil { 606 options.Driver = na.Network.DriverState.Name 607 options.Options = na.Network.DriverState.Options 608 } 609 if na.Network.IPAM != nil { 610 options.IPAM = &network.IPAM{ 611 Driver: na.Network.IPAM.Driver.Name, 612 Options: na.Network.IPAM.Driver.Options, 613 } 614 for _, ic := range na.Network.IPAM.Configs { 615 c := network.IPAMConfig{ 616 Subnet: ic.Subnet, 617 IPRange: ic.Range, 618 Gateway: ic.Gateway, 619 } 620 options.IPAM.Config = append(options.IPAM.Config, c) 621 } 622 } 623 624 return clustertypes.NetworkCreateRequest{ 625 ID: na.Network.ID, 626 NetworkCreateRequest: types.NetworkCreateRequest{ 627 Name: name, 628 NetworkCreate: options, 629 }, 630 }, nil 631 } 632 633 func (c *containerConfig) applyPrivileges(hc *enginecontainer.HostConfig) { 634 privileges := c.spec().Privileges 635 if privileges == nil { 636 return 637 } 638 639 credentials := privileges.CredentialSpec 640 if credentials != nil { 641 switch credentials.Source.(type) { 642 case *api.Privileges_CredentialSpec_File: 643 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=file://"+credentials.GetFile()) 644 case *api.Privileges_CredentialSpec_Registry: 645 hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=registry://"+credentials.GetRegistry()) 646 } 647 } 648 649 selinux := privileges.SELinuxContext 650 if selinux != nil { 651 if selinux.Disable { 652 hc.SecurityOpt = append(hc.SecurityOpt, "label=disable") 653 } 654 if selinux.User != "" { 655 hc.SecurityOpt = append(hc.SecurityOpt, "label=user:"+selinux.User) 656 } 657 if selinux.Role != "" { 658 hc.SecurityOpt = append(hc.SecurityOpt, "label=role:"+selinux.Role) 659 } 660 if selinux.Level != "" { 661 hc.SecurityOpt = append(hc.SecurityOpt, "label=level:"+selinux.Level) 662 } 663 if selinux.Type != "" { 664 hc.SecurityOpt = append(hc.SecurityOpt, "label=type:"+selinux.Type) 665 } 666 } 667 } 668 669 func (c containerConfig) eventFilter() filters.Args { 670 filter := filters.NewArgs() 671 filter.Add("type", events.ContainerEventType) 672 filter.Add("name", c.name()) 673 filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID)) 674 return filter 675 }