github.com/fabiokung/docker@v0.11.2-0.20170222101415-4534dcd49497/daemon/cluster/executor/container/container.go (about) 1 package container 2 3 import ( 4 "errors" 5 "fmt" 6 "net" 7 "strconv" 8 "strings" 9 "time" 10 11 "github.com/Sirupsen/logrus" 12 13 "github.com/docker/distribution/reference" 14 "github.com/docker/docker/api/types" 15 enginecontainer "github.com/docker/docker/api/types/container" 16 "github.com/docker/docker/api/types/events" 17 "github.com/docker/docker/api/types/filters" 18 enginemount "github.com/docker/docker/api/types/mount" 19 "github.com/docker/docker/api/types/network" 20 volumetypes "github.com/docker/docker/api/types/volume" 21 clustertypes "github.com/docker/docker/daemon/cluster/provider" 22 "github.com/docker/go-connections/nat" 23 "github.com/docker/swarmkit/agent/exec" 24 "github.com/docker/swarmkit/api" 25 "github.com/docker/swarmkit/template" 26 gogotypes "github.com/gogo/protobuf/types" 27 ) 28 29 const ( 30 // Explicitly use the kernel's default setting for CPU quota of 100ms. 31 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 32 cpuQuotaPeriod = 100 * time.Millisecond 33 34 // systemLabelPrefix represents the reserved namespace for system labels. 35 systemLabelPrefix = "com.docker.swarm" 36 ) 37 38 // containerConfig converts task properties into docker container compatible 39 // components. 40 type containerConfig struct { 41 task *api.Task 42 networksAttachments map[string]*api.NetworkAttachment 43 } 44 45 // newContainerConfig returns a validated container config. No methods should 46 // return an error if this function returns without error. 47 func newContainerConfig(t *api.Task) (*containerConfig, error) { 48 var c containerConfig 49 return &c, c.setTask(t) 50 } 51 52 func (c *containerConfig) setTask(t *api.Task) error { 53 if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil { 54 return exec.ErrRuntimeUnsupported 55 } 56 57 container := t.Spec.GetContainer() 58 if container != nil { 59 if container.Image == "" { 60 return ErrImageRequired 61 } 62 63 if err := validateMounts(container.Mounts); err != nil { 64 return err 65 } 66 } 67 68 // index the networks by name 69 c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks)) 70 for _, attachment := range t.Networks { 71 c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment 72 } 73 74 c.task = t 75 76 if t.Spec.GetContainer() != nil { 77 preparedSpec, err := template.ExpandContainerSpec(t) 78 if err != nil { 79 return err 80 } 81 c.task.Spec.Runtime = &api.TaskSpec_Container{ 82 Container: preparedSpec, 83 } 84 } 85 86 return nil 87 } 88 89 func (c *containerConfig) id() string { 90 attachment := c.task.Spec.GetAttachment() 91 if attachment == nil { 92 return "" 93 } 94 95 return attachment.ContainerID 96 } 97 98 func (c *containerConfig) taskID() string { 99 return c.task.ID 100 } 101 102 func (c *containerConfig) endpoint() *api.Endpoint { 103 return c.task.Endpoint 104 } 105 106 func (c *containerConfig) spec() *api.ContainerSpec { 107 return c.task.Spec.GetContainer() 108 } 109 110 func (c *containerConfig) nameOrID() string { 111 if c.task.Spec.GetContainer() != nil { 112 return c.name() 113 } 114 115 return c.id() 116 } 117 118 func (c *containerConfig) name() string { 119 if c.task.Annotations.Name != "" { 120 // if set, use the container Annotations.Name field, set in the orchestrator. 121 return c.task.Annotations.Name 122 } 123 124 slot := fmt.Sprint(c.task.Slot) 125 if slot == "" || c.task.Slot == 0 { 126 slot = c.task.NodeID 127 } 128 129 // fallback to service.slot.id. 130 return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID) 131 } 132 133 func (c *containerConfig) image() string { 134 raw := c.spec().Image 135 ref, err := reference.ParseNormalizedNamed(raw) 136 if err != nil { 137 return raw 138 } 139 return reference.FamiliarString(reference.TagNameOnly(ref)) 140 } 141 142 func (c *containerConfig) portBindings() nat.PortMap { 143 portBindings := nat.PortMap{} 144 if c.task.Endpoint == nil { 145 return portBindings 146 } 147 148 for _, portConfig := range c.task.Endpoint.Ports { 149 if portConfig.PublishMode != api.PublishModeHost { 150 continue 151 } 152 153 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 154 binding := []nat.PortBinding{ 155 {}, 156 } 157 158 if portConfig.PublishedPort != 0 { 159 binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort)) 160 } 161 portBindings[port] = binding 162 } 163 164 return portBindings 165 } 166 167 func (c *containerConfig) exposedPorts() map[nat.Port]struct{} { 168 exposedPorts := make(map[nat.Port]struct{}) 169 if c.task.Endpoint == nil { 170 return exposedPorts 171 } 172 173 for _, portConfig := range c.task.Endpoint.Ports { 174 if portConfig.PublishMode != api.PublishModeHost { 175 continue 176 } 177 178 port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String()))) 179 exposedPorts[port] = struct{}{} 180 } 181 182 return exposedPorts 183 } 184 185 func (c *containerConfig) config() *enginecontainer.Config { 186 config := &enginecontainer.Config{ 187 Labels: c.labels(), 188 Tty: c.spec().TTY, 189 OpenStdin: c.spec().OpenStdin, 190 User: c.spec().User, 191 Env: c.spec().Env, 192 Hostname: c.spec().Hostname, 193 WorkingDir: c.spec().Dir, 194 Image: c.image(), 195 ExposedPorts: c.exposedPorts(), 196 Healthcheck: c.healthcheck(), 197 } 198 199 if len(c.spec().Command) > 0 { 200 // If Command is provided, we replace the whole invocation with Command 201 // by replacing Entrypoint and specifying Cmd. Args is ignored in this 202 // case. 203 config.Entrypoint = append(config.Entrypoint, c.spec().Command...) 204 config.Cmd = append(config.Cmd, c.spec().Args...) 205 } else if len(c.spec().Args) > 0 { 206 // In this case, we assume the image has an Entrypoint and Args 207 // specifies the arguments for that entrypoint. 208 config.Cmd = c.spec().Args 209 } 210 211 return config 212 } 213 214 func (c *containerConfig) labels() map[string]string { 215 var ( 216 system = map[string]string{ 217 "task": "", // mark as cluster task 218 "task.id": c.task.ID, 219 "task.name": c.name(), 220 "node.id": c.task.NodeID, 221 "service.id": c.task.ServiceID, 222 "service.name": c.task.ServiceAnnotations.Name, 223 } 224 labels = make(map[string]string) 225 ) 226 227 // base labels are those defined in the spec. 228 for k, v := range c.spec().Labels { 229 labels[k] = v 230 } 231 232 // we then apply the overrides from the task, which may be set via the 233 // orchestrator. 234 for k, v := range c.task.Annotations.Labels { 235 labels[k] = v 236 } 237 238 // finally, we apply the system labels, which override all labels. 239 for k, v := range system { 240 labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v 241 } 242 243 return labels 244 } 245 246 func (c *containerConfig) mounts() []enginemount.Mount { 247 var r []enginemount.Mount 248 for _, mount := range c.spec().Mounts { 249 r = append(r, convertMount(mount)) 250 } 251 return r 252 } 253 254 func convertMount(m api.Mount) enginemount.Mount { 255 mount := enginemount.Mount{ 256 Source: m.Source, 257 Target: m.Target, 258 ReadOnly: m.ReadOnly, 259 } 260 261 switch m.Type { 262 case api.MountTypeBind: 263 mount.Type = enginemount.TypeBind 264 case api.MountTypeVolume: 265 mount.Type = enginemount.TypeVolume 266 case api.MountTypeTmpfs: 267 mount.Type = enginemount.TypeTmpfs 268 } 269 270 if m.BindOptions != nil { 271 mount.BindOptions = &enginemount.BindOptions{} 272 switch m.BindOptions.Propagation { 273 case api.MountPropagationRPrivate: 274 mount.BindOptions.Propagation = enginemount.PropagationRPrivate 275 case api.MountPropagationPrivate: 276 mount.BindOptions.Propagation = enginemount.PropagationPrivate 277 case api.MountPropagationRSlave: 278 mount.BindOptions.Propagation = enginemount.PropagationRSlave 279 case api.MountPropagationSlave: 280 mount.BindOptions.Propagation = enginemount.PropagationSlave 281 case api.MountPropagationRShared: 282 mount.BindOptions.Propagation = enginemount.PropagationRShared 283 case api.MountPropagationShared: 284 mount.BindOptions.Propagation = enginemount.PropagationShared 285 } 286 } 287 288 if m.VolumeOptions != nil { 289 mount.VolumeOptions = &enginemount.VolumeOptions{ 290 NoCopy: m.VolumeOptions.NoCopy, 291 } 292 if m.VolumeOptions.Labels != nil { 293 mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels)) 294 for k, v := range m.VolumeOptions.Labels { 295 mount.VolumeOptions.Labels[k] = v 296 } 297 } 298 if m.VolumeOptions.DriverConfig != nil { 299 mount.VolumeOptions.DriverConfig = &enginemount.Driver{ 300 Name: m.VolumeOptions.DriverConfig.Name, 301 } 302 if m.VolumeOptions.DriverConfig.Options != nil { 303 mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options)) 304 for k, v := range m.VolumeOptions.DriverConfig.Options { 305 mount.VolumeOptions.DriverConfig.Options[k] = v 306 } 307 } 308 } 309 } 310 311 if m.TmpfsOptions != nil { 312 mount.TmpfsOptions = &enginemount.TmpfsOptions{ 313 SizeBytes: m.TmpfsOptions.SizeBytes, 314 Mode: m.TmpfsOptions.Mode, 315 } 316 } 317 318 return mount 319 } 320 321 func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig { 322 hcSpec := c.spec().Healthcheck 323 if hcSpec == nil { 324 return nil 325 } 326 interval, _ := gogotypes.DurationFromProto(hcSpec.Interval) 327 timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout) 328 return &enginecontainer.HealthConfig{ 329 Test: hcSpec.Test, 330 Interval: interval, 331 Timeout: timeout, 332 Retries: int(hcSpec.Retries), 333 } 334 } 335 336 func (c *containerConfig) hostConfig() *enginecontainer.HostConfig { 337 hc := &enginecontainer.HostConfig{ 338 Resources: c.resources(), 339 GroupAdd: c.spec().Groups, 340 PortBindings: c.portBindings(), 341 Mounts: c.mounts(), 342 ReadonlyRootfs: c.spec().ReadOnly, 343 } 344 345 if c.spec().DNSConfig != nil { 346 hc.DNS = c.spec().DNSConfig.Nameservers 347 hc.DNSSearch = c.spec().DNSConfig.Search 348 hc.DNSOptions = c.spec().DNSConfig.Options 349 } 350 351 // The format of extra hosts on swarmkit is specified in: 352 // http://man7.org/linux/man-pages/man5/hosts.5.html 353 // IP_address canonical_hostname [aliases...] 354 // However, the format of ExtraHosts in HostConfig is 355 // <host>:<ip> 356 // We need to do the conversion here 357 // (Alias is ignored for now) 358 for _, entry := range c.spec().Hosts { 359 parts := strings.Fields(entry) 360 if len(parts) > 1 { 361 hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0])) 362 } 363 } 364 365 if c.task.LogDriver != nil { 366 hc.LogConfig = enginecontainer.LogConfig{ 367 Type: c.task.LogDriver.Name, 368 Config: c.task.LogDriver.Options, 369 } 370 } 371 372 return hc 373 } 374 375 // This handles the case of volumes that are defined inside a service Mount 376 func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumesCreateBody { 377 var ( 378 driverName string 379 driverOpts map[string]string 380 labels map[string]string 381 ) 382 383 if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil { 384 driverName = mount.VolumeOptions.DriverConfig.Name 385 driverOpts = mount.VolumeOptions.DriverConfig.Options 386 labels = mount.VolumeOptions.Labels 387 } 388 389 if mount.VolumeOptions != nil { 390 return &volumetypes.VolumesCreateBody{ 391 Name: mount.Source, 392 Driver: driverName, 393 DriverOpts: driverOpts, 394 Labels: labels, 395 } 396 } 397 return nil 398 } 399 400 func (c *containerConfig) resources() enginecontainer.Resources { 401 resources := enginecontainer.Resources{} 402 403 // If no limits are specified let the engine use its defaults. 404 // 405 // TODO(aluzzardi): We might want to set some limits anyway otherwise 406 // "unlimited" tasks will step over the reservation of other tasks. 407 r := c.task.Spec.Resources 408 if r == nil || r.Limits == nil { 409 return resources 410 } 411 412 if r.Limits.MemoryBytes > 0 { 413 resources.Memory = r.Limits.MemoryBytes 414 } 415 416 if r.Limits.NanoCPUs > 0 { 417 // CPU Period must be set in microseconds. 418 resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond) 419 resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9 420 } 421 422 return resources 423 } 424 425 // Docker daemon supports just 1 network during container create. 426 func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig { 427 var networks []*api.NetworkAttachment 428 if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil { 429 networks = c.task.Networks 430 } 431 432 epConfig := make(map[string]*network.EndpointSettings) 433 if len(networks) > 0 { 434 epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0]) 435 } 436 437 return &network.NetworkingConfig{EndpointsConfig: epConfig} 438 } 439 440 // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create 441 func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig { 442 var networks []*api.NetworkAttachment 443 if c.task.Spec.GetContainer() != nil { 444 networks = c.task.Networks 445 } 446 447 // First network is used during container create. Other networks are used in "docker network connect" 448 if len(networks) < 2 { 449 return nil 450 } 451 452 epConfig := make(map[string]*network.EndpointSettings) 453 for _, na := range networks[1:] { 454 epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na) 455 } 456 return &network.NetworkingConfig{EndpointsConfig: epConfig} 457 } 458 459 func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings { 460 var ipv4, ipv6 string 461 for _, addr := range na.Addresses { 462 ip, _, err := net.ParseCIDR(addr) 463 if err != nil { 464 continue 465 } 466 467 if ip.To4() != nil { 468 ipv4 = ip.String() 469 continue 470 } 471 472 if ip.To16() != nil { 473 ipv6 = ip.String() 474 } 475 } 476 477 return &network.EndpointSettings{ 478 NetworkID: na.Network.ID, 479 IPAMConfig: &network.EndpointIPAMConfig{ 480 IPv4Address: ipv4, 481 IPv6Address: ipv6, 482 }, 483 } 484 } 485 486 func (c *containerConfig) virtualIP(networkID string) string { 487 if c.task.Endpoint == nil { 488 return "" 489 } 490 491 for _, eVip := range c.task.Endpoint.VirtualIPs { 492 // We only support IPv4 VIPs for now. 493 if eVip.NetworkID == networkID { 494 vip, _, err := net.ParseCIDR(eVip.Addr) 495 if err != nil { 496 return "" 497 } 498 499 return vip.String() 500 } 501 } 502 503 return "" 504 } 505 506 func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig { 507 if len(c.task.Networks) == 0 { 508 return nil 509 } 510 511 logrus.Debugf("Creating service config in agent for t = %+v", c.task) 512 svcCfg := &clustertypes.ServiceConfig{ 513 Name: c.task.ServiceAnnotations.Name, 514 Aliases: make(map[string][]string), 515 ID: c.task.ServiceID, 516 VirtualAddresses: make(map[string]*clustertypes.VirtualAddress), 517 } 518 519 for _, na := range c.task.Networks { 520 svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{ 521 // We support only IPv4 virtual IP for now. 522 IPv4: c.virtualIP(na.Network.ID), 523 } 524 if len(na.Aliases) > 0 { 525 svcCfg.Aliases[na.Network.ID] = na.Aliases 526 } 527 } 528 529 if c.task.Endpoint != nil { 530 for _, ePort := range c.task.Endpoint.Ports { 531 if ePort.PublishMode != api.PublishModeIngress { 532 continue 533 } 534 535 svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{ 536 Name: ePort.Name, 537 Protocol: int32(ePort.Protocol), 538 TargetPort: ePort.TargetPort, 539 PublishedPort: ePort.PublishedPort, 540 }) 541 } 542 } 543 544 return svcCfg 545 } 546 547 // networks returns a list of network names attached to the container. The 548 // returned name can be used to lookup the corresponding network create 549 // options. 550 func (c *containerConfig) networks() []string { 551 var networks []string 552 553 for name := range c.networksAttachments { 554 networks = append(networks, name) 555 } 556 557 return networks 558 } 559 560 func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) { 561 na, ok := c.networksAttachments[name] 562 if !ok { 563 return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced") 564 } 565 566 options := types.NetworkCreate{ 567 // ID: na.Network.ID, 568 Driver: na.Network.DriverState.Name, 569 IPAM: &network.IPAM{ 570 Driver: na.Network.IPAM.Driver.Name, 571 Options: na.Network.IPAM.Driver.Options, 572 }, 573 Options: na.Network.DriverState.Options, 574 Labels: na.Network.Spec.Annotations.Labels, 575 Internal: na.Network.Spec.Internal, 576 Attachable: na.Network.Spec.Attachable, 577 EnableIPv6: na.Network.Spec.Ipv6Enabled, 578 CheckDuplicate: true, 579 } 580 581 for _, ic := range na.Network.IPAM.Configs { 582 c := network.IPAMConfig{ 583 Subnet: ic.Subnet, 584 IPRange: ic.Range, 585 Gateway: ic.Gateway, 586 } 587 options.IPAM.Config = append(options.IPAM.Config, c) 588 } 589 590 return clustertypes.NetworkCreateRequest{ 591 ID: na.Network.ID, 592 NetworkCreateRequest: types.NetworkCreateRequest{ 593 Name: name, 594 NetworkCreate: options, 595 }, 596 }, nil 597 } 598 599 func (c containerConfig) eventFilter() filters.Args { 600 filter := filters.NewArgs() 601 filter.Add("type", events.ContainerEventType) 602 filter.Add("name", c.name()) 603 filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID)) 604 return filter 605 }