github.com/kim0/docker@v0.6.2-0.20161130212042-4addda3f07e7/daemon/cluster/executor/container/container.go

package container

import (
	"errors"
	"fmt"
	"net"
	"strings"
	"time"

	"github.com/Sirupsen/logrus"

	"github.com/docker/docker/api/types"
	enginecontainer "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/events"
	"github.com/docker/docker/api/types/filters"
	"github.com/docker/docker/api/types/network"
	clustertypes "github.com/docker/docker/daemon/cluster/provider"
	"github.com/docker/docker/reference"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/protobuf/ptypes"
)

const (
	// Explicitly use the kernel's default setting for the CPU quota period
	// of 100ms.
	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
	cpuQuotaPeriod = 100 * time.Millisecond

	// systemLabelPrefix represents the reserved namespace for system labels.
	systemLabelPrefix = "com.docker.swarm"
)

// containerConfig converts task properties into docker container compatible
// components.
type containerConfig struct {
	task                *api.Task
	networksAttachments map[string]*api.NetworkAttachment
}

// newContainerConfig returns a validated container config. No methods should
// return an error if this function returns without error.
func newContainerConfig(t *api.Task) (*containerConfig, error) {
	var c containerConfig
	return &c, c.setTask(t)
}

func (c *containerConfig) setTask(t *api.Task) error {
	if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil {
		return exec.ErrRuntimeUnsupported
	}

	container := t.Spec.GetContainer()
	if container != nil {
		if container.Image == "" {
			return ErrImageRequired
		}

		if err := validateMounts(container.Mounts); err != nil {
			return err
		}
	}

	// Index the networks by name.
	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
	for _, attachment := range t.Networks {
		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
	}

	c.task = t
	return nil
}

func (c *containerConfig) id() string {
	attachment := c.task.Spec.GetAttachment()
	if attachment == nil {
		return ""
	}

	return attachment.ContainerID
}

func (c *containerConfig) taskID() string {
	return c.task.ID
}

func (c *containerConfig) endpoint() *api.Endpoint {
	return c.task.Endpoint
}

func (c *containerConfig) spec() *api.ContainerSpec {
	return c.task.Spec.GetContainer()
}

func (c *containerConfig) nameOrID() string {
	if c.task.Spec.GetContainer() != nil {
		return c.name()
	}

	return c.id()
}

func (c *containerConfig) name() string {
	if c.task.Annotations.Name != "" {
		// If set, use the task's Annotations.Name field, which is set by the
		// orchestrator.
		return c.task.Annotations.Name
	}

	slot := fmt.Sprint(c.task.Slot)
	if slot == "" || c.task.Slot == 0 {
		slot = c.task.NodeID
	}

	// Fall back to service.slot.id.
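	// For example (illustrative values only), a task for service "web" in
	// slot 3 with ID "0qe2nduq0zwr" is named "web.3.0qe2nduq0zwr"; when the
	// slot is unset, the node ID takes its place in the middle component.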
	return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID)
}

func (c *containerConfig) image() string {
	raw := c.spec().Image
	ref, err := reference.ParseNamed(raw)
	if err != nil {
		return raw
	}
	return reference.WithDefaultTag(ref).String()
}

func (c *containerConfig) config() *enginecontainer.Config {
	config := &enginecontainer.Config{
		Labels:      c.labels(),
		User:        c.spec().User,
		Env:         c.spec().Env,
		WorkingDir:  c.spec().Dir,
		Image:       c.image(),
		Volumes:     c.volumes(),
		Healthcheck: c.healthcheck(),
	}

	if len(c.spec().Command) > 0 {
		// If Command is provided, we replace the whole invocation with Command
		// by replacing Entrypoint and specifying Cmd. Args is ignored in this
		// case.
		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
		config.Cmd = append(config.Cmd, c.spec().Args...)
	} else if len(c.spec().Args) > 0 {
		// In this case, we assume the image has an Entrypoint and Args
		// specifies the arguments for that entrypoint.
		config.Cmd = c.spec().Args
	}

	return config
}

func (c *containerConfig) labels() map[string]string {
	var (
		system = map[string]string{
			"task":         "", // mark as cluster task
			"task.id":      c.task.ID,
			"task.name":    c.name(),
			"node.id":      c.task.NodeID,
			"service.id":   c.task.ServiceID,
			"service.name": c.task.ServiceAnnotations.Name,
		}
		labels = make(map[string]string)
	)

	// Base labels are those defined in the spec.
	for k, v := range c.spec().Labels {
		labels[k] = v
	}

	// We then apply the overrides from the task, which may be set via the
	// orchestrator.
	for k, v := range c.task.Annotations.Labels {
		labels[k] = v
	}

	// Finally, we apply the system labels, which override all other labels.
	for k, v := range system {
		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
	}

	return labels
}

// volumes builds the set of anonymous volumes that is placed into the Volumes
// field of the engine container config.
func (c *containerConfig) volumes() map[string]struct{} {
	r := make(map[string]struct{})
	// Volumes *only* carries anonymous volumes. The rest is mixed in with
	// binds, which aren't actually binds. Basically, any volume that
	// results in a single component must be added here.
	//
	// This is reverse engineered from the behavior of the engine API.
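	//
	// For example (illustrative values only), a MountTypeVolume mount with
	// Target "/data" and an empty Source is recorded here as the anonymous
	// volume "/data"; a named volume mount with a non-empty Source is
	// emitted by binds() instead.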
	for _, mount := range c.spec().Mounts {
		if mount.Type == api.MountTypeVolume && mount.Source == "" {
			r[mount.Target] = struct{}{}
		}
	}
	return r
}

func (c *containerConfig) tmpfs() map[string]string {
	r := make(map[string]string)

	for _, spec := range c.spec().Mounts {
		if spec.Type != api.MountTypeTmpfs {
			continue
		}

		r[spec.Target] = getMountMask(&spec)
	}

	return r
}

func (c *containerConfig) binds() []string {
	var r []string
	for _, mount := range c.spec().Mounts {
		if mount.Type == api.MountTypeBind || (mount.Type == api.MountTypeVolume && mount.Source != "") {
			spec := fmt.Sprintf("%s:%s", mount.Source, mount.Target)
			mask := getMountMask(&mount)
			if mask != "" {
				spec = fmt.Sprintf("%s:%s", spec, mask)
			}
			r = append(r, spec)
		}
	}
	return r
}

func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig {
	hcSpec := c.spec().Healthcheck
	if hcSpec == nil {
		return nil
	}
	interval, _ := ptypes.Duration(hcSpec.Interval)
	timeout, _ := ptypes.Duration(hcSpec.Timeout)
	return &enginecontainer.HealthConfig{
		Test:     hcSpec.Test,
		Interval: interval,
		Timeout:  timeout,
		Retries:  int(hcSpec.Retries),
	}
}

func getMountMask(m *api.Mount) string {
	var maskOpts []string
	if m.ReadOnly {
		maskOpts = append(maskOpts, "ro")
	}

	switch m.Type {
	case api.MountTypeVolume:
		if m.VolumeOptions != nil && m.VolumeOptions.NoCopy {
			maskOpts = append(maskOpts, "nocopy")
		}
	case api.MountTypeBind:
		if m.BindOptions == nil {
			break
		}

		switch m.BindOptions.Propagation {
		case api.MountPropagationPrivate:
			maskOpts = append(maskOpts, "private")
		case api.MountPropagationRPrivate:
			maskOpts = append(maskOpts, "rprivate")
		case api.MountPropagationShared:
			maskOpts = append(maskOpts, "shared")
		case api.MountPropagationRShared:
			maskOpts = append(maskOpts, "rshared")
		case api.MountPropagationSlave:
			maskOpts = append(maskOpts, "slave")
		case api.MountPropagationRSlave:
			maskOpts = append(maskOpts, "rslave")
		}
	case api.MountTypeTmpfs:
		if m.TmpfsOptions == nil {
			break
		}

		if m.TmpfsOptions.Mode != 0 {
			maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode))
		}

		if m.TmpfsOptions.SizeBytes != 0 {
			// Calculate the size suffix here, making this Linux specific,
			// but that is okay, since the API is that way anyway.
			//
			// We do this by finding the suffix that divides evenly into the
			// value, returning the value itself, with no suffix, if that
			// fails.
			//
			// For the most part, we don't enforce any semantics on these
			// values. The operating system will usually align them and
			// enforce minimums and maximums.
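			//
			// For example (illustrative values only), a SizeBytes of
			// 104857600 (100 << 20) divides evenly by 1<<20 and renders as
			// "size=100m", while a value such as 1048577 matches no divisor
			// and is emitted in raw bytes as "size=1048577".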
			var (
				size   = m.TmpfsOptions.SizeBytes
				suffix string
			)
			for _, r := range []struct {
				suffix  string
				divisor int64
			}{
				{"g", 1 << 30},
				{"m", 1 << 20},
				{"k", 1 << 10},
			} {
				if size%r.divisor == 0 {
					size = size / r.divisor
					suffix = r.suffix
					break
				}
			}

			maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix))
		}
	}

	return strings.Join(maskOpts, ",")
}

func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
	hc := &enginecontainer.HostConfig{
		Resources: c.resources(),
		Binds:     c.binds(),
		Tmpfs:     c.tmpfs(),
		GroupAdd:  c.spec().Groups,
	}

	if c.task.LogDriver != nil {
		hc.LogConfig = enginecontainer.LogConfig{
			Type:   c.task.LogDriver.Name,
			Config: c.task.LogDriver.Options,
		}
	}

	return hc
}

// volumeCreateRequest handles the case of volumes that are defined inside a
// service Mount.
func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *types.VolumeCreateRequest {
	var (
		driverName string
		driverOpts map[string]string
		labels     map[string]string
	)

	if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
		driverName = mount.VolumeOptions.DriverConfig.Name
		driverOpts = mount.VolumeOptions.DriverConfig.Options
		labels = mount.VolumeOptions.Labels
	}

	if mount.VolumeOptions != nil {
		return &types.VolumeCreateRequest{
			Name:       mount.Source,
			Driver:     driverName,
			DriverOpts: driverOpts,
			Labels:     labels,
		}
	}
	return nil
}

func (c *containerConfig) resources() enginecontainer.Resources {
	resources := enginecontainer.Resources{}

	// If no limits are specified, let the engine use its defaults.
	//
	// TODO(aluzzardi): We might want to set some limits anyway, otherwise
	// "unlimited" tasks will step over the reservation of other tasks.
	r := c.task.Spec.Resources
	if r == nil || r.Limits == nil {
		return resources
	}

	if r.Limits.MemoryBytes > 0 {
		resources.Memory = r.Limits.MemoryBytes
	}

	if r.Limits.NanoCPUs > 0 {
		// CPU Period must be set in microseconds.
		// For example, a limit of 500000000 NanoCPUs (half a CPU) yields
		// CPUPeriod = 100000 and CPUQuota = 50000.
		resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
		resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
	}

	return resources
}

// The Docker daemon supports only a single network during container create.
func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil {
		networks = c.task.Networks
	}

	epConfig := make(map[string]*network.EndpointSettings)
	if len(networks) > 0 {
		epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0])
	}

	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

// TODO: Merge this function with createNetworkingConfig once the daemon
// supports multiple networks in container create.
func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil {
		networks = c.task.Networks
	}

	// The first network is used during container create. The other networks
	// are attached afterwards via "docker network connect".
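	//
	// For example (illustrative values only), a task attached to networks
	// ["frontend", "backend", "metrics"] gets "frontend" at create time via
	// createNetworkingConfig, while this function returns endpoint settings
	// for "backend" and "metrics".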
	if len(networks) < 2 {
		return nil
	}

	epConfig := make(map[string]*network.EndpointSettings)
	for _, na := range networks[1:] {
		epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na)
	}
	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings {
	var ipv4, ipv6 string
	for _, addr := range na.Addresses {
		ip, _, err := net.ParseCIDR(addr)
		if err != nil {
			continue
		}

		if ip.To4() != nil {
			ipv4 = ip.String()
			continue
		}

		if ip.To16() != nil {
			ipv6 = ip.String()
		}
	}

	return &network.EndpointSettings{
		NetworkID: na.Network.ID,
		IPAMConfig: &network.EndpointIPAMConfig{
			IPv4Address: ipv4,
			IPv6Address: ipv6,
		},
	}
}

func (c *containerConfig) virtualIP(networkID string) string {
	if c.task.Endpoint == nil {
		return ""
	}

	for _, eVip := range c.task.Endpoint.VirtualIPs {
		// We only support IPv4 VIPs for now.
		if eVip.NetworkID == networkID {
			vip, _, err := net.ParseCIDR(eVip.Addr)
			if err != nil {
				return ""
			}

			return vip.String()
		}
	}

	return ""
}

func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
	if len(c.task.Networks) == 0 {
		return nil
	}

	logrus.Debugf("Creating service config in agent for t = %+v", c.task)
	svcCfg := &clustertypes.ServiceConfig{
		Name:             c.task.ServiceAnnotations.Name,
		Aliases:          make(map[string][]string),
		ID:               c.task.ServiceID,
		VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
	}

	for _, na := range c.task.Networks {
		svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
			// Only IPv4 virtual IPs are supported for now.
			IPv4: c.virtualIP(na.Network.ID),
		}
		if len(na.Aliases) > 0 {
			svcCfg.Aliases[na.Network.ID] = na.Aliases
		}
	}

	if c.task.Endpoint != nil {
		for _, ePort := range c.task.Endpoint.Ports {
			svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
				Name:          ePort.Name,
				Protocol:      int32(ePort.Protocol),
				TargetPort:    ePort.TargetPort,
				PublishedPort: ePort.PublishedPort,
			})
		}
	}

	return svcCfg
}
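// For illustration (hypothetical values), getEndpointConfig on an attachment
// with Addresses ["10.0.0.3/24", "fd00::3/64"] returns endpoint settings
// whose IPAMConfig carries IPv4Address "10.0.0.3" and IPv6Address "fd00::3";
// the prefix lengths are stripped when the parsed IPs are stringified.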
// networks returns the list of network names attached to the container. The
// returned names can be used to look up the corresponding network create
// options.
func (c *containerConfig) networks() []string {
	var networks []string

	for name := range c.networksAttachments {
		networks = append(networks, name)
	}

	return networks
}

func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
	na, ok := c.networksAttachments[name]
	if !ok {
		return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
	}

	options := types.NetworkCreate{
		// ID:     na.Network.ID,
		Driver: na.Network.DriverState.Name,
		IPAM: &network.IPAM{
			Driver: na.Network.IPAM.Driver.Name,
		},
		Options:        na.Network.DriverState.Options,
		Labels:         na.Network.Spec.Annotations.Labels,
		Internal:       na.Network.Spec.Internal,
		EnableIPv6:     na.Network.Spec.Ipv6Enabled,
		CheckDuplicate: true,
	}

	for _, ic := range na.Network.IPAM.Configs {
		c := network.IPAMConfig{
			Subnet:  ic.Subnet,
			IPRange: ic.Range,
			Gateway: ic.Gateway,
		}
		options.IPAM.Config = append(options.IPAM.Config, c)
	}

	return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
}

func (c containerConfig) eventFilter() filters.Args {
	filter := filters.NewArgs()
	filter.Add("type", events.ContainerEventType)
	filter.Add("name", c.name())
	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
	return filter
}
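
// For illustration, a hypothetical caller might assemble the engine-facing
// create payload roughly as follows, given a valid *api.Task t; the actual
// call sites live elsewhere in this package, so treat this as a sketch
// rather than the canonical wiring:
//
//	ctnr, err := newContainerConfig(t)
//	if err != nil {
//		return err
//	}
//	create := types.ContainerCreateConfig{
//		Name:             ctnr.name(),
//		Config:           ctnr.config(),
//		HostConfig:       ctnr.hostConfig(),
//		NetworkingConfig: ctnr.createNetworkingConfig(),
//	}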