github.com/vieux/docker@v0.6.3-0.20161004191708-e097c2a938c7/daemon/cluster/executor/container/container.go (about) 1 package container 2 3 import ( 4 "errors" 5 "fmt" 6 "net" 7 "strings" 8 "time" 9 10 "github.com/Sirupsen/logrus" 11 12 "github.com/docker/docker/api/types" 13 enginecontainer "github.com/docker/docker/api/types/container" 14 "github.com/docker/docker/api/types/events" 15 "github.com/docker/docker/api/types/filters" 16 "github.com/docker/docker/api/types/network" 17 clustertypes "github.com/docker/docker/daemon/cluster/provider" 18 "github.com/docker/docker/reference" 19 "github.com/docker/swarmkit/agent/exec" 20 "github.com/docker/swarmkit/api" 21 ) 22 23 const ( 24 // Explicitly use the kernel's default setting for CPU quota of 100ms. 25 // https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 26 cpuQuotaPeriod = 100 * time.Millisecond 27 28 // systemLabelPrefix represents the reserved namespace for system labels. 29 systemLabelPrefix = "com.docker.swarm" 30 ) 31 32 // containerConfig converts task properties into docker container compatible 33 // components. 34 type containerConfig struct { 35 task *api.Task 36 networksAttachments map[string]*api.NetworkAttachment 37 } 38 39 // newContainerConfig returns a validated container config. No methods should 40 // return an error if this function returns without error. 41 func newContainerConfig(t *api.Task) (*containerConfig, error) { 42 var c containerConfig 43 return &c, c.setTask(t) 44 } 45 46 func (c *containerConfig) setTask(t *api.Task) error { 47 if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil { 48 return exec.ErrRuntimeUnsupported 49 } 50 51 container := t.Spec.GetContainer() 52 if container != nil { 53 if container.Image == "" { 54 return ErrImageRequired 55 } 56 57 if err := validateMounts(container.Mounts); err != nil { 58 return err 59 } 60 } 61 62 // index the networks by name 63 c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks)) 64 for _, attachment := range t.Networks { 65 c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment 66 } 67 68 c.task = t 69 return nil 70 } 71 72 func (c *containerConfig) id() string { 73 attachment := c.task.Spec.GetAttachment() 74 if attachment == nil { 75 return "" 76 } 77 78 return attachment.ContainerID 79 } 80 81 func (c *containerConfig) taskID() string { 82 return c.task.ID 83 } 84 85 func (c *containerConfig) endpoint() *api.Endpoint { 86 return c.task.Endpoint 87 } 88 89 func (c *containerConfig) spec() *api.ContainerSpec { 90 return c.task.Spec.GetContainer() 91 } 92 93 func (c *containerConfig) nameOrID() string { 94 if c.task.Spec.GetContainer() != nil { 95 return c.name() 96 } 97 98 return c.id() 99 } 100 101 func (c *containerConfig) name() string { 102 if c.task.Annotations.Name != "" { 103 // if set, use the container Annotations.Name field, set in the orchestrator. 104 return c.task.Annotations.Name 105 } 106 107 // fallback to service.slot.id. 108 return strings.Join([]string{c.task.ServiceAnnotations.Name, fmt.Sprint(c.task.Slot), c.task.ID}, ".") 109 } 110 111 func (c *containerConfig) image() string { 112 raw := c.spec().Image 113 ref, err := reference.ParseNamed(raw) 114 if err != nil { 115 return raw 116 } 117 return reference.WithDefaultTag(ref).String() 118 } 119 120 func (c *containerConfig) config() *enginecontainer.Config { 121 config := &enginecontainer.Config{ 122 Labels: c.labels(), 123 User: c.spec().User, 124 Env: c.spec().Env, 125 WorkingDir: c.spec().Dir, 126 Image: c.image(), 127 Volumes: c.volumes(), 128 } 129 130 if len(c.spec().Command) > 0 { 131 // If Command is provided, we replace the whole invocation with Command 132 // by replacing Entrypoint and specifying Cmd. Args is ignored in this 133 // case. 134 config.Entrypoint = append(config.Entrypoint, c.spec().Command...) 135 config.Cmd = append(config.Cmd, c.spec().Args...) 136 } else if len(c.spec().Args) > 0 { 137 // In this case, we assume the image has an Entrypoint and Args 138 // specifies the arguments for that entrypoint. 139 config.Cmd = c.spec().Args 140 } 141 142 return config 143 } 144 145 func (c *containerConfig) labels() map[string]string { 146 taskName := c.task.Annotations.Name 147 if taskName == "" { 148 if c.task.Slot != 0 { 149 taskName = fmt.Sprintf("%v.%v.%v", c.task.ServiceAnnotations.Name, c.task.Slot, c.task.ID) 150 } else { 151 taskName = fmt.Sprintf("%v.%v.%v", c.task.ServiceAnnotations.Name, c.task.NodeID, c.task.ID) 152 } 153 } 154 var ( 155 system = map[string]string{ 156 "task": "", // mark as cluster task 157 "task.id": c.task.ID, 158 "task.name": taskName, 159 "node.id": c.task.NodeID, 160 "service.id": c.task.ServiceID, 161 "service.name": c.task.ServiceAnnotations.Name, 162 } 163 labels = make(map[string]string) 164 ) 165 166 // base labels are those defined in the spec. 167 for k, v := range c.spec().Labels { 168 labels[k] = v 169 } 170 171 // we then apply the overrides from the task, which may be set via the 172 // orchestrator. 173 for k, v := range c.task.Annotations.Labels { 174 labels[k] = v 175 } 176 177 // finally, we apply the system labels, which override all labels. 178 for k, v := range system { 179 labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v 180 } 181 182 return labels 183 } 184 185 // volumes gets placed into the Volumes field on the containerConfig. 186 func (c *containerConfig) volumes() map[string]struct{} { 187 r := make(map[string]struct{}) 188 // Volumes *only* creates anonymous volumes. The rest is mixed in with 189 // binds, which aren't actually binds. Basically, any volume that 190 // results in a single component must be added here. 191 // 192 // This is reversed engineered from the behavior of the engine API. 193 for _, mount := range c.spec().Mounts { 194 if mount.Type == api.MountTypeVolume && mount.Source == "" { 195 r[mount.Target] = struct{}{} 196 } 197 } 198 return r 199 } 200 201 func (c *containerConfig) tmpfs() map[string]string { 202 r := make(map[string]string) 203 204 for _, spec := range c.spec().Mounts { 205 if spec.Type != api.MountTypeTmpfs { 206 continue 207 } 208 209 r[spec.Target] = getMountMask(&spec) 210 } 211 212 return r 213 } 214 215 func (c *containerConfig) binds() []string { 216 var r []string 217 for _, mount := range c.spec().Mounts { 218 if mount.Type == api.MountTypeBind || (mount.Type == api.MountTypeVolume && mount.Source != "") { 219 spec := fmt.Sprintf("%s:%s", mount.Source, mount.Target) 220 mask := getMountMask(&mount) 221 if mask != "" { 222 spec = fmt.Sprintf("%s:%s", spec, mask) 223 } 224 r = append(r, spec) 225 } 226 } 227 return r 228 } 229 230 func getMountMask(m *api.Mount) string { 231 var maskOpts []string 232 if m.ReadOnly { 233 maskOpts = append(maskOpts, "ro") 234 } 235 236 switch m.Type { 237 case api.MountTypeVolume: 238 if m.VolumeOptions != nil && m.VolumeOptions.NoCopy { 239 maskOpts = append(maskOpts, "nocopy") 240 } 241 case api.MountTypeBind: 242 if m.BindOptions == nil { 243 break 244 } 245 246 switch m.BindOptions.Propagation { 247 case api.MountPropagationPrivate: 248 maskOpts = append(maskOpts, "private") 249 case api.MountPropagationRPrivate: 250 maskOpts = append(maskOpts, "rprivate") 251 case api.MountPropagationShared: 252 maskOpts = append(maskOpts, "shared") 253 case api.MountPropagationRShared: 254 maskOpts = append(maskOpts, "rshared") 255 case api.MountPropagationSlave: 256 maskOpts = append(maskOpts, "slave") 257 case api.MountPropagationRSlave: 258 maskOpts = append(maskOpts, "rslave") 259 } 260 case api.MountTypeTmpfs: 261 if m.TmpfsOptions == nil { 262 break 263 } 264 265 if m.TmpfsOptions.Mode != 0 { 266 maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode)) 267 } 268 269 if m.TmpfsOptions.SizeBytes != 0 { 270 // calculate suffix here, making this linux specific, but that is 271 // okay, since API is that way anyways. 272 273 // we do this by finding the suffix that divides evenly into the 274 // value, returing the value itself, with no suffix, if it fails. 275 // 276 // For the most part, we don't enforce any semantic to this values. 277 // The operating system will usually align this and enforce minimum 278 // and maximums. 279 var ( 280 size = m.TmpfsOptions.SizeBytes 281 suffix string 282 ) 283 for _, r := range []struct { 284 suffix string 285 divisor int64 286 }{ 287 {"g", 1 << 30}, 288 {"m", 1 << 20}, 289 {"k", 1 << 10}, 290 } { 291 if size%r.divisor == 0 { 292 size = size / r.divisor 293 suffix = r.suffix 294 break 295 } 296 } 297 298 maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix)) 299 } 300 } 301 302 return strings.Join(maskOpts, ",") 303 } 304 305 func (c *containerConfig) hostConfig() *enginecontainer.HostConfig { 306 hc := &enginecontainer.HostConfig{ 307 Resources: c.resources(), 308 Binds: c.binds(), 309 Tmpfs: c.tmpfs(), 310 GroupAdd: c.spec().Groups, 311 } 312 313 if c.task.LogDriver != nil { 314 hc.LogConfig = enginecontainer.LogConfig{ 315 Type: c.task.LogDriver.Name, 316 Config: c.task.LogDriver.Options, 317 } 318 } 319 320 return hc 321 } 322 323 // This handles the case of volumes that are defined inside a service Mount 324 func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *types.VolumeCreateRequest { 325 var ( 326 driverName string 327 driverOpts map[string]string 328 labels map[string]string 329 ) 330 331 if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil { 332 driverName = mount.VolumeOptions.DriverConfig.Name 333 driverOpts = mount.VolumeOptions.DriverConfig.Options 334 labels = mount.VolumeOptions.Labels 335 } 336 337 if mount.VolumeOptions != nil { 338 return &types.VolumeCreateRequest{ 339 Name: mount.Source, 340 Driver: driverName, 341 DriverOpts: driverOpts, 342 Labels: labels, 343 } 344 } 345 return nil 346 } 347 348 func (c *containerConfig) resources() enginecontainer.Resources { 349 resources := enginecontainer.Resources{} 350 351 // If no limits are specified let the engine use its defaults. 352 // 353 // TODO(aluzzardi): We might want to set some limits anyway otherwise 354 // "unlimited" tasks will step over the reservation of other tasks. 355 r := c.task.Spec.Resources 356 if r == nil || r.Limits == nil { 357 return resources 358 } 359 360 if r.Limits.MemoryBytes > 0 { 361 resources.Memory = r.Limits.MemoryBytes 362 } 363 364 if r.Limits.NanoCPUs > 0 { 365 // CPU Period must be set in microseconds. 366 resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond) 367 resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9 368 } 369 370 return resources 371 } 372 373 // Docker daemon supports just 1 network during container create. 374 func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig { 375 var networks []*api.NetworkAttachment 376 if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil { 377 networks = c.task.Networks 378 } 379 380 epConfig := make(map[string]*network.EndpointSettings) 381 if len(networks) > 0 { 382 epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0]) 383 } 384 385 return &network.NetworkingConfig{EndpointsConfig: epConfig} 386 } 387 388 // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create 389 func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig { 390 var networks []*api.NetworkAttachment 391 if c.task.Spec.GetContainer() != nil { 392 networks = c.task.Networks 393 } 394 395 // First network is used during container create. Other networks are used in "docker network connect" 396 if len(networks) < 2 { 397 return nil 398 } 399 400 epConfig := make(map[string]*network.EndpointSettings) 401 for _, na := range networks[1:] { 402 epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na) 403 } 404 return &network.NetworkingConfig{EndpointsConfig: epConfig} 405 } 406 407 func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings { 408 var ipv4, ipv6 string 409 for _, addr := range na.Addresses { 410 ip, _, err := net.ParseCIDR(addr) 411 if err != nil { 412 continue 413 } 414 415 if ip.To4() != nil { 416 ipv4 = ip.String() 417 continue 418 } 419 420 if ip.To16() != nil { 421 ipv6 = ip.String() 422 } 423 } 424 425 return &network.EndpointSettings{ 426 NetworkID: na.Network.ID, 427 IPAMConfig: &network.EndpointIPAMConfig{ 428 IPv4Address: ipv4, 429 IPv6Address: ipv6, 430 }, 431 } 432 } 433 434 func (c *containerConfig) virtualIP(networkID string) string { 435 if c.task.Endpoint == nil { 436 return "" 437 } 438 439 for _, eVip := range c.task.Endpoint.VirtualIPs { 440 // We only support IPv4 VIPs for now. 441 if eVip.NetworkID == networkID { 442 vip, _, err := net.ParseCIDR(eVip.Addr) 443 if err != nil { 444 return "" 445 } 446 447 return vip.String() 448 } 449 } 450 451 return "" 452 } 453 454 func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig { 455 if len(c.task.Networks) == 0 { 456 return nil 457 } 458 459 logrus.Debugf("Creating service config in agent for t = %+v", c.task) 460 svcCfg := &clustertypes.ServiceConfig{ 461 Name: c.task.ServiceAnnotations.Name, 462 Aliases: make(map[string][]string), 463 ID: c.task.ServiceID, 464 VirtualAddresses: make(map[string]*clustertypes.VirtualAddress), 465 } 466 467 for _, na := range c.task.Networks { 468 svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{ 469 // We support only IPv4 virtual IP for now. 470 IPv4: c.virtualIP(na.Network.ID), 471 } 472 if len(na.Aliases) > 0 { 473 svcCfg.Aliases[na.Network.ID] = na.Aliases 474 } 475 } 476 477 if c.task.Endpoint != nil { 478 for _, ePort := range c.task.Endpoint.Ports { 479 svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{ 480 Name: ePort.Name, 481 Protocol: int32(ePort.Protocol), 482 TargetPort: ePort.TargetPort, 483 PublishedPort: ePort.PublishedPort, 484 }) 485 } 486 } 487 488 return svcCfg 489 } 490 491 // networks returns a list of network names attached to the container. The 492 // returned name can be used to lookup the corresponding network create 493 // options. 494 func (c *containerConfig) networks() []string { 495 var networks []string 496 497 for name := range c.networksAttachments { 498 networks = append(networks, name) 499 } 500 501 return networks 502 } 503 504 func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) { 505 na, ok := c.networksAttachments[name] 506 if !ok { 507 return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced") 508 } 509 510 options := types.NetworkCreate{ 511 // ID: na.Network.ID, 512 Driver: na.Network.DriverState.Name, 513 IPAM: &network.IPAM{ 514 Driver: na.Network.IPAM.Driver.Name, 515 }, 516 Options: na.Network.DriverState.Options, 517 Labels: na.Network.Spec.Annotations.Labels, 518 Internal: na.Network.Spec.Internal, 519 EnableIPv6: na.Network.Spec.Ipv6Enabled, 520 CheckDuplicate: true, 521 } 522 523 for _, ic := range na.Network.IPAM.Configs { 524 c := network.IPAMConfig{ 525 Subnet: ic.Subnet, 526 IPRange: ic.Range, 527 Gateway: ic.Gateway, 528 } 529 options.IPAM.Config = append(options.IPAM.Config, c) 530 } 531 532 return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil 533 } 534 535 func (c containerConfig) eventFilter() filters.Args { 536 filter := filters.NewArgs() 537 filter.Add("type", events.ContainerEventType) 538 filter.Add("name", c.name()) 539 filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID)) 540 return filter 541 }