github.com/endophage/docker@v1.4.2-0.20161027011718-242853499895/daemon/cluster/executor/container/container.go

package container

import (
	"errors"
	"fmt"
	"net"
	"strings"
	"time"

	"github.com/Sirupsen/logrus"

	"github.com/docker/docker/api/types"
	enginecontainer "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/events"
	"github.com/docker/docker/api/types/filters"
	"github.com/docker/docker/api/types/network"
	clustertypes "github.com/docker/docker/daemon/cluster/provider"
	"github.com/docker/docker/reference"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
)

const (
	// Explicitly use the kernel's default setting for CPU quota of 100ms.
	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
	cpuQuotaPeriod = 100 * time.Millisecond

	// systemLabelPrefix represents the reserved namespace for system labels.
	systemLabelPrefix = "com.docker.swarm"
)

// containerConfig converts task properties into docker container compatible
// components.
type containerConfig struct {
	task                *api.Task
	networksAttachments map[string]*api.NetworkAttachment
}

// newContainerConfig returns a validated container config. No methods should
// return an error if this function returns without error.
func newContainerConfig(t *api.Task) (*containerConfig, error) {
	var c containerConfig
	return &c, c.setTask(t)
}

func (c *containerConfig) setTask(t *api.Task) error {
	if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil {
		return exec.ErrRuntimeUnsupported
	}

	container := t.Spec.GetContainer()
	if container != nil {
		if container.Image == "" {
			return ErrImageRequired
		}

		if err := validateMounts(container.Mounts); err != nil {
			return err
		}
	}

	// Index the networks by name.
	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
	for _, attachment := range t.Networks {
		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
	}

	c.task = t
	return nil
}

func (c *containerConfig) id() string {
	attachment := c.task.Spec.GetAttachment()
	if attachment == nil {
		return ""
	}

	return attachment.ContainerID
}

func (c *containerConfig) taskID() string {
	return c.task.ID
}

func (c *containerConfig) endpoint() *api.Endpoint {
	return c.task.Endpoint
}

func (c *containerConfig) spec() *api.ContainerSpec {
	return c.task.Spec.GetContainer()
}

func (c *containerConfig) nameOrID() string {
	if c.task.Spec.GetContainer() != nil {
		return c.name()
	}

	return c.id()
}

func (c *containerConfig) name() string {
	if c.task.Annotations.Name != "" {
		// If set, use the container Annotations.Name field, set in the orchestrator.
		return c.task.Annotations.Name
	}

	slot := fmt.Sprint(c.task.Slot)
	if slot == "" || c.task.Slot == 0 {
		slot = c.task.NodeID
	}

	// Fall back to service.slot.id.
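	// For example (hypothetical values), a task in slot 3 of a service named
	// "web" with task ID "xyz" is named "web.3.xyz"; a global service task
	// with Slot == 0 uses the node ID in place of the slot.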
	return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID)
}

func (c *containerConfig) image() string {
	raw := c.spec().Image
	ref, err := reference.ParseNamed(raw)
	if err != nil {
		return raw
	}
	return reference.WithDefaultTag(ref).String()
}

func (c *containerConfig) config() *enginecontainer.Config {
	config := &enginecontainer.Config{
		Labels:     c.labels(),
		User:       c.spec().User,
		Env:        c.spec().Env,
		WorkingDir: c.spec().Dir,
		Image:      c.image(),
		Volumes:    c.volumes(),
	}

	if len(c.spec().Command) > 0 {
		// If Command is provided, we replace the whole invocation with Command
		// by replacing Entrypoint and specifying Cmd. Args is ignored in this
		// case.
		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
		config.Cmd = append(config.Cmd, c.spec().Args...)
	} else if len(c.spec().Args) > 0 {
		// In this case, we assume the image has an Entrypoint and Args
		// specifies the arguments for that entrypoint.
		config.Cmd = c.spec().Args
	}

	return config
}

func (c *containerConfig) labels() map[string]string {
	var (
		system = map[string]string{
			"task":         "", // mark as cluster task
			"task.id":      c.task.ID,
			"task.name":    c.name(),
			"node.id":      c.task.NodeID,
			"service.id":   c.task.ServiceID,
			"service.name": c.task.ServiceAnnotations.Name,
		}
		labels = make(map[string]string)
	)

	// Base labels are those defined in the spec.
	for k, v := range c.spec().Labels {
		labels[k] = v
	}

	// We then apply the overrides from the task, which may be set via the
	// orchestrator.
	for k, v := range c.task.Annotations.Labels {
		labels[k] = v
	}

	// Finally, we apply the system labels, which override all other labels.
	for k, v := range system {
		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
	}

	return labels
}

// volumes gets placed into the Volumes field on the containerConfig.
func (c *containerConfig) volumes() map[string]struct{} {
	r := make(map[string]struct{})
	// Volumes *only* creates anonymous volumes. The rest is mixed in with
	// binds, which aren't actually binds. Basically, any volume that
	// results in a single component must be added here.
	//
	// This is reverse engineered from the behavior of the engine API.
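	//
	// For example (hypothetical mount), MountTypeVolume with Source == "" and
	// Target == "/data" yields the anonymous-volume entry "/data" here, while
	// a named volume (Source != "") is rendered by binds() instead.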
	for _, mount := range c.spec().Mounts {
		if mount.Type == api.MountTypeVolume && mount.Source == "" {
			r[mount.Target] = struct{}{}
		}
	}
	return r
}

func (c *containerConfig) tmpfs() map[string]string {
	r := make(map[string]string)

	for _, spec := range c.spec().Mounts {
		if spec.Type != api.MountTypeTmpfs {
			continue
		}

		r[spec.Target] = getMountMask(&spec)
	}

	return r
}

func (c *containerConfig) binds() []string {
	var r []string
	for _, mount := range c.spec().Mounts {
		if mount.Type == api.MountTypeBind || (mount.Type == api.MountTypeVolume && mount.Source != "") {
			spec := fmt.Sprintf("%s:%s", mount.Source, mount.Target)
			mask := getMountMask(&mount)
			if mask != "" {
				spec = fmt.Sprintf("%s:%s", spec, mask)
			}
			r = append(r, spec)
		}
	}
	return r
}

func getMountMask(m *api.Mount) string {
	var maskOpts []string
	if m.ReadOnly {
		maskOpts = append(maskOpts, "ro")
	}

	switch m.Type {
	case api.MountTypeVolume:
		if m.VolumeOptions != nil && m.VolumeOptions.NoCopy {
			maskOpts = append(maskOpts, "nocopy")
		}
	case api.MountTypeBind:
		if m.BindOptions == nil {
			break
		}

		switch m.BindOptions.Propagation {
		case api.MountPropagationPrivate:
			maskOpts = append(maskOpts, "private")
		case api.MountPropagationRPrivate:
			maskOpts = append(maskOpts, "rprivate")
		case api.MountPropagationShared:
			maskOpts = append(maskOpts, "shared")
		case api.MountPropagationRShared:
			maskOpts = append(maskOpts, "rshared")
		case api.MountPropagationSlave:
			maskOpts = append(maskOpts, "slave")
		case api.MountPropagationRSlave:
			maskOpts = append(maskOpts, "rslave")
		}
	case api.MountTypeTmpfs:
		if m.TmpfsOptions == nil {
			break
		}

		if m.TmpfsOptions.Mode != 0 {
			maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode))
		}

		if m.TmpfsOptions.SizeBytes != 0 {
			// Calculate the size suffix here, making this Linux specific,
			// but that is okay, since the API is that way anyway.
			//
			// We do this by finding the largest suffix that divides evenly
			// into the value, returning the value itself, with no suffix, if
			// none does.
			//
			// For the most part, we don't enforce any semantics on these
			// values. The operating system will usually align this and
			// enforce minimums and maximums.
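			//
			// For example (illustrative values), SizeBytes == 1<<30 renders
			// as "size=1g", while SizeBytes == 1500, which no suffix divides
			// evenly, renders as "size=1500".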
			var (
				size   = m.TmpfsOptions.SizeBytes
				suffix string
			)
			for _, r := range []struct {
				suffix  string
				divisor int64
			}{
				{"g", 1 << 30},
				{"m", 1 << 20},
				{"k", 1 << 10},
			} {
				if size%r.divisor == 0 {
					size = size / r.divisor
					suffix = r.suffix
					break
				}
			}

			maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix))
		}
	}

	return strings.Join(maskOpts, ",")
}

func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
	hc := &enginecontainer.HostConfig{
		Resources: c.resources(),
		Binds:     c.binds(),
		Tmpfs:     c.tmpfs(),
		GroupAdd:  c.spec().Groups,
	}

	if c.task.LogDriver != nil {
		hc.LogConfig = enginecontainer.LogConfig{
			Type:   c.task.LogDriver.Name,
			Config: c.task.LogDriver.Options,
		}
	}

	return hc
}

// volumeCreateRequest handles the case of volumes that are defined inside a
// service Mount.
func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *types.VolumeCreateRequest {
	var (
		driverName string
		driverOpts map[string]string
		labels     map[string]string
	)

	if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
		driverName = mount.VolumeOptions.DriverConfig.Name
		driverOpts = mount.VolumeOptions.DriverConfig.Options
		labels = mount.VolumeOptions.Labels
	}

	if mount.VolumeOptions != nil {
		return &types.VolumeCreateRequest{
			Name:       mount.Source,
			Driver:     driverName,
			DriverOpts: driverOpts,
			Labels:     labels,
		}
	}
	return nil
}

func (c *containerConfig) resources() enginecontainer.Resources {
	resources := enginecontainer.Resources{}

	// If no limits are specified, let the engine use its defaults.
	//
	// TODO(aluzzardi): We might want to set some limits anyway, otherwise
	// "unlimited" tasks will step over the reservation of other tasks.
	r := c.task.Spec.Resources
	if r == nil || r.Limits == nil {
		return resources
	}

	if r.Limits.MemoryBytes > 0 {
		resources.Memory = r.Limits.MemoryBytes
	}

	if r.Limits.NanoCPUs > 0 {
		// CPU Period must be set in microseconds.
		resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
		resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
	}

	return resources
}

// The Docker daemon supports just one network during container create.
func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil {
		networks = c.task.Networks
	}

	epConfig := make(map[string]*network.EndpointSettings)
	if len(networks) > 0 {
		epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0])
	}

	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

// TODO: Merge this function with createNetworkingConfig after the daemon
// supports multiple networks in container create.
func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil {
		networks = c.task.Networks
	}

	// The first network is used during container create. Other networks are
	// used with "docker network connect".
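	//
	// For example (hypothetical attachments), a task attached to networks
	// [overlay-a, overlay-b] gets overlay-a from createNetworkingConfig at
	// create time, while this function returns endpoint settings for
	// overlay-b only; with fewer than two networks it returns nil.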
	if len(networks) < 2 {
		return nil
	}

	epConfig := make(map[string]*network.EndpointSettings)
	for _, na := range networks[1:] {
		epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na)
	}
	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings {
	var ipv4, ipv6 string
	for _, addr := range na.Addresses {
		ip, _, err := net.ParseCIDR(addr)
		if err != nil {
			continue
		}

		if ip.To4() != nil {
			ipv4 = ip.String()
			continue
		}

		if ip.To16() != nil {
			ipv6 = ip.String()
		}
	}

	return &network.EndpointSettings{
		NetworkID: na.Network.ID,
		IPAMConfig: &network.EndpointIPAMConfig{
			IPv4Address: ipv4,
			IPv6Address: ipv6,
		},
	}
}

func (c *containerConfig) virtualIP(networkID string) string {
	if c.task.Endpoint == nil {
		return ""
	}

	for _, eVip := range c.task.Endpoint.VirtualIPs {
		// We only support IPv4 VIPs for now.
		if eVip.NetworkID == networkID {
			vip, _, err := net.ParseCIDR(eVip.Addr)
			if err != nil {
				return ""
			}

			return vip.String()
		}
	}

	return ""
}

func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
	if len(c.task.Networks) == 0 {
		return nil
	}

	logrus.Debugf("Creating service config in agent for t = %+v", c.task)
	svcCfg := &clustertypes.ServiceConfig{
		Name:             c.task.ServiceAnnotations.Name,
		Aliases:          make(map[string][]string),
		ID:               c.task.ServiceID,
		VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
	}

	for _, na := range c.task.Networks {
		svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
			// We support only IPv4 virtual IPs for now.
			IPv4: c.virtualIP(na.Network.ID),
		}
		if len(na.Aliases) > 0 {
			svcCfg.Aliases[na.Network.ID] = na.Aliases
		}
	}

	if c.task.Endpoint != nil {
		for _, ePort := range c.task.Endpoint.Ports {
			svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
				Name:          ePort.Name,
				Protocol:      int32(ePort.Protocol),
				TargetPort:    ePort.TargetPort,
				PublishedPort: ePort.PublishedPort,
			})
		}
	}

	return svcCfg
}

// networks returns a list of network names attached to the container. The
// returned names can be used to look up the corresponding network create
// options.
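//
// A hypothetical caller might drive network creation like this (sketch only,
// error handling elided):
//
//	for _, name := range c.networks() {
//		req, err := c.networkCreateRequest(name)
//		_, _ = req, err
//	}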
func (c *containerConfig) networks() []string {
	var networks []string

	for name := range c.networksAttachments {
		networks = append(networks, name)
	}

	return networks
}

func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
	na, ok := c.networksAttachments[name]
	if !ok {
		return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
	}

	options := types.NetworkCreate{
		// ID: na.Network.ID,
		Driver: na.Network.DriverState.Name,
		IPAM: &network.IPAM{
			Driver: na.Network.IPAM.Driver.Name,
		},
		Options:        na.Network.DriverState.Options,
		Labels:         na.Network.Spec.Annotations.Labels,
		Internal:       na.Network.Spec.Internal,
		EnableIPv6:     na.Network.Spec.Ipv6Enabled,
		CheckDuplicate: true,
	}

	for _, ic := range na.Network.IPAM.Configs {
		c := network.IPAMConfig{
			Subnet:  ic.Subnet,
			IPRange: ic.Range,
			Gateway: ic.Gateway,
		}
		options.IPAM.Config = append(options.IPAM.Config, c)
	}

	return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
}

func (c containerConfig) eventFilter() filters.Args {
	filter := filters.NewArgs()
	filter.Add("type", events.ContainerEventType)
	filter.Add("name", c.name())
	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
	return filter
}
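
// A hypothetical end-to-end use of this type (sketch only; the executor
// elsewhere in this package wires this up, and the variable names here are
// illustrative):
//
//	cfg, err := newContainerConfig(task)
//	if err != nil {
//		return err
//	}
//	createOpts := cfg.config()           // *enginecontainer.Config
//	hostOpts := cfg.hostConfig()         // *enginecontainer.HostConfig
//	netOpts := cfg.createNetworkingConfig()
//	filter := cfg.eventFilter()          // scopes the daemon event stream by
//	                                     // type, name, and the task-ID label
//	_, _, _, _ = createOpts, hostOpts, netOpts, filter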