github.com/mhy12345/docker@v1.12.3/daemon/cluster/executor/container/container.go

package container

import (
	"errors"
	"fmt"
	"net"
	"strings"
	"time"

	"github.com/Sirupsen/logrus"

	clustertypes "github.com/docker/docker/daemon/cluster/provider"
	"github.com/docker/docker/reference"
	"github.com/docker/engine-api/types"
	enginecontainer "github.com/docker/engine-api/types/container"
	"github.com/docker/engine-api/types/events"
	"github.com/docker/engine-api/types/filters"
	"github.com/docker/engine-api/types/network"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
)

const (
	// Explicitly use the kernel's default setting for CPU quota of 100ms.
	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
	cpuQuotaPeriod = 100 * time.Millisecond

	// systemLabelPrefix represents the reserved namespace for system labels.
	systemLabelPrefix = "com.docker.swarm"
)

// containerConfig converts task properties into docker container compatible
// components.
type containerConfig struct {
	task                *api.Task
	networksAttachments map[string]*api.NetworkAttachment
}

// newContainerConfig returns a validated container config. No methods should
// return an error if this function returns without error.
func newContainerConfig(t *api.Task) (*containerConfig, error) {
	var c containerConfig
	return &c, c.setTask(t)
}

func (c *containerConfig) setTask(t *api.Task) error {
	container := t.Spec.GetContainer()
	if container == nil {
		return exec.ErrRuntimeUnsupported
	}

	if container.Image == "" {
		return ErrImageRequired
	}

	if err := validateMounts(container.Mounts); err != nil {
		return err
	}

	// index the networks by name
	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
	for _, attachment := range t.Networks {
		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
	}

	c.task = t
	return nil
}

func (c *containerConfig) endpoint() *api.Endpoint {
	return c.task.Endpoint
}

func (c *containerConfig) spec() *api.ContainerSpec {
	return c.task.Spec.GetContainer()
}

func (c *containerConfig) name() string {
	if c.task.Annotations.Name != "" {
		// if set, use the container Annotations.Name field, set in the orchestrator.
		return c.task.Annotations.Name
	}

	// fall back to service.slot.id.
	return strings.Join([]string{c.task.ServiceAnnotations.Name, fmt.Sprint(c.task.Slot), c.task.ID}, ".")
}

func (c *containerConfig) image() string {
	raw := c.spec().Image
	ref, err := reference.ParseNamed(raw)
	if err != nil {
		return raw
	}
	return reference.WithDefaultTag(ref).String()
}

func (c *containerConfig) config() *enginecontainer.Config {
	config := &enginecontainer.Config{
		Labels:     c.labels(),
		User:       c.spec().User,
		Env:        c.spec().Env,
		WorkingDir: c.spec().Dir,
		Image:      c.image(),
		Volumes:    c.volumes(),
	}

	if len(c.spec().Command) > 0 {
		// If Command is provided, it replaces the image's Entrypoint, and
		// Args are passed along as Cmd.
		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
		config.Cmd = append(config.Cmd, c.spec().Args...)
	} else if len(c.spec().Args) > 0 {
		// In this case, we assume the image has an Entrypoint and Args
		// specifies the arguments for that entrypoint.
		config.Cmd = c.spec().Args
	}

	return config
}
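// Illustrative note (not part of the original source): given a spec with
// Command=["redis-server"] and Args=["--appendonly", "yes"], config()
// produces Entrypoint=["redis-server"] and Cmd=["--appendonly", "yes"],
// overriding the image's own entrypoint. With only Args set, the image's
// entrypoint is preserved and Cmd carries the arguments.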
func (c *containerConfig) labels() map[string]string {
	var (
		system = map[string]string{
			"task":         "", // mark as cluster task
			"task.id":      c.task.ID,
			"task.name":    fmt.Sprintf("%v.%v", c.task.ServiceAnnotations.Name, c.task.Slot),
			"node.id":      c.task.NodeID,
			"service.id":   c.task.ServiceID,
			"service.name": c.task.ServiceAnnotations.Name,
		}
		labels = make(map[string]string)
	)

	// base labels are those defined in the spec.
	for k, v := range c.spec().Labels {
		labels[k] = v
	}

	// we then apply the overrides from the task, which may be set via the
	// orchestrator.
	for k, v := range c.task.Annotations.Labels {
		labels[k] = v
	}

	// finally, we apply the system labels, which override all labels.
	for k, v := range system {
		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
	}

	return labels
}

// volumes gets placed into the Volumes field on the containerConfig.
func (c *containerConfig) volumes() map[string]struct{} {
	r := make(map[string]struct{})
	// Volumes *only* creates anonymous volumes. The rest is mixed in with
	// binds, which aren't actually binds. Basically, any volume that
	// results in a single component must be added here.
	//
	// This is reverse engineered from the behavior of the engine API.
	for _, mount := range c.spec().Mounts {
		if mount.Type == api.MountTypeVolume && mount.Source == "" {
			r[mount.Target] = struct{}{}
		}
	}
	return r
}

func (c *containerConfig) tmpfs() map[string]string {
	r := make(map[string]string)

	for _, spec := range c.spec().Mounts {
		if spec.Type != api.MountTypeTmpfs {
			continue
		}

		r[spec.Target] = getMountMask(&spec)
	}

	return r
}

func (c *containerConfig) binds() []string {
	var r []string
	for _, mount := range c.spec().Mounts {
		if mount.Type == api.MountTypeBind || (mount.Type == api.MountTypeVolume && mount.Source != "") {
			spec := fmt.Sprintf("%s:%s", mount.Source, mount.Target)
			mask := getMountMask(&mount)
			if mask != "" {
				spec = fmt.Sprintf("%s:%s", spec, mask)
			}
			r = append(r, spec)
		}
	}
	return r
}
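// Illustrative note (not part of the original source): a read-only bind
// mount with Source="/var/data", Target="/data", and rprivate propagation
// renders as "/var/data:/data:ro,rprivate". A named volume such as
// Source="mydata", Target="/data" renders as "mydata:/data" and travels
// through the same Binds field, as the comment in volumes() explains.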
func getMountMask(m *api.Mount) string {
	var maskOpts []string
	if m.ReadOnly {
		maskOpts = append(maskOpts, "ro")
	}

	switch m.Type {
	case api.MountTypeVolume:
		if m.VolumeOptions != nil && m.VolumeOptions.NoCopy {
			maskOpts = append(maskOpts, "nocopy")
		}
	case api.MountTypeBind:
		if m.BindOptions == nil {
			break
		}

		switch m.BindOptions.Propagation {
		case api.MountPropagationPrivate:
			maskOpts = append(maskOpts, "private")
		case api.MountPropagationRPrivate:
			maskOpts = append(maskOpts, "rprivate")
		case api.MountPropagationShared:
			maskOpts = append(maskOpts, "shared")
		case api.MountPropagationRShared:
			maskOpts = append(maskOpts, "rshared")
		case api.MountPropagationSlave:
			maskOpts = append(maskOpts, "slave")
		case api.MountPropagationRSlave:
			maskOpts = append(maskOpts, "rslave")
		}
	case api.MountTypeTmpfs:
		if m.TmpfsOptions == nil {
			break
		}

		if m.TmpfsOptions.Mode != 0 {
			maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode))
		}

		if m.TmpfsOptions.SizeBytes != 0 {
			// Calculate the suffix here, making this linux specific, but
			// that is okay, since the API is that way anyway.
			//
			// We do this by finding the suffix that divides evenly into the
			// value, returning the value itself, with no suffix, if it fails.
			//
			// For the most part, we don't enforce any semantics on these
			// values. The operating system will usually align this and
			// enforce minimums and maximums.
			var (
				size   = m.TmpfsOptions.SizeBytes
				suffix string
			)
			for _, r := range []struct {
				suffix  string
				divisor int64
			}{
				{"g", 1 << 30},
				{"m", 1 << 20},
				{"k", 1 << 10},
			} {
				if size%r.divisor == 0 {
					size = size / r.divisor
					suffix = r.suffix
					break
				}
			}

			maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix))
		}
	}

	return strings.Join(maskOpts, ",")
}

func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
	hc := &enginecontainer.HostConfig{
		Resources: c.resources(),
		Binds:     c.binds(),
		Tmpfs:     c.tmpfs(),
	}

	if c.task.LogDriver != nil {
		hc.LogConfig = enginecontainer.LogConfig{
			Type:   c.task.LogDriver.Name,
			Config: c.task.LogDriver.Options,
		}
	}

	return hc
}

// volumeCreateRequest handles the case of volumes that are defined inside a
// service Mount.
func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *types.VolumeCreateRequest {
	var (
		driverName string
		driverOpts map[string]string
		labels     map[string]string
	)

	if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
		driverName = mount.VolumeOptions.DriverConfig.Name
		driverOpts = mount.VolumeOptions.DriverConfig.Options
		labels = mount.VolumeOptions.Labels
	}

	if mount.VolumeOptions != nil {
		return &types.VolumeCreateRequest{
			Name:       mount.Source,
			Driver:     driverName,
			DriverOpts: driverOpts,
			Labels:     labels,
		}
	}
	return nil
}

func (c *containerConfig) resources() enginecontainer.Resources {
	resources := enginecontainer.Resources{}

	// If no limits are specified, let the engine use its defaults.
	//
	// TODO(aluzzardi): We might want to set some limits anyway, otherwise
	// "unlimited" tasks will step over the reservation of other tasks.
	r := c.task.Spec.Resources
	if r == nil || r.Limits == nil {
		return resources
	}

	if r.Limits.MemoryBytes > 0 {
		resources.Memory = r.Limits.MemoryBytes
	}

	if r.Limits.NanoCPUs > 0 {
		// CPU Period must be set in microseconds.
		resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
		resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
	}

	return resources
}
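// Worked example (not part of the original source): a limit of half a CPU
// is expressed as NanoCPUs = 500000000. With cpuQuotaPeriod = 100ms,
// CPUPeriod is 100000µs and CPUQuota = 500000000 * 100000 / 1e9 = 50000µs,
// i.e. the container may run for 50ms of every 100ms scheduling period.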
// The Docker daemon supports only one network during container create.
func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil {
		networks = c.task.Networks
	}

	epConfig := make(map[string]*network.EndpointSettings)
	if len(networks) > 0 {
		epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0])
	}

	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

// TODO: Merge this function with createNetworkingConfig after the daemon
// supports multiple networks in container create.
func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil {
		networks = c.task.Networks
	}

	// The first network is used during container create. The other networks
	// are used in "docker network connect".
	if len(networks) < 2 {
		return nil
	}

	epConfig := make(map[string]*network.EndpointSettings)
	for _, na := range networks[1:] {
		epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na)
	}
	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings {
	var ipv4, ipv6 string
	for _, addr := range na.Addresses {
		ip, _, err := net.ParseCIDR(addr)
		if err != nil {
			continue
		}

		if ip.To4() != nil {
			ipv4 = ip.String()
			continue
		}

		if ip.To16() != nil {
			ipv6 = ip.String()
		}
	}

	return &network.EndpointSettings{
		IPAMConfig: &network.EndpointIPAMConfig{
			IPv4Address: ipv4,
			IPv6Address: ipv6,
		},
	}
}
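// Illustrative note (not part of the original source): for an attachment
// with Addresses=["10.0.0.2/24", "fd00::2/64"], getEndpointConfig returns
// IPAMConfig with IPv4Address="10.0.0.2" and IPv6Address="fd00::2"; only
// the host address from each CIDR is kept, the prefix length is dropped.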
func (c *containerConfig) virtualIP(networkID string) string {
	if c.task.Endpoint == nil {
		return ""
	}

	for _, eVip := range c.task.Endpoint.VirtualIPs {
		// We only support IPv4 VIPs for now.
		if eVip.NetworkID == networkID {
			vip, _, err := net.ParseCIDR(eVip.Addr)
			if err != nil {
				return ""
			}

			return vip.String()
		}
	}

	return ""
}

func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
	if len(c.task.Networks) == 0 {
		return nil
	}

	logrus.Debugf("Creating service config in agent for t = %+v", c.task)
	svcCfg := &clustertypes.ServiceConfig{
		Name:             c.task.ServiceAnnotations.Name,
		Aliases:          make(map[string][]string),
		ID:               c.task.ServiceID,
		VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
	}

	for _, na := range c.task.Networks {
		svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
			// We support only IPv4 virtual IPs for now.
			IPv4: c.virtualIP(na.Network.ID),
		}
		if len(na.Aliases) > 0 {
			svcCfg.Aliases[na.Network.ID] = na.Aliases
		}
	}

	if c.task.Endpoint != nil {
		for _, ePort := range c.task.Endpoint.Ports {
			svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
				Name:          ePort.Name,
				Protocol:      int32(ePort.Protocol),
				TargetPort:    ePort.TargetPort,
				PublishedPort: ePort.PublishedPort,
			})
		}
	}

	return svcCfg
}

// networks returns a list of network names attached to the container. The
// returned name can be used to look up the corresponding network create
// options.
func (c *containerConfig) networks() []string {
	var networks []string

	for name := range c.networksAttachments {
		networks = append(networks, name)
	}

	return networks
}

func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
	na, ok := c.networksAttachments[name]
	if !ok {
		return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
	}

	options := types.NetworkCreate{
		// ID: na.Network.ID,
		Driver: na.Network.DriverState.Name,
		IPAM: network.IPAM{
			Driver: na.Network.IPAM.Driver.Name,
		},
		Options:        na.Network.DriverState.Options,
		Labels:         na.Network.Spec.Annotations.Labels,
		Internal:       na.Network.Spec.Internal,
		EnableIPv6:     na.Network.Spec.Ipv6Enabled,
		CheckDuplicate: true,
	}

	for _, ic := range na.Network.IPAM.Configs {
		c := network.IPAMConfig{
			Subnet:  ic.Subnet,
			IPRange: ic.Range,
			Gateway: ic.Gateway,
		}
		options.IPAM.Config = append(options.IPAM.Config, c)
	}

	return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
}

func (c containerConfig) eventFilter() filters.Args {
	filter := filters.NewArgs()
	filter.Add("type", events.ContainerEventType)
	filter.Add("name", c.name())
	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
	return filter
}
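// Illustrative note (not part of the original source; the task ID below is
// hypothetical): for a task with ID "8oonml2gyyac" in service "web", slot 1,
// and no Annotations.Name override, eventFilter matches container events
// with name "web.1.8oonml2gyyac" and label
// "com.docker.swarm.task.id=8oonml2gyyac".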