// Source: github.com/kaisenlinux/docker@v0.0.0-20230510090727-ea55db55fac7/swarmkit/agent/exec/dockerapi/container.go

package dockerapi

import (
	"errors"
	"fmt"
	"net"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	enginecontainer "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/events"
	"github.com/docker/docker/api/types/filters"
	enginemount "github.com/docker/docker/api/types/mount"
	"github.com/docker/docker/api/types/network"
	"github.com/docker/docker/api/types/volume"
	"github.com/docker/go-connections/nat"
	"github.com/docker/go-units"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/api/genericresource"
	"github.com/docker/swarmkit/api/naming"
	"github.com/docker/swarmkit/template"
	gogotypes "github.com/gogo/protobuf/types"
)

const (
	// cpuQuotaPeriod is the period against which CPU quotas are expressed.
	// Explicitly use the kernel's default setting of 100ms.
	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
	cpuQuotaPeriod = 100 * time.Millisecond

	// systemLabelPrefix represents the reserved namespace for system labels.
	// It is joined to each system label key with a "." separator.
	systemLabelPrefix = "com.docker.swarm"
)

// containerConfig converts task properties into docker container compatible
// components.
type containerConfig struct {
	// task is the task whose properties are being converted.
	task *api.Task
	// networksAttachments holds the task's network attachments, indexed by
	// network name.
	networksAttachments map[string]*api.NetworkAttachment
}

// newContainerConfig returns a validated container config. No methods should
// return an error if this function returns without error.
46 func newContainerConfig(n *api.NodeDescription, t *api.Task) (*containerConfig, error) { 47 var c containerConfig 48 return &c, c.setTask(n, t) 49 } 50 51 func (c *containerConfig) setTask(n *api.NodeDescription, t *api.Task) error { 52 container := t.Spec.GetContainer() 53 if container == nil { 54 return exec.ErrRuntimeUnsupported 55 } 56 57 if container.Image == "" { 58 return ErrImageRequired 59 } 60 61 // index the networks by name 62 c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks)) 63 for _, attachment := range t.Networks { 64 c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment 65 } 66 67 c.task = t 68 preparedSpec, err := template.ExpandContainerSpec(n, t) 69 if err != nil { 70 return err 71 } 72 c.task.Spec.Runtime = &api.TaskSpec_Container{ 73 Container: preparedSpec, 74 } 75 76 return nil 77 } 78 79 func (c *containerConfig) endpoint() *api.Endpoint { 80 return c.task.Endpoint 81 } 82 83 func (c *containerConfig) spec() *api.ContainerSpec { 84 return c.task.Spec.GetContainer() 85 } 86 87 func (c *containerConfig) name() string { 88 return naming.Task(c.task) 89 } 90 91 func (c *containerConfig) image() string { 92 return c.spec().Image 93 } 94 95 func portSpec(port uint32, protocol api.PortConfig_Protocol) nat.Port { 96 return nat.Port(fmt.Sprintf("%d/%s", port, strings.ToLower(protocol.String()))) 97 } 98 99 func (c *containerConfig) portBindings() nat.PortMap { 100 portBindings := nat.PortMap{} 101 if c.task.Endpoint == nil { 102 return portBindings 103 } 104 105 for _, portConfig := range c.task.Endpoint.Ports { 106 if portConfig.PublishMode != api.PublishModeHost { 107 continue 108 } 109 110 port := portSpec(portConfig.TargetPort, portConfig.Protocol) 111 binding := []nat.PortBinding{ 112 {}, 113 } 114 115 if portConfig.PublishedPort != 0 { 116 binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort)) 117 } 118 portBindings[port] = binding 119 } 120 121 return portBindings 122 } 123 
124 func (c *containerConfig) isolation() enginecontainer.Isolation { 125 switch c.spec().Isolation { 126 case api.ContainerIsolationDefault: 127 return enginecontainer.Isolation("default") 128 case api.ContainerIsolationHyperV: 129 return enginecontainer.Isolation("hyperv") 130 case api.ContainerIsolationProcess: 131 return enginecontainer.Isolation("process") 132 } 133 return enginecontainer.Isolation("") 134 } 135 136 func (c *containerConfig) exposedPorts() map[nat.Port]struct{} { 137 exposedPorts := make(map[nat.Port]struct{}) 138 if c.task.Endpoint == nil { 139 return exposedPorts 140 } 141 142 for _, portConfig := range c.task.Endpoint.Ports { 143 if portConfig.PublishMode != api.PublishModeHost { 144 continue 145 } 146 147 port := portSpec(portConfig.TargetPort, portConfig.Protocol) 148 exposedPorts[port] = struct{}{} 149 } 150 151 return exposedPorts 152 } 153 154 func (c *containerConfig) config() *enginecontainer.Config { 155 genericEnvs := genericresource.EnvFormat(c.task.AssignedGenericResources, "DOCKER_RESOURCE") 156 env := append(c.spec().Env, genericEnvs...) 157 158 config := &enginecontainer.Config{ 159 Labels: c.labels(), 160 StopSignal: c.spec().StopSignal, 161 User: c.spec().User, 162 Hostname: c.spec().Hostname, 163 Env: env, 164 WorkingDir: c.spec().Dir, 165 Tty: c.spec().TTY, 166 OpenStdin: c.spec().OpenStdin, 167 Image: c.image(), 168 ExposedPorts: c.exposedPorts(), 169 Healthcheck: c.healthcheck(), 170 } 171 172 if len(c.spec().Command) > 0 { 173 // If Command is provided, we replace the whole invocation with Command 174 // by replacing Entrypoint and specifying Cmd. Args is ignored in this 175 // case. 176 config.Entrypoint = append(config.Entrypoint, c.spec().Command...) 177 config.Cmd = append(config.Cmd, c.spec().Args...) 178 } else if len(c.spec().Args) > 0 { 179 // In this case, we assume the image has an Entrypoint and Args 180 // specifies the arguments for that entrypoint. 
181 config.Cmd = c.spec().Args 182 } 183 184 return config 185 } 186 187 func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig { 188 hcSpec := c.spec().Healthcheck 189 if hcSpec == nil { 190 return nil 191 } 192 interval, _ := gogotypes.DurationFromProto(hcSpec.Interval) 193 timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout) 194 startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod) 195 return &enginecontainer.HealthConfig{ 196 Test: hcSpec.Test, 197 Interval: interval, 198 Timeout: timeout, 199 Retries: int(hcSpec.Retries), 200 StartPeriod: startPeriod, 201 } 202 } 203 204 func (c *containerConfig) hostConfig() *enginecontainer.HostConfig { 205 hc := &enginecontainer.HostConfig{ 206 Resources: c.resources(), 207 Mounts: c.mounts(), 208 Tmpfs: c.tmpfs(), 209 GroupAdd: c.spec().Groups, 210 PortBindings: c.portBindings(), 211 Init: c.init(), 212 Isolation: c.isolation(), 213 CapAdd: c.spec().CapabilityAdd, 214 CapDrop: c.spec().CapabilityDrop, 215 } 216 217 // The format of extra hosts on swarmkit is specified in: 218 // http://man7.org/linux/man-pages/man5/hosts.5.html 219 // IP_address canonical_hostname [aliases...] 
220 // However, the format of ExtraHosts in HostConfig is 221 // <host>:<ip> 222 // We need to do the conversion here 223 // (Alias is ignored for now) 224 for _, entry := range c.spec().Hosts { 225 parts := strings.Fields(entry) 226 if len(parts) > 1 { 227 hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0])) 228 } 229 } 230 231 if c.task.LogDriver != nil { 232 hc.LogConfig = enginecontainer.LogConfig{ 233 Type: c.task.LogDriver.Name, 234 Config: c.task.LogDriver.Options, 235 } 236 } 237 238 return hc 239 } 240 241 func (c *containerConfig) labels() map[string]string { 242 var ( 243 system = map[string]string{ 244 "task": "", // mark as cluster task 245 "task.id": c.task.ID, 246 "task.name": naming.Task(c.task), 247 "node.id": c.task.NodeID, 248 "service.id": c.task.ServiceID, 249 "service.name": c.task.ServiceAnnotations.Name, 250 } 251 labels = make(map[string]string) 252 ) 253 254 // base labels are those defined in the spec. 255 for k, v := range c.spec().Labels { 256 labels[k] = v 257 } 258 259 // we then apply the overrides from the task, which may be set via the 260 // orchestrator. 261 for k, v := range c.task.Annotations.Labels { 262 labels[k] = v 263 } 264 265 // finally, we apply the system labels, which override all labels. 
266 for k, v := range system { 267 labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v 268 } 269 270 return labels 271 } 272 273 func (c *containerConfig) tmpfs() map[string]string { 274 r := make(map[string]string) 275 276 for _, spec := range c.spec().Mounts { 277 if spec.Type != api.MountTypeTmpfs { 278 continue 279 } 280 281 r[spec.Target] = getMountMask(&spec) 282 } 283 284 return r 285 } 286 287 func (c *containerConfig) mounts() []enginemount.Mount { 288 var r []enginemount.Mount 289 for _, mount := range c.spec().Mounts { 290 r = append(r, convertMount(mount)) 291 } 292 return r 293 } 294 295 func convertMount(m api.Mount) enginemount.Mount { 296 mount := enginemount.Mount{ 297 Source: m.Source, 298 Target: m.Target, 299 ReadOnly: m.ReadOnly, 300 } 301 302 switch m.Type { 303 case api.MountTypeBind: 304 mount.Type = enginemount.TypeBind 305 case api.MountTypeVolume: 306 mount.Type = enginemount.TypeVolume 307 case api.MountTypeNamedPipe: 308 mount.Type = enginemount.TypeNamedPipe 309 } 310 311 if m.BindOptions != nil { 312 mount.BindOptions = &enginemount.BindOptions{ 313 NonRecursive: m.BindOptions.NonRecursive, 314 } 315 switch m.BindOptions.Propagation { 316 case api.MountPropagationRPrivate: 317 mount.BindOptions.Propagation = enginemount.PropagationRPrivate 318 case api.MountPropagationPrivate: 319 mount.BindOptions.Propagation = enginemount.PropagationPrivate 320 case api.MountPropagationRSlave: 321 mount.BindOptions.Propagation = enginemount.PropagationRSlave 322 case api.MountPropagationSlave: 323 mount.BindOptions.Propagation = enginemount.PropagationSlave 324 case api.MountPropagationRShared: 325 mount.BindOptions.Propagation = enginemount.PropagationRShared 326 case api.MountPropagationShared: 327 mount.BindOptions.Propagation = enginemount.PropagationShared 328 } 329 } 330 331 if m.VolumeOptions != nil { 332 mount.VolumeOptions = &enginemount.VolumeOptions{ 333 NoCopy: m.VolumeOptions.NoCopy, 334 } 335 if m.VolumeOptions.Labels != nil 
{ 336 mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels)) 337 for k, v := range m.VolumeOptions.Labels { 338 mount.VolumeOptions.Labels[k] = v 339 } 340 } 341 if m.VolumeOptions.DriverConfig != nil { 342 mount.VolumeOptions.DriverConfig = &enginemount.Driver{ 343 Name: m.VolumeOptions.DriverConfig.Name, 344 } 345 if m.VolumeOptions.DriverConfig.Options != nil { 346 mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options)) 347 for k, v := range m.VolumeOptions.DriverConfig.Options { 348 mount.VolumeOptions.DriverConfig.Options[k] = v 349 } 350 } 351 } 352 } 353 return mount 354 } 355 356 func getMountMask(m *api.Mount) string { 357 var maskOpts []string 358 if m.ReadOnly { 359 maskOpts = append(maskOpts, "ro") 360 } 361 362 switch m.Type { 363 case api.MountTypeTmpfs: 364 if m.TmpfsOptions == nil { 365 break 366 } 367 368 if m.TmpfsOptions.Mode != 0 { 369 maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode)) 370 } 371 372 if m.TmpfsOptions.SizeBytes != 0 { 373 // calculate suffix here, making this linux specific, but that is 374 // okay, since API is that way anyways. 375 376 // we do this by finding the suffix that divides evenly into the 377 // value, returning the value itself, with no suffix, if it fails. 378 // 379 // For the most part, we don't enforce any semantic to this values. 380 // The operating system will usually align this and enforce minimum 381 // and maximums. 
382 var ( 383 size = m.TmpfsOptions.SizeBytes 384 suffix string 385 ) 386 for _, r := range []struct { 387 suffix string 388 divisor int64 389 }{ 390 {"g", 1 << 30}, 391 {"m", 1 << 20}, 392 {"k", 1 << 10}, 393 } { 394 if size%r.divisor == 0 { 395 size = size / r.divisor 396 suffix = r.suffix 397 break 398 } 399 } 400 401 maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix)) 402 } 403 404 if opts := m.TmpfsOptions.Options; opts != "" { 405 validOpts := map[string]bool{ 406 "exec": true, 407 "noexec": true, 408 } 409 for _, opt := range strings.Split(strings.ToLower(opts), ",") { 410 if _, ok := validOpts[opt]; ok { 411 maskOpts = append(maskOpts, opt) 412 } 413 } 414 } 415 } 416 417 return strings.Join(maskOpts, ",") 418 } 419 420 // This handles the case of volumes that are defined inside a service Mount 421 func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volume.VolumeCreateBody { 422 var ( 423 driverName string 424 driverOpts map[string]string 425 labels map[string]string 426 ) 427 428 if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil { 429 driverName = mount.VolumeOptions.DriverConfig.Name 430 driverOpts = mount.VolumeOptions.DriverConfig.Options 431 labels = mount.VolumeOptions.Labels 432 } 433 434 return &volume.VolumeCreateBody{ 435 Name: mount.Source, 436 Driver: driverName, 437 DriverOpts: driverOpts, 438 Labels: labels, 439 } 440 } 441 442 func (c *containerConfig) resources() enginecontainer.Resources { 443 resources := enginecontainer.Resources{} 444 445 // set pids limit 446 pidsLimit := c.spec().PidsLimit 447 if pidsLimit > 0 { 448 resources.PidsLimit = &pidsLimit 449 } 450 451 resources.Ulimits = make([]*units.Ulimit, len(c.spec().Ulimits)) 452 for i, ulimit := range c.spec().Ulimits { 453 resources.Ulimits[i] = &units.Ulimit{ 454 Name: ulimit.Name, 455 Soft: ulimit.Soft, 456 Hard: ulimit.Hard, 457 } 458 } 459 460 // If no limits are specified let the engine use its defaults. 
461 // 462 // TODO(aluzzardi): We might want to set some limits anyway otherwise 463 // "unlimited" tasks will step over the reservation of other tasks. 464 r := c.task.Spec.Resources 465 if r == nil || r.Limits == nil { 466 return resources 467 } 468 469 if r.Limits.MemoryBytes > 0 { 470 resources.Memory = r.Limits.MemoryBytes 471 } 472 473 if r.Limits.NanoCPUs > 0 { 474 // CPU Period must be set in microseconds. 475 resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond) 476 resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9 477 } 478 479 return resources 480 } 481 482 func (c *containerConfig) virtualIP(networkID string) string { 483 if c.task.Endpoint == nil { 484 return "" 485 } 486 487 for _, vip := range c.task.Endpoint.VirtualIPs { 488 // We only support IPv4 VIPs for now. 489 if vip.NetworkID == networkID { 490 vip, _, err := net.ParseCIDR(vip.Addr) 491 if err != nil { 492 return "" 493 } 494 495 return vip.String() 496 } 497 } 498 499 return "" 500 } 501 502 func (c *containerConfig) networkingConfig() *network.NetworkingConfig { 503 epConfig := make(map[string]*network.EndpointSettings) 504 for _, na := range c.task.Networks { 505 var ipv4, ipv6 string 506 for _, addr := range na.Addresses { 507 ip, _, err := net.ParseCIDR(addr) 508 if err != nil { 509 continue 510 } 511 512 if ip.To4() != nil { 513 ipv4 = ip.String() 514 continue 515 } 516 517 if ip.To16() != nil { 518 ipv6 = ip.String() 519 } 520 } 521 522 epSettings := &network.EndpointSettings{ 523 IPAMConfig: &network.EndpointIPAMConfig{ 524 IPv4Address: ipv4, 525 IPv6Address: ipv6, 526 }, 527 } 528 529 epConfig[na.Network.Spec.Annotations.Name] = epSettings 530 } 531 532 return &network.NetworkingConfig{EndpointsConfig: epConfig} 533 } 534 535 // networks returns a list of network names attached to the container. The 536 // returned name can be used to lookup the corresponding network create 537 // options. 
538 func (c *containerConfig) networks() []string { 539 var networks []string 540 541 for name := range c.networksAttachments { 542 networks = append(networks, name) 543 } 544 545 return networks 546 } 547 548 func (c *containerConfig) networkCreateOptions(name string) (types.NetworkCreate, error) { 549 na, ok := c.networksAttachments[name] 550 if !ok { 551 return types.NetworkCreate{}, errors.New("container: unknown network referenced") 552 } 553 554 options := types.NetworkCreate{ 555 Driver: na.Network.DriverState.Name, 556 IPAM: &network.IPAM{ 557 Driver: na.Network.IPAM.Driver.Name, 558 }, 559 Options: na.Network.DriverState.Options, 560 CheckDuplicate: true, 561 } 562 563 for _, ic := range na.Network.IPAM.Configs { 564 c := network.IPAMConfig{ 565 Subnet: ic.Subnet, 566 IPRange: ic.Range, 567 Gateway: ic.Gateway, 568 } 569 options.IPAM.Config = append(options.IPAM.Config, c) 570 } 571 572 return options, nil 573 } 574 575 func (c containerConfig) eventFilter() filters.Args { 576 filter := filters.NewArgs() 577 filter.Add("type", events.ContainerEventType) 578 filter.Add("name", c.name()) 579 filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID)) 580 return filter 581 } 582 583 func (c *containerConfig) init() *bool { 584 if c.spec().Init != nil { 585 return &c.spec().Init.Value 586 } 587 return nil 588 }