github.com/kaisenlinux/docker@v0.0.0-20230510090727-ea55db55fac7/swarmkit/agent/exec/dockerapi/container.go (about)

     1  package dockerapi
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"net"
     7  	"strconv"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/docker/docker/api/types"
    12  	enginecontainer "github.com/docker/docker/api/types/container"
    13  	"github.com/docker/docker/api/types/events"
    14  	"github.com/docker/docker/api/types/filters"
    15  	enginemount "github.com/docker/docker/api/types/mount"
    16  	"github.com/docker/docker/api/types/network"
    17  	"github.com/docker/docker/api/types/volume"
    18  	"github.com/docker/go-connections/nat"
    19  	"github.com/docker/go-units"
    20  	"github.com/docker/swarmkit/agent/exec"
    21  	"github.com/docker/swarmkit/api"
    22  	"github.com/docker/swarmkit/api/genericresource"
    23  	"github.com/docker/swarmkit/api/naming"
    24  	"github.com/docker/swarmkit/template"
    25  	gogotypes "github.com/gogo/protobuf/types"
    26  )
    27  
const (
	// cpuQuotaPeriod is the CPU bandwidth period applied when a task has a
	// CPU limit. It explicitly uses the kernel's default setting for CPU
	// quota of 100ms; resources() converts it to microseconds and derives the
	// quota from it.
	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
	cpuQuotaPeriod = 100 * time.Millisecond

	// systemLabelPrefix represents the reserved namespace for system labels.
	// labels() prepends it (joined with ".") to every system label key, and
	// eventFilter() uses it to build the task-id label filter.
	systemLabelPrefix = "com.docker.swarm"
)
    36  
// containerConfig converts task properties into docker container compatible
// components. It is populated by setTask; the remaining methods only read
// from it.
type containerConfig struct {
	// task is the task this configuration is derived from. setTask stores the
	// caller's pointer and replaces its Spec.Runtime with the expanded
	// container spec.
	task                *api.Task
	// networksAttachments indexes the task's network attachments by network
	// name (Network.Spec.Annotations.Name).
	networksAttachments map[string]*api.NetworkAttachment
}
    43  
    44  // newContainerConfig returns a validated container config. No methods should
    45  // return an error if this function returns without error.
    46  func newContainerConfig(n *api.NodeDescription, t *api.Task) (*containerConfig, error) {
    47  	var c containerConfig
    48  	return &c, c.setTask(n, t)
    49  }
    50  
    51  func (c *containerConfig) setTask(n *api.NodeDescription, t *api.Task) error {
    52  	container := t.Spec.GetContainer()
    53  	if container == nil {
    54  		return exec.ErrRuntimeUnsupported
    55  	}
    56  
    57  	if container.Image == "" {
    58  		return ErrImageRequired
    59  	}
    60  
    61  	// index the networks by name
    62  	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
    63  	for _, attachment := range t.Networks {
    64  		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
    65  	}
    66  
    67  	c.task = t
    68  	preparedSpec, err := template.ExpandContainerSpec(n, t)
    69  	if err != nil {
    70  		return err
    71  	}
    72  	c.task.Spec.Runtime = &api.TaskSpec_Container{
    73  		Container: preparedSpec,
    74  	}
    75  
    76  	return nil
    77  }
    78  
    79  func (c *containerConfig) endpoint() *api.Endpoint {
    80  	return c.task.Endpoint
    81  }
    82  
    83  func (c *containerConfig) spec() *api.ContainerSpec {
    84  	return c.task.Spec.GetContainer()
    85  }
    86  
    87  func (c *containerConfig) name() string {
    88  	return naming.Task(c.task)
    89  }
    90  
    91  func (c *containerConfig) image() string {
    92  	return c.spec().Image
    93  }
    94  
    95  func portSpec(port uint32, protocol api.PortConfig_Protocol) nat.Port {
    96  	return nat.Port(fmt.Sprintf("%d/%s", port, strings.ToLower(protocol.String())))
    97  }
    98  
    99  func (c *containerConfig) portBindings() nat.PortMap {
   100  	portBindings := nat.PortMap{}
   101  	if c.task.Endpoint == nil {
   102  		return portBindings
   103  	}
   104  
   105  	for _, portConfig := range c.task.Endpoint.Ports {
   106  		if portConfig.PublishMode != api.PublishModeHost {
   107  			continue
   108  		}
   109  
   110  		port := portSpec(portConfig.TargetPort, portConfig.Protocol)
   111  		binding := []nat.PortBinding{
   112  			{},
   113  		}
   114  
   115  		if portConfig.PublishedPort != 0 {
   116  			binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort))
   117  		}
   118  		portBindings[port] = binding
   119  	}
   120  
   121  	return portBindings
   122  }
   123  
   124  func (c *containerConfig) isolation() enginecontainer.Isolation {
   125  	switch c.spec().Isolation {
   126  	case api.ContainerIsolationDefault:
   127  		return enginecontainer.Isolation("default")
   128  	case api.ContainerIsolationHyperV:
   129  		return enginecontainer.Isolation("hyperv")
   130  	case api.ContainerIsolationProcess:
   131  		return enginecontainer.Isolation("process")
   132  	}
   133  	return enginecontainer.Isolation("")
   134  }
   135  
   136  func (c *containerConfig) exposedPorts() map[nat.Port]struct{} {
   137  	exposedPorts := make(map[nat.Port]struct{})
   138  	if c.task.Endpoint == nil {
   139  		return exposedPorts
   140  	}
   141  
   142  	for _, portConfig := range c.task.Endpoint.Ports {
   143  		if portConfig.PublishMode != api.PublishModeHost {
   144  			continue
   145  		}
   146  
   147  		port := portSpec(portConfig.TargetPort, portConfig.Protocol)
   148  		exposedPorts[port] = struct{}{}
   149  	}
   150  
   151  	return exposedPorts
   152  }
   153  
   154  func (c *containerConfig) config() *enginecontainer.Config {
   155  	genericEnvs := genericresource.EnvFormat(c.task.AssignedGenericResources, "DOCKER_RESOURCE")
   156  	env := append(c.spec().Env, genericEnvs...)
   157  
   158  	config := &enginecontainer.Config{
   159  		Labels:       c.labels(),
   160  		StopSignal:   c.spec().StopSignal,
   161  		User:         c.spec().User,
   162  		Hostname:     c.spec().Hostname,
   163  		Env:          env,
   164  		WorkingDir:   c.spec().Dir,
   165  		Tty:          c.spec().TTY,
   166  		OpenStdin:    c.spec().OpenStdin,
   167  		Image:        c.image(),
   168  		ExposedPorts: c.exposedPorts(),
   169  		Healthcheck:  c.healthcheck(),
   170  	}
   171  
   172  	if len(c.spec().Command) > 0 {
   173  		// If Command is provided, we replace the whole invocation with Command
   174  		// by replacing Entrypoint and specifying Cmd. Args is ignored in this
   175  		// case.
   176  		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
   177  		config.Cmd = append(config.Cmd, c.spec().Args...)
   178  	} else if len(c.spec().Args) > 0 {
   179  		// In this case, we assume the image has an Entrypoint and Args
   180  		// specifies the arguments for that entrypoint.
   181  		config.Cmd = c.spec().Args
   182  	}
   183  
   184  	return config
   185  }
   186  
   187  func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig {
   188  	hcSpec := c.spec().Healthcheck
   189  	if hcSpec == nil {
   190  		return nil
   191  	}
   192  	interval, _ := gogotypes.DurationFromProto(hcSpec.Interval)
   193  	timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout)
   194  	startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod)
   195  	return &enginecontainer.HealthConfig{
   196  		Test:        hcSpec.Test,
   197  		Interval:    interval,
   198  		Timeout:     timeout,
   199  		Retries:     int(hcSpec.Retries),
   200  		StartPeriod: startPeriod,
   201  	}
   202  }
   203  
   204  func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
   205  	hc := &enginecontainer.HostConfig{
   206  		Resources:    c.resources(),
   207  		Mounts:       c.mounts(),
   208  		Tmpfs:        c.tmpfs(),
   209  		GroupAdd:     c.spec().Groups,
   210  		PortBindings: c.portBindings(),
   211  		Init:         c.init(),
   212  		Isolation:    c.isolation(),
   213  		CapAdd:       c.spec().CapabilityAdd,
   214  		CapDrop:      c.spec().CapabilityDrop,
   215  	}
   216  
   217  	// The format of extra hosts on swarmkit is specified in:
   218  	// http://man7.org/linux/man-pages/man5/hosts.5.html
   219  	//    IP_address canonical_hostname [aliases...]
   220  	// However, the format of ExtraHosts in HostConfig is
   221  	//    <host>:<ip>
   222  	// We need to do the conversion here
   223  	// (Alias is ignored for now)
   224  	for _, entry := range c.spec().Hosts {
   225  		parts := strings.Fields(entry)
   226  		if len(parts) > 1 {
   227  			hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0]))
   228  		}
   229  	}
   230  
   231  	if c.task.LogDriver != nil {
   232  		hc.LogConfig = enginecontainer.LogConfig{
   233  			Type:   c.task.LogDriver.Name,
   234  			Config: c.task.LogDriver.Options,
   235  		}
   236  	}
   237  
   238  	return hc
   239  }
   240  
   241  func (c *containerConfig) labels() map[string]string {
   242  	var (
   243  		system = map[string]string{
   244  			"task":         "", // mark as cluster task
   245  			"task.id":      c.task.ID,
   246  			"task.name":    naming.Task(c.task),
   247  			"node.id":      c.task.NodeID,
   248  			"service.id":   c.task.ServiceID,
   249  			"service.name": c.task.ServiceAnnotations.Name,
   250  		}
   251  		labels = make(map[string]string)
   252  	)
   253  
   254  	// base labels are those defined in the spec.
   255  	for k, v := range c.spec().Labels {
   256  		labels[k] = v
   257  	}
   258  
   259  	// we then apply the overrides from the task, which may be set via the
   260  	// orchestrator.
   261  	for k, v := range c.task.Annotations.Labels {
   262  		labels[k] = v
   263  	}
   264  
   265  	// finally, we apply the system labels, which override all labels.
   266  	for k, v := range system {
   267  		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
   268  	}
   269  
   270  	return labels
   271  }
   272  
   273  func (c *containerConfig) tmpfs() map[string]string {
   274  	r := make(map[string]string)
   275  
   276  	for _, spec := range c.spec().Mounts {
   277  		if spec.Type != api.MountTypeTmpfs {
   278  			continue
   279  		}
   280  
   281  		r[spec.Target] = getMountMask(&spec)
   282  	}
   283  
   284  	return r
   285  }
   286  
   287  func (c *containerConfig) mounts() []enginemount.Mount {
   288  	var r []enginemount.Mount
   289  	for _, mount := range c.spec().Mounts {
   290  		r = append(r, convertMount(mount))
   291  	}
   292  	return r
   293  }
   294  
   295  func convertMount(m api.Mount) enginemount.Mount {
   296  	mount := enginemount.Mount{
   297  		Source:   m.Source,
   298  		Target:   m.Target,
   299  		ReadOnly: m.ReadOnly,
   300  	}
   301  
   302  	switch m.Type {
   303  	case api.MountTypeBind:
   304  		mount.Type = enginemount.TypeBind
   305  	case api.MountTypeVolume:
   306  		mount.Type = enginemount.TypeVolume
   307  	case api.MountTypeNamedPipe:
   308  		mount.Type = enginemount.TypeNamedPipe
   309  	}
   310  
   311  	if m.BindOptions != nil {
   312  		mount.BindOptions = &enginemount.BindOptions{
   313  			NonRecursive: m.BindOptions.NonRecursive,
   314  		}
   315  		switch m.BindOptions.Propagation {
   316  		case api.MountPropagationRPrivate:
   317  			mount.BindOptions.Propagation = enginemount.PropagationRPrivate
   318  		case api.MountPropagationPrivate:
   319  			mount.BindOptions.Propagation = enginemount.PropagationPrivate
   320  		case api.MountPropagationRSlave:
   321  			mount.BindOptions.Propagation = enginemount.PropagationRSlave
   322  		case api.MountPropagationSlave:
   323  			mount.BindOptions.Propagation = enginemount.PropagationSlave
   324  		case api.MountPropagationRShared:
   325  			mount.BindOptions.Propagation = enginemount.PropagationRShared
   326  		case api.MountPropagationShared:
   327  			mount.BindOptions.Propagation = enginemount.PropagationShared
   328  		}
   329  	}
   330  
   331  	if m.VolumeOptions != nil {
   332  		mount.VolumeOptions = &enginemount.VolumeOptions{
   333  			NoCopy: m.VolumeOptions.NoCopy,
   334  		}
   335  		if m.VolumeOptions.Labels != nil {
   336  			mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels))
   337  			for k, v := range m.VolumeOptions.Labels {
   338  				mount.VolumeOptions.Labels[k] = v
   339  			}
   340  		}
   341  		if m.VolumeOptions.DriverConfig != nil {
   342  			mount.VolumeOptions.DriverConfig = &enginemount.Driver{
   343  				Name: m.VolumeOptions.DriverConfig.Name,
   344  			}
   345  			if m.VolumeOptions.DriverConfig.Options != nil {
   346  				mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options))
   347  				for k, v := range m.VolumeOptions.DriverConfig.Options {
   348  					mount.VolumeOptions.DriverConfig.Options[k] = v
   349  				}
   350  			}
   351  		}
   352  	}
   353  	return mount
   354  }
   355  
   356  func getMountMask(m *api.Mount) string {
   357  	var maskOpts []string
   358  	if m.ReadOnly {
   359  		maskOpts = append(maskOpts, "ro")
   360  	}
   361  
   362  	switch m.Type {
   363  	case api.MountTypeTmpfs:
   364  		if m.TmpfsOptions == nil {
   365  			break
   366  		}
   367  
   368  		if m.TmpfsOptions.Mode != 0 {
   369  			maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode))
   370  		}
   371  
   372  		if m.TmpfsOptions.SizeBytes != 0 {
   373  			// calculate suffix here, making this linux specific, but that is
   374  			// okay, since API is that way anyways.
   375  
   376  			// we do this by finding the suffix that divides evenly into the
   377  			// value, returning the value itself, with no suffix, if it fails.
   378  			//
   379  			// For the most part, we don't enforce any semantic to this values.
   380  			// The operating system will usually align this and enforce minimum
   381  			// and maximums.
   382  			var (
   383  				size   = m.TmpfsOptions.SizeBytes
   384  				suffix string
   385  			)
   386  			for _, r := range []struct {
   387  				suffix  string
   388  				divisor int64
   389  			}{
   390  				{"g", 1 << 30},
   391  				{"m", 1 << 20},
   392  				{"k", 1 << 10},
   393  			} {
   394  				if size%r.divisor == 0 {
   395  					size = size / r.divisor
   396  					suffix = r.suffix
   397  					break
   398  				}
   399  			}
   400  
   401  			maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix))
   402  		}
   403  
   404  		if opts := m.TmpfsOptions.Options; opts != "" {
   405  			validOpts := map[string]bool{
   406  				"exec":   true,
   407  				"noexec": true,
   408  			}
   409  			for _, opt := range strings.Split(strings.ToLower(opts), ",") {
   410  				if _, ok := validOpts[opt]; ok {
   411  					maskOpts = append(maskOpts, opt)
   412  				}
   413  			}
   414  		}
   415  	}
   416  
   417  	return strings.Join(maskOpts, ",")
   418  }
   419  
   420  // This handles the case of volumes that are defined inside a service Mount
   421  func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volume.VolumeCreateBody {
   422  	var (
   423  		driverName string
   424  		driverOpts map[string]string
   425  		labels     map[string]string
   426  	)
   427  
   428  	if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
   429  		driverName = mount.VolumeOptions.DriverConfig.Name
   430  		driverOpts = mount.VolumeOptions.DriverConfig.Options
   431  		labels = mount.VolumeOptions.Labels
   432  	}
   433  
   434  	return &volume.VolumeCreateBody{
   435  		Name:       mount.Source,
   436  		Driver:     driverName,
   437  		DriverOpts: driverOpts,
   438  		Labels:     labels,
   439  	}
   440  }
   441  
   442  func (c *containerConfig) resources() enginecontainer.Resources {
   443  	resources := enginecontainer.Resources{}
   444  
   445  	// set pids limit
   446  	pidsLimit := c.spec().PidsLimit
   447  	if pidsLimit > 0 {
   448  		resources.PidsLimit = &pidsLimit
   449  	}
   450  
   451  	resources.Ulimits = make([]*units.Ulimit, len(c.spec().Ulimits))
   452  	for i, ulimit := range c.spec().Ulimits {
   453  		resources.Ulimits[i] = &units.Ulimit{
   454  			Name: ulimit.Name,
   455  			Soft: ulimit.Soft,
   456  			Hard: ulimit.Hard,
   457  		}
   458  	}
   459  
   460  	// If no limits are specified let the engine use its defaults.
   461  	//
   462  	// TODO(aluzzardi): We might want to set some limits anyway otherwise
   463  	// "unlimited" tasks will step over the reservation of other tasks.
   464  	r := c.task.Spec.Resources
   465  	if r == nil || r.Limits == nil {
   466  		return resources
   467  	}
   468  
   469  	if r.Limits.MemoryBytes > 0 {
   470  		resources.Memory = r.Limits.MemoryBytes
   471  	}
   472  
   473  	if r.Limits.NanoCPUs > 0 {
   474  		// CPU Period must be set in microseconds.
   475  		resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
   476  		resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
   477  	}
   478  
   479  	return resources
   480  }
   481  
   482  func (c *containerConfig) virtualIP(networkID string) string {
   483  	if c.task.Endpoint == nil {
   484  		return ""
   485  	}
   486  
   487  	for _, vip := range c.task.Endpoint.VirtualIPs {
   488  		// We only support IPv4 VIPs for now.
   489  		if vip.NetworkID == networkID {
   490  			vip, _, err := net.ParseCIDR(vip.Addr)
   491  			if err != nil {
   492  				return ""
   493  			}
   494  
   495  			return vip.String()
   496  		}
   497  	}
   498  
   499  	return ""
   500  }
   501  
   502  func (c *containerConfig) networkingConfig() *network.NetworkingConfig {
   503  	epConfig := make(map[string]*network.EndpointSettings)
   504  	for _, na := range c.task.Networks {
   505  		var ipv4, ipv6 string
   506  		for _, addr := range na.Addresses {
   507  			ip, _, err := net.ParseCIDR(addr)
   508  			if err != nil {
   509  				continue
   510  			}
   511  
   512  			if ip.To4() != nil {
   513  				ipv4 = ip.String()
   514  				continue
   515  			}
   516  
   517  			if ip.To16() != nil {
   518  				ipv6 = ip.String()
   519  			}
   520  		}
   521  
   522  		epSettings := &network.EndpointSettings{
   523  			IPAMConfig: &network.EndpointIPAMConfig{
   524  				IPv4Address: ipv4,
   525  				IPv6Address: ipv6,
   526  			},
   527  		}
   528  
   529  		epConfig[na.Network.Spec.Annotations.Name] = epSettings
   530  	}
   531  
   532  	return &network.NetworkingConfig{EndpointsConfig: epConfig}
   533  }
   534  
   535  // networks returns a list of network names attached to the container. The
   536  // returned name can be used to lookup the corresponding network create
   537  // options.
   538  func (c *containerConfig) networks() []string {
   539  	var networks []string
   540  
   541  	for name := range c.networksAttachments {
   542  		networks = append(networks, name)
   543  	}
   544  
   545  	return networks
   546  }
   547  
   548  func (c *containerConfig) networkCreateOptions(name string) (types.NetworkCreate, error) {
   549  	na, ok := c.networksAttachments[name]
   550  	if !ok {
   551  		return types.NetworkCreate{}, errors.New("container: unknown network referenced")
   552  	}
   553  
   554  	options := types.NetworkCreate{
   555  		Driver: na.Network.DriverState.Name,
   556  		IPAM: &network.IPAM{
   557  			Driver: na.Network.IPAM.Driver.Name,
   558  		},
   559  		Options:        na.Network.DriverState.Options,
   560  		CheckDuplicate: true,
   561  	}
   562  
   563  	for _, ic := range na.Network.IPAM.Configs {
   564  		c := network.IPAMConfig{
   565  			Subnet:  ic.Subnet,
   566  			IPRange: ic.Range,
   567  			Gateway: ic.Gateway,
   568  		}
   569  		options.IPAM.Config = append(options.IPAM.Config, c)
   570  	}
   571  
   572  	return options, nil
   573  }
   574  
   575  func (c containerConfig) eventFilter() filters.Args {
   576  	filter := filters.NewArgs()
   577  	filter.Add("type", events.ContainerEventType)
   578  	filter.Add("name", c.name())
   579  	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
   580  	return filter
   581  }
   582  
   583  func (c *containerConfig) init() *bool {
   584  	if c.spec().Init != nil {
   585  		return &c.spec().Init.Value
   586  	}
   587  	return nil
   588  }