github.com/mhy12345/docker@v1.12.3/daemon/cluster/executor/container/container.go (about)

     1  package container
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"net"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/Sirupsen/logrus"
    11  
    12  	clustertypes "github.com/docker/docker/daemon/cluster/provider"
    13  	"github.com/docker/docker/reference"
    14  	"github.com/docker/engine-api/types"
    15  	enginecontainer "github.com/docker/engine-api/types/container"
    16  	"github.com/docker/engine-api/types/events"
    17  	"github.com/docker/engine-api/types/filters"
    18  	"github.com/docker/engine-api/types/network"
    19  	"github.com/docker/swarmkit/agent/exec"
    20  	"github.com/docker/swarmkit/api"
    21  )
    22  
    23  const (
    24  	// Explicitly use the kernel's default setting for CPU quota of 100ms.
    25  	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
    26  	cpuQuotaPeriod = 100 * time.Millisecond
    27  
    28  	// systemLabelPrefix represents the reserved namespace for system labels.
    29  	systemLabelPrefix = "com.docker.swarm"
    30  )
    31  
// containerConfig converts task properties into docker container compatible
// components.
//
// A containerConfig is populated through setTask (normally via
// newContainerConfig); its methods read the stored task and derive engine API
// structures from it.
type containerConfig struct {
	// task is the task being converted. It is assigned by setTask once the
	// task has passed validation.
	task                *api.Task
	// networksAttachments indexes the task's network attachments by the
	// name of the attached network.
	networksAttachments map[string]*api.NetworkAttachment
}
    38  
    39  // newContainerConfig returns a validated container config. No methods should
    40  // return an error if this function returns without error.
    41  func newContainerConfig(t *api.Task) (*containerConfig, error) {
    42  	var c containerConfig
    43  	return &c, c.setTask(t)
    44  }
    45  
    46  func (c *containerConfig) setTask(t *api.Task) error {
    47  	container := t.Spec.GetContainer()
    48  	if container == nil {
    49  		return exec.ErrRuntimeUnsupported
    50  	}
    51  
    52  	if container.Image == "" {
    53  		return ErrImageRequired
    54  	}
    55  
    56  	if err := validateMounts(container.Mounts); err != nil {
    57  		return err
    58  	}
    59  
    60  	// index the networks by name
    61  	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
    62  	for _, attachment := range t.Networks {
    63  		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
    64  	}
    65  
    66  	c.task = t
    67  	return nil
    68  }
    69  
    70  func (c *containerConfig) endpoint() *api.Endpoint {
    71  	return c.task.Endpoint
    72  }
    73  
    74  func (c *containerConfig) spec() *api.ContainerSpec {
    75  	return c.task.Spec.GetContainer()
    76  }
    77  
    78  func (c *containerConfig) name() string {
    79  	if c.task.Annotations.Name != "" {
    80  		// if set, use the container Annotations.Name field, set in the orchestrator.
    81  		return c.task.Annotations.Name
    82  	}
    83  
    84  	// fallback to service.slot.id.
    85  	return strings.Join([]string{c.task.ServiceAnnotations.Name, fmt.Sprint(c.task.Slot), c.task.ID}, ".")
    86  }
    87  
    88  func (c *containerConfig) image() string {
    89  	raw := c.spec().Image
    90  	ref, err := reference.ParseNamed(raw)
    91  	if err != nil {
    92  		return raw
    93  	}
    94  	return reference.WithDefaultTag(ref).String()
    95  }
    96  
    97  func (c *containerConfig) config() *enginecontainer.Config {
    98  	config := &enginecontainer.Config{
    99  		Labels:     c.labels(),
   100  		User:       c.spec().User,
   101  		Env:        c.spec().Env,
   102  		WorkingDir: c.spec().Dir,
   103  		Image:      c.image(),
   104  		Volumes:    c.volumes(),
   105  	}
   106  
   107  	if len(c.spec().Command) > 0 {
   108  		// If Command is provided, we replace the whole invocation with Command
   109  		// by replacing Entrypoint and specifying Cmd. Args is ignored in this
   110  		// case.
   111  		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
   112  		config.Cmd = append(config.Cmd, c.spec().Args...)
   113  	} else if len(c.spec().Args) > 0 {
   114  		// In this case, we assume the image has an Entrypoint and Args
   115  		// specifies the arguments for that entrypoint.
   116  		config.Cmd = c.spec().Args
   117  	}
   118  
   119  	return config
   120  }
   121  
   122  func (c *containerConfig) labels() map[string]string {
   123  	var (
   124  		system = map[string]string{
   125  			"task":         "", // mark as cluster task
   126  			"task.id":      c.task.ID,
   127  			"task.name":    fmt.Sprintf("%v.%v", c.task.ServiceAnnotations.Name, c.task.Slot),
   128  			"node.id":      c.task.NodeID,
   129  			"service.id":   c.task.ServiceID,
   130  			"service.name": c.task.ServiceAnnotations.Name,
   131  		}
   132  		labels = make(map[string]string)
   133  	)
   134  
   135  	// base labels are those defined in the spec.
   136  	for k, v := range c.spec().Labels {
   137  		labels[k] = v
   138  	}
   139  
   140  	// we then apply the overrides from the task, which may be set via the
   141  	// orchestrator.
   142  	for k, v := range c.task.Annotations.Labels {
   143  		labels[k] = v
   144  	}
   145  
   146  	// finally, we apply the system labels, which override all labels.
   147  	for k, v := range system {
   148  		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
   149  	}
   150  
   151  	return labels
   152  }
   153  
   154  // volumes gets placed into the Volumes field on the containerConfig.
   155  func (c *containerConfig) volumes() map[string]struct{} {
   156  	r := make(map[string]struct{})
   157  	// Volumes *only* creates anonymous volumes. The rest is mixed in with
   158  	// binds, which aren't actually binds. Basically, any volume that
   159  	// results in a single component must be added here.
   160  	//
   161  	// This is reversed engineered from the behavior of the engine API.
   162  	for _, mount := range c.spec().Mounts {
   163  		if mount.Type == api.MountTypeVolume && mount.Source == "" {
   164  			r[mount.Target] = struct{}{}
   165  		}
   166  	}
   167  	return r
   168  }
   169  
   170  func (c *containerConfig) tmpfs() map[string]string {
   171  	r := make(map[string]string)
   172  
   173  	for _, spec := range c.spec().Mounts {
   174  		if spec.Type != api.MountTypeTmpfs {
   175  			continue
   176  		}
   177  
   178  		r[spec.Target] = getMountMask(&spec)
   179  	}
   180  
   181  	return r
   182  }
   183  
   184  func (c *containerConfig) binds() []string {
   185  	var r []string
   186  	for _, mount := range c.spec().Mounts {
   187  		if mount.Type == api.MountTypeBind || (mount.Type == api.MountTypeVolume && mount.Source != "") {
   188  			spec := fmt.Sprintf("%s:%s", mount.Source, mount.Target)
   189  			mask := getMountMask(&mount)
   190  			if mask != "" {
   191  				spec = fmt.Sprintf("%s:%s", spec, mask)
   192  			}
   193  			r = append(r, spec)
   194  		}
   195  	}
   196  	return r
   197  }
   198  
   199  func getMountMask(m *api.Mount) string {
   200  	var maskOpts []string
   201  	if m.ReadOnly {
   202  		maskOpts = append(maskOpts, "ro")
   203  	}
   204  
   205  	switch m.Type {
   206  	case api.MountTypeVolume:
   207  		if m.VolumeOptions != nil && m.VolumeOptions.NoCopy {
   208  			maskOpts = append(maskOpts, "nocopy")
   209  		}
   210  	case api.MountTypeBind:
   211  		if m.BindOptions == nil {
   212  			break
   213  		}
   214  
   215  		switch m.BindOptions.Propagation {
   216  		case api.MountPropagationPrivate:
   217  			maskOpts = append(maskOpts, "private")
   218  		case api.MountPropagationRPrivate:
   219  			maskOpts = append(maskOpts, "rprivate")
   220  		case api.MountPropagationShared:
   221  			maskOpts = append(maskOpts, "shared")
   222  		case api.MountPropagationRShared:
   223  			maskOpts = append(maskOpts, "rshared")
   224  		case api.MountPropagationSlave:
   225  			maskOpts = append(maskOpts, "slave")
   226  		case api.MountPropagationRSlave:
   227  			maskOpts = append(maskOpts, "rslave")
   228  		}
   229  	case api.MountTypeTmpfs:
   230  		if m.TmpfsOptions == nil {
   231  			break
   232  		}
   233  
   234  		if m.TmpfsOptions.Mode != 0 {
   235  			maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode))
   236  		}
   237  
   238  		if m.TmpfsOptions.SizeBytes != 0 {
   239  			// calculate suffix here, making this linux specific, but that is
   240  			// okay, since API is that way anyways.
   241  
   242  			// we do this by finding the suffix that divides evenly into the
   243  			// value, returing the value itself, with no suffix, if it fails.
   244  			//
   245  			// For the most part, we don't enforce any semantic to this values.
   246  			// The operating system will usually align this and enforce minimum
   247  			// and maximums.
   248  			var (
   249  				size   = m.TmpfsOptions.SizeBytes
   250  				suffix string
   251  			)
   252  			for _, r := range []struct {
   253  				suffix  string
   254  				divisor int64
   255  			}{
   256  				{"g", 1 << 30},
   257  				{"m", 1 << 20},
   258  				{"k", 1 << 10},
   259  			} {
   260  				if size%r.divisor == 0 {
   261  					size = size / r.divisor
   262  					suffix = r.suffix
   263  					break
   264  				}
   265  			}
   266  
   267  			maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix))
   268  		}
   269  	}
   270  
   271  	return strings.Join(maskOpts, ",")
   272  }
   273  
   274  func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
   275  	hc := &enginecontainer.HostConfig{
   276  		Resources: c.resources(),
   277  		Binds:     c.binds(),
   278  		Tmpfs:     c.tmpfs(),
   279  	}
   280  
   281  	if c.task.LogDriver != nil {
   282  		hc.LogConfig = enginecontainer.LogConfig{
   283  			Type:   c.task.LogDriver.Name,
   284  			Config: c.task.LogDriver.Options,
   285  		}
   286  	}
   287  
   288  	return hc
   289  }
   290  
   291  // This handles the case of volumes that are defined inside a service Mount
   292  func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *types.VolumeCreateRequest {
   293  	var (
   294  		driverName string
   295  		driverOpts map[string]string
   296  		labels     map[string]string
   297  	)
   298  
   299  	if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
   300  		driverName = mount.VolumeOptions.DriverConfig.Name
   301  		driverOpts = mount.VolumeOptions.DriverConfig.Options
   302  		labels = mount.VolumeOptions.Labels
   303  	}
   304  
   305  	if mount.VolumeOptions != nil {
   306  		return &types.VolumeCreateRequest{
   307  			Name:       mount.Source,
   308  			Driver:     driverName,
   309  			DriverOpts: driverOpts,
   310  			Labels:     labels,
   311  		}
   312  	}
   313  	return nil
   314  }
   315  
   316  func (c *containerConfig) resources() enginecontainer.Resources {
   317  	resources := enginecontainer.Resources{}
   318  
   319  	// If no limits are specified let the engine use its defaults.
   320  	//
   321  	// TODO(aluzzardi): We might want to set some limits anyway otherwise
   322  	// "unlimited" tasks will step over the reservation of other tasks.
   323  	r := c.task.Spec.Resources
   324  	if r == nil || r.Limits == nil {
   325  		return resources
   326  	}
   327  
   328  	if r.Limits.MemoryBytes > 0 {
   329  		resources.Memory = r.Limits.MemoryBytes
   330  	}
   331  
   332  	if r.Limits.NanoCPUs > 0 {
   333  		// CPU Period must be set in microseconds.
   334  		resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
   335  		resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
   336  	}
   337  
   338  	return resources
   339  }
   340  
   341  // Docker daemon supports just 1 network during container create.
   342  func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig {
   343  	var networks []*api.NetworkAttachment
   344  	if c.task.Spec.GetContainer() != nil {
   345  		networks = c.task.Networks
   346  	}
   347  
   348  	epConfig := make(map[string]*network.EndpointSettings)
   349  	if len(networks) > 0 {
   350  		epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0])
   351  	}
   352  
   353  	return &network.NetworkingConfig{EndpointsConfig: epConfig}
   354  }
   355  
   356  // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create
   357  func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig {
   358  	var networks []*api.NetworkAttachment
   359  	if c.task.Spec.GetContainer() != nil {
   360  		networks = c.task.Networks
   361  	}
   362  
   363  	// First network is used during container create. Other networks are used in "docker network connect"
   364  	if len(networks) < 2 {
   365  		return nil
   366  	}
   367  
   368  	epConfig := make(map[string]*network.EndpointSettings)
   369  	for _, na := range networks[1:] {
   370  		epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na)
   371  	}
   372  	return &network.NetworkingConfig{EndpointsConfig: epConfig}
   373  }
   374  
   375  func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings {
   376  	var ipv4, ipv6 string
   377  	for _, addr := range na.Addresses {
   378  		ip, _, err := net.ParseCIDR(addr)
   379  		if err != nil {
   380  			continue
   381  		}
   382  
   383  		if ip.To4() != nil {
   384  			ipv4 = ip.String()
   385  			continue
   386  		}
   387  
   388  		if ip.To16() != nil {
   389  			ipv6 = ip.String()
   390  		}
   391  	}
   392  
   393  	return &network.EndpointSettings{
   394  		IPAMConfig: &network.EndpointIPAMConfig{
   395  			IPv4Address: ipv4,
   396  			IPv6Address: ipv6,
   397  		},
   398  	}
   399  }
   400  
   401  func (c *containerConfig) virtualIP(networkID string) string {
   402  	if c.task.Endpoint == nil {
   403  		return ""
   404  	}
   405  
   406  	for _, eVip := range c.task.Endpoint.VirtualIPs {
   407  		// We only support IPv4 VIPs for now.
   408  		if eVip.NetworkID == networkID {
   409  			vip, _, err := net.ParseCIDR(eVip.Addr)
   410  			if err != nil {
   411  				return ""
   412  			}
   413  
   414  			return vip.String()
   415  		}
   416  	}
   417  
   418  	return ""
   419  }
   420  
   421  func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
   422  	if len(c.task.Networks) == 0 {
   423  		return nil
   424  	}
   425  
   426  	logrus.Debugf("Creating service config in agent for t = %+v", c.task)
   427  	svcCfg := &clustertypes.ServiceConfig{
   428  		Name:             c.task.ServiceAnnotations.Name,
   429  		Aliases:          make(map[string][]string),
   430  		ID:               c.task.ServiceID,
   431  		VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
   432  	}
   433  
   434  	for _, na := range c.task.Networks {
   435  		svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
   436  			// We support only IPv4 virtual IP for now.
   437  			IPv4: c.virtualIP(na.Network.ID),
   438  		}
   439  		if len(na.Aliases) > 0 {
   440  			svcCfg.Aliases[na.Network.ID] = na.Aliases
   441  		}
   442  	}
   443  
   444  	if c.task.Endpoint != nil {
   445  		for _, ePort := range c.task.Endpoint.Ports {
   446  			svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
   447  				Name:          ePort.Name,
   448  				Protocol:      int32(ePort.Protocol),
   449  				TargetPort:    ePort.TargetPort,
   450  				PublishedPort: ePort.PublishedPort,
   451  			})
   452  		}
   453  	}
   454  
   455  	return svcCfg
   456  }
   457  
   458  // networks returns a list of network names attached to the container. The
   459  // returned name can be used to lookup the corresponding network create
   460  // options.
   461  func (c *containerConfig) networks() []string {
   462  	var networks []string
   463  
   464  	for name := range c.networksAttachments {
   465  		networks = append(networks, name)
   466  	}
   467  
   468  	return networks
   469  }
   470  
   471  func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
   472  	na, ok := c.networksAttachments[name]
   473  	if !ok {
   474  		return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
   475  	}
   476  
   477  	options := types.NetworkCreate{
   478  		// ID:     na.Network.ID,
   479  		Driver: na.Network.DriverState.Name,
   480  		IPAM: network.IPAM{
   481  			Driver: na.Network.IPAM.Driver.Name,
   482  		},
   483  		Options:        na.Network.DriverState.Options,
   484  		Labels:         na.Network.Spec.Annotations.Labels,
   485  		Internal:       na.Network.Spec.Internal,
   486  		EnableIPv6:     na.Network.Spec.Ipv6Enabled,
   487  		CheckDuplicate: true,
   488  	}
   489  
   490  	for _, ic := range na.Network.IPAM.Configs {
   491  		c := network.IPAMConfig{
   492  			Subnet:  ic.Subnet,
   493  			IPRange: ic.Range,
   494  			Gateway: ic.Gateway,
   495  		}
   496  		options.IPAM.Config = append(options.IPAM.Config, c)
   497  	}
   498  
   499  	return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
   500  }
   501  
   502  func (c containerConfig) eventFilter() filters.Args {
   503  	filter := filters.NewArgs()
   504  	filter.Add("type", events.ContainerEventType)
   505  	filter.Add("name", c.name())
   506  	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
   507  	return filter
   508  }