github.com/kim0/docker@v0.6.2-0.20161130212042-4addda3f07e7/daemon/cluster/executor/container/container.go (about)

     1  package container
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"net"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/Sirupsen/logrus"
    11  
    12  	"github.com/docker/docker/api/types"
    13  	enginecontainer "github.com/docker/docker/api/types/container"
    14  	"github.com/docker/docker/api/types/events"
    15  	"github.com/docker/docker/api/types/filters"
    16  	"github.com/docker/docker/api/types/network"
    17  	clustertypes "github.com/docker/docker/daemon/cluster/provider"
    18  	"github.com/docker/docker/reference"
    19  	"github.com/docker/swarmkit/agent/exec"
    20  	"github.com/docker/swarmkit/api"
    21  	"github.com/docker/swarmkit/protobuf/ptypes"
    22  )
    23  
    24  const (
    25  	// Explicitly use the kernel's default setting for CPU quota of 100ms.
    26  	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
    27  	cpuQuotaPeriod = 100 * time.Millisecond
    28  
    29  	// systemLabelPrefix represents the reserved namespace for system labels.
    30  	systemLabelPrefix = "com.docker.swarm"
    31  )
    32  
// containerConfig converts task properties into docker container compatible
// components.
//
// Instances are expected to be built through newContainerConfig, which
// validates the task via setTask before any accessor is used.
type containerConfig struct {
	// task is the task this configuration is derived from. It is assigned by
	// setTask only after validation succeeded.
	task                *api.Task
	// networksAttachments indexes the task's network attachments by network
	// name (the Name annotation of the network spec).
	networksAttachments map[string]*api.NetworkAttachment
}
    39  
    40  // newContainerConfig returns a validated container config. No methods should
    41  // return an error if this function returns without error.
    42  func newContainerConfig(t *api.Task) (*containerConfig, error) {
    43  	var c containerConfig
    44  	return &c, c.setTask(t)
    45  }
    46  
    47  func (c *containerConfig) setTask(t *api.Task) error {
    48  	if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil {
    49  		return exec.ErrRuntimeUnsupported
    50  	}
    51  
    52  	container := t.Spec.GetContainer()
    53  	if container != nil {
    54  		if container.Image == "" {
    55  			return ErrImageRequired
    56  		}
    57  
    58  		if err := validateMounts(container.Mounts); err != nil {
    59  			return err
    60  		}
    61  	}
    62  
    63  	// index the networks by name
    64  	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
    65  	for _, attachment := range t.Networks {
    66  		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
    67  	}
    68  
    69  	c.task = t
    70  	return nil
    71  }
    72  
    73  func (c *containerConfig) id() string {
    74  	attachment := c.task.Spec.GetAttachment()
    75  	if attachment == nil {
    76  		return ""
    77  	}
    78  
    79  	return attachment.ContainerID
    80  }
    81  
// taskID returns the ID of the task backing this config.
func (c *containerConfig) taskID() string {
	return c.task.ID
}
    85  
// endpoint returns the task's Endpoint field as-is; it may be nil.
func (c *containerConfig) endpoint() *api.Endpoint {
	return c.task.Endpoint
}
    89  
// spec returns the container spec of the task. The result is nil when the
// task spec does not carry a container (setTask also accepts tasks that only
// have an attachment), so callers must only use it for container tasks.
func (c *containerConfig) spec() *api.ContainerSpec {
	return c.task.Spec.GetContainer()
}
    93  
    94  func (c *containerConfig) nameOrID() string {
    95  	if c.task.Spec.GetContainer() != nil {
    96  		return c.name()
    97  	}
    98  
    99  	return c.id()
   100  }
   101  
   102  func (c *containerConfig) name() string {
   103  	if c.task.Annotations.Name != "" {
   104  		// if set, use the container Annotations.Name field, set in the orchestrator.
   105  		return c.task.Annotations.Name
   106  	}
   107  
   108  	slot := fmt.Sprint(c.task.Slot)
   109  	if slot == "" || c.task.Slot == 0 {
   110  		slot = c.task.NodeID
   111  	}
   112  
   113  	// fallback to service.slot.id.
   114  	return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID)
   115  }
   116  
   117  func (c *containerConfig) image() string {
   118  	raw := c.spec().Image
   119  	ref, err := reference.ParseNamed(raw)
   120  	if err != nil {
   121  		return raw
   122  	}
   123  	return reference.WithDefaultTag(ref).String()
   124  }
   125  
   126  func (c *containerConfig) config() *enginecontainer.Config {
   127  	config := &enginecontainer.Config{
   128  		Labels:      c.labels(),
   129  		User:        c.spec().User,
   130  		Env:         c.spec().Env,
   131  		WorkingDir:  c.spec().Dir,
   132  		Image:       c.image(),
   133  		Volumes:     c.volumes(),
   134  		Healthcheck: c.healthcheck(),
   135  	}
   136  
   137  	if len(c.spec().Command) > 0 {
   138  		// If Command is provided, we replace the whole invocation with Command
   139  		// by replacing Entrypoint and specifying Cmd. Args is ignored in this
   140  		// case.
   141  		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
   142  		config.Cmd = append(config.Cmd, c.spec().Args...)
   143  	} else if len(c.spec().Args) > 0 {
   144  		// In this case, we assume the image has an Entrypoint and Args
   145  		// specifies the arguments for that entrypoint.
   146  		config.Cmd = c.spec().Args
   147  	}
   148  
   149  	return config
   150  }
   151  
   152  func (c *containerConfig) labels() map[string]string {
   153  	var (
   154  		system = map[string]string{
   155  			"task":         "", // mark as cluster task
   156  			"task.id":      c.task.ID,
   157  			"task.name":    c.name(),
   158  			"node.id":      c.task.NodeID,
   159  			"service.id":   c.task.ServiceID,
   160  			"service.name": c.task.ServiceAnnotations.Name,
   161  		}
   162  		labels = make(map[string]string)
   163  	)
   164  
   165  	// base labels are those defined in the spec.
   166  	for k, v := range c.spec().Labels {
   167  		labels[k] = v
   168  	}
   169  
   170  	// we then apply the overrides from the task, which may be set via the
   171  	// orchestrator.
   172  	for k, v := range c.task.Annotations.Labels {
   173  		labels[k] = v
   174  	}
   175  
   176  	// finally, we apply the system labels, which override all labels.
   177  	for k, v := range system {
   178  		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
   179  	}
   180  
   181  	return labels
   182  }
   183  
   184  // volumes gets placed into the Volumes field on the containerConfig.
   185  func (c *containerConfig) volumes() map[string]struct{} {
   186  	r := make(map[string]struct{})
   187  	// Volumes *only* creates anonymous volumes. The rest is mixed in with
   188  	// binds, which aren't actually binds. Basically, any volume that
   189  	// results in a single component must be added here.
   190  	//
   191  	// This is reversed engineered from the behavior of the engine API.
   192  	for _, mount := range c.spec().Mounts {
   193  		if mount.Type == api.MountTypeVolume && mount.Source == "" {
   194  			r[mount.Target] = struct{}{}
   195  		}
   196  	}
   197  	return r
   198  }
   199  
   200  func (c *containerConfig) tmpfs() map[string]string {
   201  	r := make(map[string]string)
   202  
   203  	for _, spec := range c.spec().Mounts {
   204  		if spec.Type != api.MountTypeTmpfs {
   205  			continue
   206  		}
   207  
   208  		r[spec.Target] = getMountMask(&spec)
   209  	}
   210  
   211  	return r
   212  }
   213  
   214  func (c *containerConfig) binds() []string {
   215  	var r []string
   216  	for _, mount := range c.spec().Mounts {
   217  		if mount.Type == api.MountTypeBind || (mount.Type == api.MountTypeVolume && mount.Source != "") {
   218  			spec := fmt.Sprintf("%s:%s", mount.Source, mount.Target)
   219  			mask := getMountMask(&mount)
   220  			if mask != "" {
   221  				spec = fmt.Sprintf("%s:%s", spec, mask)
   222  			}
   223  			r = append(r, spec)
   224  		}
   225  	}
   226  	return r
   227  }
   228  
   229  func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig {
   230  	hcSpec := c.spec().Healthcheck
   231  	if hcSpec == nil {
   232  		return nil
   233  	}
   234  	interval, _ := ptypes.Duration(hcSpec.Interval)
   235  	timeout, _ := ptypes.Duration(hcSpec.Timeout)
   236  	return &enginecontainer.HealthConfig{
   237  		Test:     hcSpec.Test,
   238  		Interval: interval,
   239  		Timeout:  timeout,
   240  		Retries:  int(hcSpec.Retries),
   241  	}
   242  }
   243  
   244  func getMountMask(m *api.Mount) string {
   245  	var maskOpts []string
   246  	if m.ReadOnly {
   247  		maskOpts = append(maskOpts, "ro")
   248  	}
   249  
   250  	switch m.Type {
   251  	case api.MountTypeVolume:
   252  		if m.VolumeOptions != nil && m.VolumeOptions.NoCopy {
   253  			maskOpts = append(maskOpts, "nocopy")
   254  		}
   255  	case api.MountTypeBind:
   256  		if m.BindOptions == nil {
   257  			break
   258  		}
   259  
   260  		switch m.BindOptions.Propagation {
   261  		case api.MountPropagationPrivate:
   262  			maskOpts = append(maskOpts, "private")
   263  		case api.MountPropagationRPrivate:
   264  			maskOpts = append(maskOpts, "rprivate")
   265  		case api.MountPropagationShared:
   266  			maskOpts = append(maskOpts, "shared")
   267  		case api.MountPropagationRShared:
   268  			maskOpts = append(maskOpts, "rshared")
   269  		case api.MountPropagationSlave:
   270  			maskOpts = append(maskOpts, "slave")
   271  		case api.MountPropagationRSlave:
   272  			maskOpts = append(maskOpts, "rslave")
   273  		}
   274  	case api.MountTypeTmpfs:
   275  		if m.TmpfsOptions == nil {
   276  			break
   277  		}
   278  
   279  		if m.TmpfsOptions.Mode != 0 {
   280  			maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode))
   281  		}
   282  
   283  		if m.TmpfsOptions.SizeBytes != 0 {
   284  			// calculate suffix here, making this linux specific, but that is
   285  			// okay, since API is that way anyways.
   286  
   287  			// we do this by finding the suffix that divides evenly into the
   288  			// value, returing the value itself, with no suffix, if it fails.
   289  			//
   290  			// For the most part, we don't enforce any semantic to this values.
   291  			// The operating system will usually align this and enforce minimum
   292  			// and maximums.
   293  			var (
   294  				size   = m.TmpfsOptions.SizeBytes
   295  				suffix string
   296  			)
   297  			for _, r := range []struct {
   298  				suffix  string
   299  				divisor int64
   300  			}{
   301  				{"g", 1 << 30},
   302  				{"m", 1 << 20},
   303  				{"k", 1 << 10},
   304  			} {
   305  				if size%r.divisor == 0 {
   306  					size = size / r.divisor
   307  					suffix = r.suffix
   308  					break
   309  				}
   310  			}
   311  
   312  			maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix))
   313  		}
   314  	}
   315  
   316  	return strings.Join(maskOpts, ",")
   317  }
   318  
   319  func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
   320  	hc := &enginecontainer.HostConfig{
   321  		Resources: c.resources(),
   322  		Binds:     c.binds(),
   323  		Tmpfs:     c.tmpfs(),
   324  		GroupAdd:  c.spec().Groups,
   325  	}
   326  
   327  	if c.task.LogDriver != nil {
   328  		hc.LogConfig = enginecontainer.LogConfig{
   329  			Type:   c.task.LogDriver.Name,
   330  			Config: c.task.LogDriver.Options,
   331  		}
   332  	}
   333  
   334  	return hc
   335  }
   336  
   337  // This handles the case of volumes that are defined inside a service Mount
   338  func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *types.VolumeCreateRequest {
   339  	var (
   340  		driverName string
   341  		driverOpts map[string]string
   342  		labels     map[string]string
   343  	)
   344  
   345  	if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
   346  		driverName = mount.VolumeOptions.DriverConfig.Name
   347  		driverOpts = mount.VolumeOptions.DriverConfig.Options
   348  		labels = mount.VolumeOptions.Labels
   349  	}
   350  
   351  	if mount.VolumeOptions != nil {
   352  		return &types.VolumeCreateRequest{
   353  			Name:       mount.Source,
   354  			Driver:     driverName,
   355  			DriverOpts: driverOpts,
   356  			Labels:     labels,
   357  		}
   358  	}
   359  	return nil
   360  }
   361  
   362  func (c *containerConfig) resources() enginecontainer.Resources {
   363  	resources := enginecontainer.Resources{}
   364  
   365  	// If no limits are specified let the engine use its defaults.
   366  	//
   367  	// TODO(aluzzardi): We might want to set some limits anyway otherwise
   368  	// "unlimited" tasks will step over the reservation of other tasks.
   369  	r := c.task.Spec.Resources
   370  	if r == nil || r.Limits == nil {
   371  		return resources
   372  	}
   373  
   374  	if r.Limits.MemoryBytes > 0 {
   375  		resources.Memory = r.Limits.MemoryBytes
   376  	}
   377  
   378  	if r.Limits.NanoCPUs > 0 {
   379  		// CPU Period must be set in microseconds.
   380  		resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
   381  		resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
   382  	}
   383  
   384  	return resources
   385  }
   386  
   387  // Docker daemon supports just 1 network during container create.
   388  func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig {
   389  	var networks []*api.NetworkAttachment
   390  	if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil {
   391  		networks = c.task.Networks
   392  	}
   393  
   394  	epConfig := make(map[string]*network.EndpointSettings)
   395  	if len(networks) > 0 {
   396  		epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0])
   397  	}
   398  
   399  	return &network.NetworkingConfig{EndpointsConfig: epConfig}
   400  }
   401  
   402  // TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create
   403  func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig {
   404  	var networks []*api.NetworkAttachment
   405  	if c.task.Spec.GetContainer() != nil {
   406  		networks = c.task.Networks
   407  	}
   408  
   409  	// First network is used during container create. Other networks are used in "docker network connect"
   410  	if len(networks) < 2 {
   411  		return nil
   412  	}
   413  
   414  	epConfig := make(map[string]*network.EndpointSettings)
   415  	for _, na := range networks[1:] {
   416  		epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na)
   417  	}
   418  	return &network.NetworkingConfig{EndpointsConfig: epConfig}
   419  }
   420  
   421  func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings {
   422  	var ipv4, ipv6 string
   423  	for _, addr := range na.Addresses {
   424  		ip, _, err := net.ParseCIDR(addr)
   425  		if err != nil {
   426  			continue
   427  		}
   428  
   429  		if ip.To4() != nil {
   430  			ipv4 = ip.String()
   431  			continue
   432  		}
   433  
   434  		if ip.To16() != nil {
   435  			ipv6 = ip.String()
   436  		}
   437  	}
   438  
   439  	return &network.EndpointSettings{
   440  		NetworkID: na.Network.ID,
   441  		IPAMConfig: &network.EndpointIPAMConfig{
   442  			IPv4Address: ipv4,
   443  			IPv6Address: ipv6,
   444  		},
   445  	}
   446  }
   447  
   448  func (c *containerConfig) virtualIP(networkID string) string {
   449  	if c.task.Endpoint == nil {
   450  		return ""
   451  	}
   452  
   453  	for _, eVip := range c.task.Endpoint.VirtualIPs {
   454  		// We only support IPv4 VIPs for now.
   455  		if eVip.NetworkID == networkID {
   456  			vip, _, err := net.ParseCIDR(eVip.Addr)
   457  			if err != nil {
   458  				return ""
   459  			}
   460  
   461  			return vip.String()
   462  		}
   463  	}
   464  
   465  	return ""
   466  }
   467  
   468  func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
   469  	if len(c.task.Networks) == 0 {
   470  		return nil
   471  	}
   472  
   473  	logrus.Debugf("Creating service config in agent for t = %+v", c.task)
   474  	svcCfg := &clustertypes.ServiceConfig{
   475  		Name:             c.task.ServiceAnnotations.Name,
   476  		Aliases:          make(map[string][]string),
   477  		ID:               c.task.ServiceID,
   478  		VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
   479  	}
   480  
   481  	for _, na := range c.task.Networks {
   482  		svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
   483  			// We support only IPv4 virtual IP for now.
   484  			IPv4: c.virtualIP(na.Network.ID),
   485  		}
   486  		if len(na.Aliases) > 0 {
   487  			svcCfg.Aliases[na.Network.ID] = na.Aliases
   488  		}
   489  	}
   490  
   491  	if c.task.Endpoint != nil {
   492  		for _, ePort := range c.task.Endpoint.Ports {
   493  			svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
   494  				Name:          ePort.Name,
   495  				Protocol:      int32(ePort.Protocol),
   496  				TargetPort:    ePort.TargetPort,
   497  				PublishedPort: ePort.PublishedPort,
   498  			})
   499  		}
   500  	}
   501  
   502  	return svcCfg
   503  }
   504  
   505  // networks returns a list of network names attached to the container. The
   506  // returned name can be used to lookup the corresponding network create
   507  // options.
   508  func (c *containerConfig) networks() []string {
   509  	var networks []string
   510  
   511  	for name := range c.networksAttachments {
   512  		networks = append(networks, name)
   513  	}
   514  
   515  	return networks
   516  }
   517  
   518  func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
   519  	na, ok := c.networksAttachments[name]
   520  	if !ok {
   521  		return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
   522  	}
   523  
   524  	options := types.NetworkCreate{
   525  		// ID:     na.Network.ID,
   526  		Driver: na.Network.DriverState.Name,
   527  		IPAM: &network.IPAM{
   528  			Driver: na.Network.IPAM.Driver.Name,
   529  		},
   530  		Options:        na.Network.DriverState.Options,
   531  		Labels:         na.Network.Spec.Annotations.Labels,
   532  		Internal:       na.Network.Spec.Internal,
   533  		EnableIPv6:     na.Network.Spec.Ipv6Enabled,
   534  		CheckDuplicate: true,
   535  	}
   536  
   537  	for _, ic := range na.Network.IPAM.Configs {
   538  		c := network.IPAMConfig{
   539  			Subnet:  ic.Subnet,
   540  			IPRange: ic.Range,
   541  			Gateway: ic.Gateway,
   542  		}
   543  		options.IPAM.Config = append(options.IPAM.Config, c)
   544  	}
   545  
   546  	return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
   547  }
   548  
   549  func (c containerConfig) eventFilter() filters.Args {
   550  	filter := filters.NewArgs()
   551  	filter.Add("type", events.ContainerEventType)
   552  	filter.Add("name", c.name())
   553  	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
   554  	return filter
   555  }