github.com/vieux/docker@v0.6.3-0.20161004191708-e097c2a938c7/daemon/cluster/executor/container/container.go (about)

package container

import (
	"errors"
	"fmt"
	"net"
	"strings"
	"time"

	"github.com/Sirupsen/logrus"

	"github.com/docker/docker/api/types"
	enginecontainer "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/events"
	"github.com/docker/docker/api/types/filters"
	"github.com/docker/docker/api/types/network"
	clustertypes "github.com/docker/docker/daemon/cluster/provider"
	"github.com/docker/docker/reference"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
)

const (
	// Explicitly use the kernel's default CPU (CFS) quota period of 100ms.
	// https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
	cpuQuotaPeriod = 100 * time.Millisecond

	// systemLabelPrefix represents the reserved namespace for system labels.
	systemLabelPrefix = "com.docker.swarm"
)

// containerConfig converts task properties into components compatible with
// the Docker container API.
type containerConfig struct {
	task                *api.Task
	networksAttachments map[string]*api.NetworkAttachment
}

// newContainerConfig returns a validated container config. No methods should
// return an error if this function returns without error.
func newContainerConfig(t *api.Task) (*containerConfig, error) {
	var c containerConfig
	return &c, c.setTask(t)
}

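// setTask validates the task spec and indexes its network attachments by
// network name. It is the only containerConfig method expected to return an
// error; the accessors below assume a task that passed this validation.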
func (c *containerConfig) setTask(t *api.Task) error {
	if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil {
		return exec.ErrRuntimeUnsupported
	}

	container := t.Spec.GetContainer()
	if container != nil {
		if container.Image == "" {
			return ErrImageRequired
		}

		if err := validateMounts(container.Mounts); err != nil {
			return err
		}
	}

	// index the networks by name
	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
	for _, attachment := range t.Networks {
		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
	}

	c.task = t
	return nil
}

func (c *containerConfig) id() string {
	attachment := c.task.Spec.GetAttachment()
	if attachment == nil {
		return ""
	}

	return attachment.ContainerID
}

func (c *containerConfig) taskID() string {
	return c.task.ID
}

func (c *containerConfig) endpoint() *api.Endpoint {
	return c.task.Endpoint
}

func (c *containerConfig) spec() *api.ContainerSpec {
	return c.task.Spec.GetContainer()
}

func (c *containerConfig) nameOrID() string {
	if c.task.Spec.GetContainer() != nil {
		return c.name()
	}

	return c.id()
}

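// name returns the container name for the task. With illustrative values, a
// task for service "web" in slot 2 with task ID "abc123" and no name
// annotation would be named "web.2.abc123".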
func (c *containerConfig) name() string {
	if c.task.Annotations.Name != "" {
		// If set, use the container Annotations.Name field, set in the orchestrator.
		return c.task.Annotations.Name
	}

	// Fall back to service.slot.id.
	return strings.Join([]string{c.task.ServiceAnnotations.Name, fmt.Sprint(c.task.Slot), c.task.ID}, ".")
}

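// image returns the task's image reference with the default tag applied
// when none is given; for example, "redis" becomes "redis:latest".
// References that fail to parse are returned unchanged.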
func (c *containerConfig) image() string {
	raw := c.spec().Image
	ref, err := reference.ParseNamed(raw)
	if err != nil {
		return raw
	}
	return reference.WithDefaultTag(ref).String()
}

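// config builds the engine-side container configuration. As an illustrative
// mapping: a spec with Command ["/bin/sh", "-c"] and Args ["echo hello"]
// yields Entrypoint ["/bin/sh", "-c"] and Cmd ["echo hello"], while a spec
// with only Args leaves the image's Entrypoint in place.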
func (c *containerConfig) config() *enginecontainer.Config {
	config := &enginecontainer.Config{
		Labels:     c.labels(),
		User:       c.spec().User,
		Env:        c.spec().Env,
		WorkingDir: c.spec().Dir,
		Image:      c.image(),
		Volumes:    c.volumes(),
	}

	if len(c.spec().Command) > 0 {
		// If Command is provided, it replaces the image's Entrypoint, and
		// Args, if present, become the new Cmd.
		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
		config.Cmd = append(config.Cmd, c.spec().Args...)
	} else if len(c.spec().Args) > 0 {
		// In this case, we assume the image has an Entrypoint and Args
		// specifies the arguments for that entrypoint.
		config.Cmd = c.spec().Args
	}

	return config
}

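// labels merges, in increasing order of precedence, the labels from the
// spec, the task annotations, and the reserved system labels; the last are
// namespaced under systemLabelPrefix, e.g. "com.docker.swarm.task.id".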
func (c *containerConfig) labels() map[string]string {
	taskName := c.task.Annotations.Name
	if taskName == "" {
		if c.task.Slot != 0 {
			taskName = fmt.Sprintf("%v.%v.%v", c.task.ServiceAnnotations.Name, c.task.Slot, c.task.ID)
		} else {
			taskName = fmt.Sprintf("%v.%v.%v", c.task.ServiceAnnotations.Name, c.task.NodeID, c.task.ID)
		}
	}
	var (
		system = map[string]string{
			"task":         "", // mark as cluster task
			"task.id":      c.task.ID,
			"task.name":    taskName,
			"node.id":      c.task.NodeID,
			"service.id":   c.task.ServiceID,
			"service.name": c.task.ServiceAnnotations.Name,
		}
		labels = make(map[string]string)
	)

	// base labels are those defined in the spec.
	for k, v := range c.spec().Labels {
		labels[k] = v
	}

	// we then apply the overrides from the task, which may be set via the
	// orchestrator.
	for k, v := range c.task.Annotations.Labels {
		labels[k] = v
	}

	// finally, we apply the system labels, which override all labels.
	for k, v := range system {
		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
	}

	return labels
}

// volumes returns the set of anonymous volume targets that is placed into
// the Volumes field of the container config.
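// For example, a volume mount with an empty Source and Target "/data"
// produces the entry "/data" -> struct{}{}.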
func (c *containerConfig) volumes() map[string]struct{} {
	r := make(map[string]struct{})
	// Volumes *only* creates anonymous volumes. The rest are mixed in with
	// binds, which aren't actually binds. Basically, any volume that
	// resolves to a single path component must be added here.
	//
	// This is reverse engineered from the behavior of the engine API.
	for _, mount := range c.spec().Mounts {
		if mount.Type == api.MountTypeVolume && mount.Source == "" {
			r[mount.Target] = struct{}{}
		}
	}
	return r
}

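// tmpfs collects tmpfs mounts keyed by target path, with the option string
// built by getMountMask. For example, a tmpfs mount at "/run" limited to
// 64MiB would yield the entry "/run" -> "size=64m".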
func (c *containerConfig) tmpfs() map[string]string {
	r := make(map[string]string)

	for _, spec := range c.spec().Mounts {
		if spec.Type != api.MountTypeTmpfs {
			continue
		}

		r[spec.Target] = getMountMask(&spec)
	}

	return r
}

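// binds renders bind mounts and named volumes as engine bind specs. With
// illustrative values: a read-only bind of "/var/log" with rslave
// propagation becomes "/var/log:/var/log:ro,rslave", and a named volume
// "data" mounted at "/data" with no options becomes "data:/data".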
func (c *containerConfig) binds() []string {
	var r []string
	for _, mount := range c.spec().Mounts {
		if mount.Type == api.MountTypeBind || (mount.Type == api.MountTypeVolume && mount.Source != "") {
			spec := fmt.Sprintf("%s:%s", mount.Source, mount.Target)
			mask := getMountMask(&mount)
			if mask != "" {
				spec = fmt.Sprintf("%s:%s", spec, mask)
			}
			r = append(r, spec)
		}
	}
	return r
}

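// getMountMask builds the comma-separated option string for a mount:
// read-only status, volume copy behavior, bind propagation, or tmpfs mode
// and size, depending on the mount type.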
func getMountMask(m *api.Mount) string {
	var maskOpts []string
	if m.ReadOnly {
		maskOpts = append(maskOpts, "ro")
	}

	switch m.Type {
	case api.MountTypeVolume:
		if m.VolumeOptions != nil && m.VolumeOptions.NoCopy {
			maskOpts = append(maskOpts, "nocopy")
		}
	case api.MountTypeBind:
		if m.BindOptions == nil {
			break
		}

		switch m.BindOptions.Propagation {
		case api.MountPropagationPrivate:
			maskOpts = append(maskOpts, "private")
		case api.MountPropagationRPrivate:
			maskOpts = append(maskOpts, "rprivate")
		case api.MountPropagationShared:
			maskOpts = append(maskOpts, "shared")
		case api.MountPropagationRShared:
			maskOpts = append(maskOpts, "rshared")
		case api.MountPropagationSlave:
			maskOpts = append(maskOpts, "slave")
		case api.MountPropagationRSlave:
			maskOpts = append(maskOpts, "rslave")
		}
	case api.MountTypeTmpfs:
		if m.TmpfsOptions == nil {
			break
		}

		if m.TmpfsOptions.Mode != 0 {
			maskOpts = append(maskOpts, fmt.Sprintf("mode=%o", m.TmpfsOptions.Mode))
		}

		if m.TmpfsOptions.SizeBytes != 0 {
			// Calculate the suffix here, making this Linux-specific, but
			// that is okay, since the API is that way anyway.

			// We do this by finding the suffix that divides evenly into the
			// value, returning the value itself, with no suffix, if it fails.
			//
			// For the most part, we don't enforce any semantics on these
			// values. The operating system will usually align them and
			// enforce minimums and maximums.
			var (
				size   = m.TmpfsOptions.SizeBytes
				suffix string
			)
			for _, r := range []struct {
				suffix  string
				divisor int64
			}{
				{"g", 1 << 30},
				{"m", 1 << 20},
				{"k", 1 << 10},
			} {
				if size%r.divisor == 0 {
					size = size / r.divisor
					suffix = r.suffix
					break
				}
			}

			maskOpts = append(maskOpts, fmt.Sprintf("size=%d%s", size, suffix))
		}
	}

	return strings.Join(maskOpts, ",")
}

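// hostConfig assembles the engine HostConfig from the resource limits, bind
// specs, tmpfs entries, and supplementary groups, adding the task's log
// driver configuration when one is set.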
func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
	hc := &enginecontainer.HostConfig{
		Resources: c.resources(),
		Binds:     c.binds(),
		Tmpfs:     c.tmpfs(),
		GroupAdd:  c.spec().Groups,
	}

	if c.task.LogDriver != nil {
		hc.LogConfig = enginecontainer.LogConfig{
			Type:   c.task.LogDriver.Name,
			Config: c.task.LogDriver.Options,
		}
	}

	return hc
}

// volumeCreateRequest handles the case of volumes that are defined inside a
// service Mount. It returns nil when the mount carries no VolumeOptions.
func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *types.VolumeCreateRequest {
	if mount.VolumeOptions == nil {
		return nil
	}

	var (
		driverName string
		driverOpts map[string]string
		labels     map[string]string
	)
	if mount.VolumeOptions.DriverConfig != nil {
		driverName = mount.VolumeOptions.DriverConfig.Name
		driverOpts = mount.VolumeOptions.DriverConfig.Options
		labels = mount.VolumeOptions.Labels
	}

	return &types.VolumeCreateRequest{
		Name:       mount.Source,
		Driver:     driverName,
		DriverOpts: driverOpts,
		Labels:     labels,
	}
}

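// resources translates task resource limits into engine settings. NanoCPUs
// are expressed against the fixed 100ms quota period: for example, a limit
// of 1.5 CPUs (1,500,000,000 NanoCPUs) yields CPUPeriod=100000µs and
// CPUQuota=150000µs.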
func (c *containerConfig) resources() enginecontainer.Resources {
	resources := enginecontainer.Resources{}

	// If no limits are specified, let the engine use its defaults.
	//
	// TODO(aluzzardi): We might want to set some limits anyway, otherwise
	// "unlimited" tasks will step over the reservation of other tasks.
	r := c.task.Spec.Resources
	if r == nil || r.Limits == nil {
		return resources
	}

	if r.Limits.MemoryBytes > 0 {
		resources.Memory = r.Limits.MemoryBytes
	}

	if r.Limits.NanoCPUs > 0 {
		// The CPU period must be set in microseconds.
		resources.CPUPeriod = int64(cpuQuotaPeriod / time.Microsecond)
		resources.CPUQuota = r.Limits.NanoCPUs * resources.CPUPeriod / 1e9
	}

	return resources
}

// createNetworkingConfig produces the networking configuration used at
// container-create time. The Docker daemon supports only one network during
// container create, so only the first attachment is included here.
func (c *containerConfig) createNetworkingConfig() *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil {
		networks = c.task.Networks
	}

	epConfig := make(map[string]*network.EndpointSettings)
	if len(networks) > 0 {
		epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0])
	}

	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

// connectNetworkingConfig produces the configuration for the remaining
// networks, which are attached via "docker network connect" after the
// container has been created.
//
// TODO: Merge this function with createNetworkingConfig once the daemon
// supports multiple networks in container create.
func (c *containerConfig) connectNetworkingConfig() *network.NetworkingConfig {
	var networks []*api.NetworkAttachment
	if c.task.Spec.GetContainer() != nil {
		networks = c.task.Networks
	}

	// The first network is used during container create. The others are
	// attached with "docker network connect".
	if len(networks) < 2 {
		return nil
	}

	epConfig := make(map[string]*network.EndpointSettings)
	for _, na := range networks[1:] {
		epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na)
	}
	return &network.NetworkingConfig{EndpointsConfig: epConfig}
}

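// getEndpointConfig converts the attachment's CIDR-formatted addresses into
// a static IPAM configuration for the endpoint, split into IPv4 and IPv6.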
func getEndpointConfig(na *api.NetworkAttachment) *network.EndpointSettings {
	var ipv4, ipv6 string
	for _, addr := range na.Addresses {
		ip, _, err := net.ParseCIDR(addr)
		if err != nil {
			continue
		}

		if ip.To4() != nil {
			ipv4 = ip.String()
			continue
		}

		if ip.To16() != nil {
			ipv6 = ip.String()
		}
	}

	return &network.EndpointSettings{
		NetworkID: na.Network.ID,
		IPAMConfig: &network.EndpointIPAMConfig{
			IPv4Address: ipv4,
			IPv6Address: ipv6,
		},
	}
}

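// virtualIP returns the task's virtual IP on the given network, stripped of
// its CIDR suffix, or the empty string when no VIP is assigned there.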
func (c *containerConfig) virtualIP(networkID string) string {
	if c.task.Endpoint == nil {
		return ""
	}

	for _, eVip := range c.task.Endpoint.VirtualIPs {
		// We only support IPv4 VIPs for now.
		if eVip.NetworkID == networkID {
			vip, _, err := net.ParseCIDR(eVip.Addr)
			if err != nil {
				return ""
			}

			return vip.String()
		}
	}

	return ""
}

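// serviceConfig assembles the cluster-level service configuration: service
// name and ID, per-network virtual addresses and aliases, and any published
// ports. It returns nil for tasks without network attachments.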
func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
	if len(c.task.Networks) == 0 {
		return nil
	}

	logrus.Debugf("Creating service config in agent for t = %+v", c.task)
	svcCfg := &clustertypes.ServiceConfig{
		Name:             c.task.ServiceAnnotations.Name,
		Aliases:          make(map[string][]string),
		ID:               c.task.ServiceID,
		VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
	}

	for _, na := range c.task.Networks {
		svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
			// We support only IPv4 virtual IPs for now.
			IPv4: c.virtualIP(na.Network.ID),
		}
		if len(na.Aliases) > 0 {
			svcCfg.Aliases[na.Network.ID] = na.Aliases
		}
	}

	if c.task.Endpoint != nil {
		for _, ePort := range c.task.Endpoint.Ports {
			svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
				Name:          ePort.Name,
				Protocol:      int32(ePort.Protocol),
				TargetPort:    ePort.TargetPort,
				PublishedPort: ePort.PublishedPort,
			})
		}
	}

	return svcCfg
}

// networks returns a list of network names attached to the container. The
// returned names can be used to look up the corresponding network create
// options.
func (c *containerConfig) networks() []string {
	var networks []string

	for name := range c.networksAttachments {
		networks = append(networks, name)
	}

	return networks
}

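// networkCreateRequest translates the named network attachment into an
// engine network-create request, carrying over the driver, IPAM settings,
// and spec options for that network.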
func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
	na, ok := c.networksAttachments[name]
	if !ok {
		return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
	}

	options := types.NetworkCreate{
		// ID:     na.Network.ID,
		Driver: na.Network.DriverState.Name,
		IPAM: &network.IPAM{
			Driver: na.Network.IPAM.Driver.Name,
		},
		Options:        na.Network.DriverState.Options,
		Labels:         na.Network.Spec.Annotations.Labels,
		Internal:       na.Network.Spec.Internal,
		EnableIPv6:     na.Network.Spec.Ipv6Enabled,
		CheckDuplicate: true,
	}

	for _, ic := range na.Network.IPAM.Configs {
		c := network.IPAMConfig{
			Subnet:  ic.Subnet,
			IPRange: ic.Range,
			Gateway: ic.Gateway,
		}
		options.IPAM.Config = append(options.IPAM.Config, c)
	}

	return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
}

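// eventFilter selects container events for this task, matching both the
// container name and the "com.docker.swarm.task.id" label.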
func (c containerConfig) eventFilter() filters.Args {
	filter := filters.NewArgs()
	filter.Add("type", events.ContainerEventType)
	filter.Add("name", c.name())
	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
	return filter
}