github.com/docker/docker@v299999999.0.0-20200612211812-aaf470eca7b5+incompatible/daemon/oci_windows.go (about)

     1  package daemon // import "github.com/docker/docker/daemon"
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"path/filepath"
     8  	"runtime"
     9  	"strings"
    10  
    11  	"github.com/Microsoft/hcsshim/osversion"
    12  	containertypes "github.com/docker/docker/api/types/container"
    13  	"github.com/docker/docker/container"
    14  	"github.com/docker/docker/errdefs"
    15  	"github.com/docker/docker/oci"
    16  	"github.com/docker/docker/oci/caps"
    17  	"github.com/docker/docker/pkg/sysinfo"
    18  	"github.com/docker/docker/pkg/system"
    19  	specs "github.com/opencontainers/runtime-spec/specs-go"
    20  	"github.com/pkg/errors"
    21  	"github.com/sirupsen/logrus"
    22  	"golang.org/x/sys/windows/registry"
    23  )
    24  
    25  const (
    26  	credentialSpecRegistryLocation = `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Virtualization\Containers\CredentialSpecs`
    27  	credentialSpecFileLocation     = "CredentialSpecs"
    28  )
    29  
    30  func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
    31  
    32  	img, err := daemon.imageService.GetImage(string(c.ImageID), nil)
    33  	if err != nil {
    34  		return nil, err
    35  	}
    36  
    37  	s := oci.DefaultOSSpec(img.OS)
    38  
    39  	linkedEnv, err := daemon.setupLinkedContainers(c)
    40  	if err != nil {
    41  		return nil, err
    42  	}
    43  
    44  	// Note, unlike Unix, we do NOT call into SetupWorkingDirectory as
    45  	// this is done in VMCompute. Further, we couldn't do it for Hyper-V
    46  	// containers anyway.
    47  
    48  	if err := daemon.setupSecretDir(c); err != nil {
    49  		return nil, err
    50  	}
    51  
    52  	if err := daemon.setupConfigDir(c); err != nil {
    53  		return nil, err
    54  	}
    55  
    56  	// In s.Mounts
    57  	mounts, err := daemon.setupMounts(c)
    58  	if err != nil {
    59  		return nil, err
    60  	}
    61  
    62  	var isHyperV bool
    63  	if c.HostConfig.Isolation.IsDefault() {
    64  		// Container using default isolation, so take the default from the daemon configuration
    65  		isHyperV = daemon.defaultIsolation.IsHyperV()
    66  	} else {
    67  		// Container may be requesting an explicit isolation mode.
    68  		isHyperV = c.HostConfig.Isolation.IsHyperV()
    69  	}
    70  
    71  	if isHyperV {
    72  		s.Windows.HyperV = &specs.WindowsHyperV{}
    73  	}
    74  
    75  	// If the container has not been started, and has configs or secrets
    76  	// secrets, create symlinks to each config and secret. If it has been
    77  	// started before, the symlinks should have already been created. Also, it
    78  	// is important to not mount a Hyper-V  container that has been started
    79  	// before, to protect the host from the container; for example, from
    80  	// malicious mutation of NTFS data structures.
    81  	if !c.HasBeenStartedBefore && (len(c.SecretReferences) > 0 || len(c.ConfigReferences) > 0) {
    82  		// The container file system is mounted before this function is called,
    83  		// except for Hyper-V containers, so mount it here in that case.
    84  		if isHyperV {
    85  			if err := daemon.Mount(c); err != nil {
    86  				return nil, err
    87  			}
    88  			defer daemon.Unmount(c)
    89  		}
    90  		if err := c.CreateSecretSymlinks(); err != nil {
    91  			return nil, err
    92  		}
    93  		if err := c.CreateConfigSymlinks(); err != nil {
    94  			return nil, err
    95  		}
    96  	}
    97  
    98  	secretMounts, err := c.SecretMounts()
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  	if secretMounts != nil {
   103  		mounts = append(mounts, secretMounts...)
   104  	}
   105  
   106  	configMounts := c.ConfigMounts()
   107  	if configMounts != nil {
   108  		mounts = append(mounts, configMounts...)
   109  	}
   110  
   111  	for _, mount := range mounts {
   112  		m := specs.Mount{
   113  			Source:      mount.Source,
   114  			Destination: mount.Destination,
   115  		}
   116  		if !mount.Writable {
   117  			m.Options = append(m.Options, "ro")
   118  		}
   119  		if img.OS != runtime.GOOS {
   120  			m.Type = "bind"
   121  			m.Options = append(m.Options, "rbind")
   122  			m.Options = append(m.Options, fmt.Sprintf("uvmpath=/tmp/gcs/%s/binds", c.ID))
   123  		}
   124  		s.Mounts = append(s.Mounts, m)
   125  	}
   126  
   127  	// In s.Process
   128  	s.Process.Cwd = c.Config.WorkingDir
   129  	s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
   130  	s.Process.Terminal = c.Config.Tty
   131  
   132  	if c.Config.Tty {
   133  		s.Process.ConsoleSize = &specs.Box{
   134  			Height: c.HostConfig.ConsoleSize[0],
   135  			Width:  c.HostConfig.ConsoleSize[1],
   136  		}
   137  	}
   138  	s.Process.User.Username = c.Config.User
   139  	s.Windows.LayerFolders, err = daemon.imageService.GetLayerFolders(img, c.RWLayer)
   140  	if err != nil {
   141  		return nil, errors.Wrapf(err, "container %s", c.ID)
   142  	}
   143  
   144  	dnsSearch := daemon.getDNSSearchSettings(c)
   145  
   146  	// Get endpoints for the libnetwork allocated networks to the container
   147  	var epList []string
   148  	AllowUnqualifiedDNSQuery := false
   149  	gwHNSID := ""
   150  	if c.NetworkSettings != nil {
   151  		for n := range c.NetworkSettings.Networks {
   152  			sn, err := daemon.FindNetwork(n)
   153  			if err != nil {
   154  				continue
   155  			}
   156  
   157  			ep, err := getEndpointInNetwork(c.Name, sn)
   158  			if err != nil {
   159  				continue
   160  			}
   161  
   162  			data, err := ep.DriverInfo()
   163  			if err != nil {
   164  				continue
   165  			}
   166  
   167  			if data["GW_INFO"] != nil {
   168  				gwInfo := data["GW_INFO"].(map[string]interface{})
   169  				if gwInfo["hnsid"] != nil {
   170  					gwHNSID = gwInfo["hnsid"].(string)
   171  				}
   172  			}
   173  
   174  			if data["hnsid"] != nil {
   175  				epList = append(epList, data["hnsid"].(string))
   176  			}
   177  
   178  			if data["AllowUnqualifiedDNSQuery"] != nil {
   179  				AllowUnqualifiedDNSQuery = true
   180  			}
   181  		}
   182  	}
   183  
   184  	var networkSharedContainerID string
   185  	if c.HostConfig.NetworkMode.IsContainer() {
   186  		networkSharedContainerID = c.NetworkSharedContainerID
   187  		for _, ep := range c.SharedEndpointList {
   188  			epList = append(epList, ep)
   189  		}
   190  	}
   191  
   192  	if gwHNSID != "" {
   193  		epList = append(epList, gwHNSID)
   194  	}
   195  
   196  	s.Windows.Network = &specs.WindowsNetwork{
   197  		AllowUnqualifiedDNSQuery:   AllowUnqualifiedDNSQuery,
   198  		DNSSearchList:              dnsSearch,
   199  		EndpointList:               epList,
   200  		NetworkSharedContainerName: networkSharedContainerID,
   201  	}
   202  
   203  	switch img.OS {
   204  	case "windows":
   205  		if err := daemon.createSpecWindowsFields(c, &s, isHyperV); err != nil {
   206  			return nil, err
   207  		}
   208  	case "linux":
   209  		if !system.LCOWSupported() {
   210  			return nil, fmt.Errorf("Linux containers on Windows are not supported")
   211  		}
   212  		if err := daemon.createSpecLinuxFields(c, &s); err != nil {
   213  			return nil, err
   214  		}
   215  	default:
   216  		return nil, fmt.Errorf("Unsupported platform %q", img.OS)
   217  	}
   218  
   219  	if logrus.IsLevelEnabled(logrus.DebugLevel) {
   220  		if b, err := json.Marshal(&s); err == nil {
   221  			logrus.Debugf("Generated spec: %s", string(b))
   222  		}
   223  	}
   224  
   225  	return (*specs.Spec)(&s), nil
   226  }
   227  
   228  // Sets the Windows-specific fields of the OCI spec
   229  func (daemon *Daemon) createSpecWindowsFields(c *container.Container, s *specs.Spec, isHyperV bool) error {
   230  
   231  	s.Hostname = c.FullHostname()
   232  
   233  	if len(s.Process.Cwd) == 0 {
   234  		// We default to C:\ to workaround the oddity of the case that the
   235  		// default directory for cmd running as LocalSystem (or
   236  		// ContainerAdministrator) is c:\windows\system32. Hence docker run
   237  		// <image> cmd will by default end in c:\windows\system32, rather
   238  		// than 'root' (/) on Linux. The oddity is that if you have a dockerfile
   239  		// which has no WORKDIR and has a COPY file ., . will be interpreted
   240  		// as c:\. Hence, setting it to default of c:\ makes for consistency.
   241  		s.Process.Cwd = `C:\`
   242  	}
   243  
   244  	if c.Config.ArgsEscaped {
   245  		s.Process.CommandLine = c.Path
   246  		if len(c.Args) > 0 {
   247  			s.Process.CommandLine += " " + system.EscapeArgs(c.Args)
   248  		}
   249  	} else {
   250  		s.Process.Args = append([]string{c.Path}, c.Args...)
   251  	}
   252  	s.Root.Readonly = false // Windows does not support a read-only root filesystem
   253  	if !isHyperV {
   254  		if c.BaseFS == nil {
   255  			return errors.New("createSpecWindowsFields: BaseFS of container " + c.ID + " is unexpectedly nil")
   256  		}
   257  
   258  		s.Root.Path = c.BaseFS.Path() // This is not set for Hyper-V containers
   259  		if !strings.HasSuffix(s.Root.Path, `\`) {
   260  			s.Root.Path = s.Root.Path + `\` // Ensure a correctly formatted volume GUID path \\?\Volume{GUID}\
   261  		}
   262  	}
   263  
   264  	// First boot optimization
   265  	s.Windows.IgnoreFlushesDuringBoot = !c.HasBeenStartedBefore
   266  
   267  	setResourcesInSpec(c, s, isHyperV)
   268  
   269  	// Read and add credentials from the security options if a credential spec has been provided.
   270  	if err := daemon.setWindowsCredentialSpec(c, s); err != nil {
   271  		return err
   272  	}
   273  
   274  	// Do we have any assigned devices?
   275  	if len(c.HostConfig.Devices) > 0 {
   276  		if isHyperV {
   277  			return errors.New("device assignment is not supported for HyperV containers")
   278  		}
   279  		if osversion.Build() < osversion.RS5 {
   280  			return errors.New("device assignment requires Windows builds RS5 (17763+) or later")
   281  		}
   282  		for _, deviceMapping := range c.HostConfig.Devices {
   283  			srcParts := strings.SplitN(deviceMapping.PathOnHost, "/", 2)
   284  			if len(srcParts) != 2 {
   285  				return errors.New("invalid device assignment path")
   286  			}
   287  			if srcParts[0] != "class" {
   288  				return errors.Errorf("invalid device assignment type: '%s' should be 'class'", srcParts[0])
   289  			}
   290  			wd := specs.WindowsDevice{
   291  				ID:     srcParts[1],
   292  				IDType: srcParts[0],
   293  			}
   294  			s.Windows.Devices = append(s.Windows.Devices, wd)
   295  		}
   296  	}
   297  
   298  	return nil
   299  }
   300  
   301  var errInvalidCredentialSpecSecOpt = errdefs.InvalidParameter(fmt.Errorf("invalid credential spec security option - value must be prefixed by 'file://', 'registry://', or 'raw://' followed by a non-empty value"))
   302  
   303  // setWindowsCredentialSpec sets the spec's `Windows.CredentialSpec`
   304  // field if relevant
   305  func (daemon *Daemon) setWindowsCredentialSpec(c *container.Container, s *specs.Spec) error {
   306  	if c.HostConfig == nil || c.HostConfig.SecurityOpt == nil {
   307  		return nil
   308  	}
   309  
   310  	// TODO (jrouge/wk8): if provided with several security options, we silently ignore
   311  	// all but the last one (provided they're all valid, otherwise we do return an error);
   312  	// this doesn't seem like a great idea?
   313  	credentialSpec := ""
   314  
   315  	for _, secOpt := range c.HostConfig.SecurityOpt {
   316  		optSplits := strings.SplitN(secOpt, "=", 2)
   317  		if len(optSplits) != 2 {
   318  			return errdefs.InvalidParameter(fmt.Errorf("invalid security option: no equals sign in supplied value %s", secOpt))
   319  		}
   320  		if !strings.EqualFold(optSplits[0], "credentialspec") {
   321  			return errdefs.InvalidParameter(fmt.Errorf("security option not supported: %s", optSplits[0]))
   322  		}
   323  
   324  		credSpecSplits := strings.SplitN(optSplits[1], "://", 2)
   325  		if len(credSpecSplits) != 2 || credSpecSplits[1] == "" {
   326  			return errInvalidCredentialSpecSecOpt
   327  		}
   328  		value := credSpecSplits[1]
   329  
   330  		var err error
   331  		switch strings.ToLower(credSpecSplits[0]) {
   332  		case "file":
   333  			if credentialSpec, err = readCredentialSpecFile(c.ID, daemon.root, filepath.Clean(value)); err != nil {
   334  				return errdefs.InvalidParameter(err)
   335  			}
   336  		case "registry":
   337  			if credentialSpec, err = readCredentialSpecRegistry(c.ID, value); err != nil {
   338  				return errdefs.InvalidParameter(err)
   339  			}
   340  		case "config":
   341  			// if the container does not have a DependencyStore, then it
   342  			// isn't swarmkit managed. In order to avoid creating any
   343  			// impression that `config://` is a valid API, return the same
   344  			// error as if you'd passed any other random word.
   345  			if c.DependencyStore == nil {
   346  				return errInvalidCredentialSpecSecOpt
   347  			}
   348  
   349  			csConfig, err := c.DependencyStore.Configs().Get(value)
   350  			if err != nil {
   351  				return errdefs.System(errors.Wrap(err, "error getting value from config store"))
   352  			}
   353  			// stuff the resulting secret data into a string to use as the
   354  			// CredentialSpec
   355  			credentialSpec = string(csConfig.Spec.Data)
   356  		case "raw":
   357  			credentialSpec = value
   358  		default:
   359  			return errInvalidCredentialSpecSecOpt
   360  		}
   361  	}
   362  
   363  	if credentialSpec != "" {
   364  		if s.Windows == nil {
   365  			s.Windows = &specs.Windows{}
   366  		}
   367  		s.Windows.CredentialSpec = credentialSpec
   368  	}
   369  
   370  	return nil
   371  }
   372  
   373  // Sets the Linux-specific fields of the OCI spec
   374  // TODO: LCOW Support. We need to do a lot more pulling in what can
   375  // be pulled in from oci_linux.go.
   376  func (daemon *Daemon) createSpecLinuxFields(c *container.Container, s *specs.Spec) error {
   377  	s.Root = &specs.Root{
   378  		Path:     "rootfs",
   379  		Readonly: c.HostConfig.ReadonlyRootfs,
   380  	}
   381  
   382  	s.Hostname = c.Config.Hostname
   383  	setLinuxDomainname(c, s)
   384  
   385  	if len(s.Process.Cwd) == 0 {
   386  		s.Process.Cwd = `/`
   387  	}
   388  	s.Process.Args = append([]string{c.Path}, c.Args...)
   389  
   390  	// Note these are against the UVM.
   391  	setResourcesInSpec(c, s, true) // LCOW is Hyper-V only
   392  
   393  	capabilities, err := caps.TweakCapabilities(caps.DefaultCapabilities(), c.HostConfig.CapAdd, c.HostConfig.CapDrop, c.HostConfig.Capabilities, c.HostConfig.Privileged)
   394  	if err != nil {
   395  		return fmt.Errorf("linux spec capabilities: %v", err)
   396  	}
   397  	if err := oci.SetCapabilities(s, capabilities); err != nil {
   398  		return fmt.Errorf("linux spec capabilities: %v", err)
   399  	}
   400  	devPermissions, err := oci.AppendDevicePermissionsFromCgroupRules(nil, c.HostConfig.DeviceCgroupRules)
   401  	if err != nil {
   402  		return fmt.Errorf("linux runtime spec devices: %v", err)
   403  	}
   404  	s.Linux.Resources.Devices = devPermissions
   405  	return nil
   406  }
   407  
   408  func setResourcesInSpec(c *container.Container, s *specs.Spec, isHyperV bool) {
   409  	// In s.Windows.Resources
   410  	cpuShares := uint16(c.HostConfig.CPUShares)
   411  	cpuMaximum := uint16(c.HostConfig.CPUPercent) * 100
   412  	cpuCount := uint64(c.HostConfig.CPUCount)
   413  	if c.HostConfig.NanoCPUs > 0 {
   414  		if isHyperV {
   415  			cpuCount = uint64(c.HostConfig.NanoCPUs / 1e9)
   416  			leftoverNanoCPUs := c.HostConfig.NanoCPUs % 1e9
   417  			if leftoverNanoCPUs != 0 {
   418  				cpuCount++
   419  				cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(cpuCount) / (1e9 / 10000))
   420  				if cpuMaximum < 1 {
   421  					// The requested NanoCPUs is so small that we rounded to 0, use 1 instead
   422  					cpuMaximum = 1
   423  				}
   424  			}
   425  		} else {
   426  			cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(sysinfo.NumCPU()) / (1e9 / 10000))
   427  			if cpuMaximum < 1 {
   428  				// The requested NanoCPUs is so small that we rounded to 0, use 1 instead
   429  				cpuMaximum = 1
   430  			}
   431  		}
   432  	}
   433  
   434  	if cpuMaximum != 0 || cpuShares != 0 || cpuCount != 0 {
   435  		if s.Windows.Resources == nil {
   436  			s.Windows.Resources = &specs.WindowsResources{}
   437  		}
   438  		s.Windows.Resources.CPU = &specs.WindowsCPUResources{
   439  			Maximum: &cpuMaximum,
   440  			Shares:  &cpuShares,
   441  			Count:   &cpuCount,
   442  		}
   443  	}
   444  
   445  	memoryLimit := uint64(c.HostConfig.Memory)
   446  	if memoryLimit != 0 {
   447  		if s.Windows.Resources == nil {
   448  			s.Windows.Resources = &specs.WindowsResources{}
   449  		}
   450  		s.Windows.Resources.Memory = &specs.WindowsMemoryResources{
   451  			Limit: &memoryLimit,
   452  		}
   453  	}
   454  
   455  	if c.HostConfig.IOMaximumBandwidth != 0 || c.HostConfig.IOMaximumIOps != 0 {
   456  		if s.Windows.Resources == nil {
   457  			s.Windows.Resources = &specs.WindowsResources{}
   458  		}
   459  		s.Windows.Resources.Storage = &specs.WindowsStorageResources{
   460  			Bps:  &c.HostConfig.IOMaximumBandwidth,
   461  			Iops: &c.HostConfig.IOMaximumIOps,
   462  		}
   463  	}
   464  }
   465  
   466  // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig
   467  // It will do nothing on non-Linux platform
   468  func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
   469  	return
   470  }
   471  
   472  // registryKey is an interface wrapper around `registry.Key`,
   473  // listing only the methods we care about here.
   474  // It's mainly useful to easily allow mocking the registry in tests.
   475  type registryKey interface {
   476  	GetStringValue(name string) (val string, valtype uint32, err error)
   477  	Close() error
   478  }
   479  
   480  var registryOpenKeyFunc = func(baseKey registry.Key, path string, access uint32) (registryKey, error) {
   481  	return registry.OpenKey(baseKey, path, access)
   482  }
   483  
   484  // readCredentialSpecRegistry is a helper function to read a credential spec from
   485  // the registry. If not found, we return an empty string and warn in the log.
   486  // This allows for staging on machines which do not have the necessary components.
   487  func readCredentialSpecRegistry(id, name string) (string, error) {
   488  	key, err := registryOpenKeyFunc(registry.LOCAL_MACHINE, credentialSpecRegistryLocation, registry.QUERY_VALUE)
   489  	if err != nil {
   490  		return "", errors.Wrapf(err, "failed handling spec %q for container %s - registry key %s could not be opened", name, id, credentialSpecRegistryLocation)
   491  	}
   492  	defer key.Close()
   493  
   494  	value, _, err := key.GetStringValue(name)
   495  	if err != nil {
   496  		if err == registry.ErrNotExist {
   497  			return "", fmt.Errorf("registry credential spec %q for container %s was not found", name, id)
   498  		}
   499  		return "", errors.Wrapf(err, "error reading credential spec %q from registry for container %s", name, id)
   500  	}
   501  
   502  	return value, nil
   503  }
   504  
   505  // readCredentialSpecFile is a helper function to read a credential spec from
   506  // a file. If not found, we return an empty string and warn in the log.
   507  // This allows for staging on machines which do not have the necessary components.
   508  func readCredentialSpecFile(id, root, location string) (string, error) {
   509  	if filepath.IsAbs(location) {
   510  		return "", fmt.Errorf("invalid credential spec - file:// path cannot be absolute")
   511  	}
   512  	base := filepath.Join(root, credentialSpecFileLocation)
   513  	full := filepath.Join(base, location)
   514  	if !strings.HasPrefix(full, base) {
   515  		return "", fmt.Errorf("invalid credential spec - file:// path must be under %s", base)
   516  	}
   517  	bcontents, err := ioutil.ReadFile(full)
   518  	if err != nil {
   519  		return "", errors.Wrapf(err, "credential spec for container %s could not be read from file %q", id, full)
   520  	}
   521  	return string(bcontents[:]), nil
   522  }