github.com/moby/docker@v26.1.3+incompatible/daemon/oci_windows.go (about)

     1  package daemon // import "github.com/docker/docker/daemon"
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"os"
     8  	"path/filepath"
     9  	"strings"
    10  
    11  	"github.com/Microsoft/hcsshim"
    12  	coci "github.com/containerd/containerd/oci"
    13  	"github.com/containerd/log"
    14  	"github.com/docker/docker/api/types/backend"
    15  	containertypes "github.com/docker/docker/api/types/container"
    16  	"github.com/docker/docker/container"
    17  	"github.com/docker/docker/daemon/config"
    18  	"github.com/docker/docker/errdefs"
    19  	"github.com/docker/docker/image"
    20  	"github.com/docker/docker/oci"
    21  	"github.com/docker/docker/pkg/sysinfo"
    22  	"github.com/docker/docker/pkg/system"
    23  	specs "github.com/opencontainers/runtime-spec/specs-go"
    24  	"github.com/pkg/errors"
    25  	"golang.org/x/sys/windows/registry"
    26  )
    27  
const (
	// credentialSpecRegistryLocation is the registry key path, opened under
	// HKEY_LOCAL_MACHINE, whose string values hold the credential specs that
	// can be referenced through a "registry://" security option.
	credentialSpecRegistryLocation = `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Virtualization\Containers\CredentialSpecs`
	// credentialSpecFileLocation is the directory name, joined onto the root
	// passed to readCredentialSpecFile, below which credential spec files
	// referenced through a "file://" security option are looked up.
	credentialSpecFileLocation     = "CredentialSpecs"
)
    32  
    33  // setupContainerDirs sets up base container directories (root, ipc, tmpfs and secrets).
    34  func (daemon *Daemon) setupContainerDirs(c *container.Container) ([]container.Mount, error) {
    35  	// Note, unlike Unix, we do NOT call into SetupWorkingDirectory as
    36  	// this is done in VMCompute. Further, we couldn't do it for Hyper-V
    37  	// containers anyway.
    38  	if err := daemon.setupSecretDir(c); err != nil {
    39  		return nil, err
    40  	}
    41  
    42  	if err := daemon.setupConfigDir(c); err != nil {
    43  		return nil, err
    44  	}
    45  
    46  	// If the container has not been started, and has configs or secrets
    47  	// secrets, create symlinks to each config and secret. If it has been
    48  	// started before, the symlinks should have already been created. Also, it
    49  	// is important to not mount a Hyper-V  container that has been started
    50  	// before, to protect the host from the container; for example, from
    51  	// malicious mutation of NTFS data structures.
    52  	if !c.HasBeenStartedBefore && (len(c.SecretReferences) > 0 || len(c.ConfigReferences) > 0) {
    53  		// The container file system is mounted before this function is called,
    54  		// except for Hyper-V containers, so mount it here in that case.
    55  		if daemon.isHyperV(c) {
    56  			if err := daemon.Mount(c); err != nil {
    57  				return nil, err
    58  			}
    59  			defer daemon.Unmount(c)
    60  		}
    61  		if err := c.CreateSecretSymlinks(); err != nil {
    62  			return nil, err
    63  		}
    64  		if err := c.CreateConfigSymlinks(); err != nil {
    65  			return nil, err
    66  		}
    67  	}
    68  
    69  	secretMounts, err := c.SecretMounts()
    70  	if err != nil {
    71  		return nil, err
    72  	}
    73  
    74  	var mounts []container.Mount
    75  	if secretMounts != nil {
    76  		mounts = append(mounts, secretMounts...)
    77  	}
    78  
    79  	if configMounts := c.ConfigMounts(); configMounts != nil {
    80  		mounts = append(mounts, configMounts...)
    81  	}
    82  
    83  	return mounts, nil
    84  }
    85  
    86  func (daemon *Daemon) isHyperV(c *container.Container) bool {
    87  	if c.HostConfig.Isolation.IsDefault() {
    88  		// Container using default isolation, so take the default from the daemon configuration
    89  		return daemon.defaultIsolation.IsHyperV()
    90  	}
    91  	// Container may be requesting an explicit isolation mode.
    92  	return c.HostConfig.Isolation.IsHyperV()
    93  }
    94  
    95  func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container, mounts []container.Mount) (*specs.Spec, error) {
    96  	img, err := daemon.imageService.GetImage(ctx, string(c.ImageID), backend.GetImageOpts{})
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  	if err := image.CheckOS(img.OperatingSystem()); err != nil {
   101  		return nil, err
   102  	}
   103  
   104  	s := oci.DefaultSpec()
   105  
   106  	if err := coci.WithAnnotations(c.HostConfig.Annotations)(ctx, nil, nil, &s); err != nil {
   107  		return nil, err
   108  	}
   109  
   110  	for _, mount := range mounts {
   111  		m := specs.Mount{
   112  			Source:      mount.Source,
   113  			Destination: mount.Destination,
   114  		}
   115  		if !mount.Writable {
   116  			m.Options = append(m.Options, "ro")
   117  		}
   118  		s.Mounts = append(s.Mounts, m)
   119  	}
   120  
   121  	linkedEnv, err := daemon.setupLinkedContainers(c)
   122  	if err != nil {
   123  		return nil, err
   124  	}
   125  
   126  	isHyperV := daemon.isHyperV(c)
   127  	if isHyperV {
   128  		s.Windows.HyperV = &specs.WindowsHyperV{}
   129  	}
   130  
   131  	// In s.Process
   132  	s.Process.Cwd = c.Config.WorkingDir
   133  	s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
   134  	s.Process.Terminal = c.Config.Tty
   135  
   136  	if c.Config.Tty {
   137  		s.Process.ConsoleSize = &specs.Box{
   138  			Height: c.HostConfig.ConsoleSize[0],
   139  			Width:  c.HostConfig.ConsoleSize[1],
   140  		}
   141  	}
   142  	s.Process.User.Username = c.Config.User
   143  	s.Windows.LayerFolders, err = daemon.imageService.GetLayerFolders(img, c.RWLayer, c.ID)
   144  	if err != nil {
   145  		return nil, errors.Wrapf(err, "GetLayerFolders failed: container %s", c.ID)
   146  	}
   147  
   148  	// Get endpoints for the libnetwork allocated networks to the container
   149  	var epList []string
   150  	AllowUnqualifiedDNSQuery := false
   151  	gwHNSID := ""
   152  	if c.NetworkSettings != nil {
   153  		for n := range c.NetworkSettings.Networks {
   154  			sn, err := daemon.FindNetwork(n)
   155  			if err != nil {
   156  				continue
   157  			}
   158  
   159  			ep, err := getEndpointInNetwork(c.Name, sn)
   160  			if err != nil {
   161  				continue
   162  			}
   163  
   164  			data, err := ep.DriverInfo()
   165  			if err != nil {
   166  				continue
   167  			}
   168  
   169  			if data["GW_INFO"] != nil {
   170  				gwInfo := data["GW_INFO"].(map[string]interface{})
   171  				if gwInfo["hnsid"] != nil {
   172  					gwHNSID = gwInfo["hnsid"].(string)
   173  				}
   174  			}
   175  
   176  			if data["hnsid"] != nil {
   177  				epList = append(epList, data["hnsid"].(string))
   178  			}
   179  
   180  			if data["AllowUnqualifiedDNSQuery"] != nil {
   181  				AllowUnqualifiedDNSQuery = true
   182  			}
   183  		}
   184  	}
   185  
   186  	var networkSharedContainerID string
   187  	if c.HostConfig.NetworkMode.IsContainer() {
   188  		networkSharedContainerID = c.NetworkSharedContainerID
   189  		for _, ep := range c.SharedEndpointList {
   190  			epList = append(epList, ep)
   191  		}
   192  	}
   193  
   194  	if gwHNSID != "" {
   195  		epList = append(epList, gwHNSID)
   196  	}
   197  
   198  	var dnsSearch []string
   199  	if len(c.HostConfig.DNSSearch) > 0 {
   200  		dnsSearch = c.HostConfig.DNSSearch
   201  	} else if len(daemonCfg.DNSSearch) > 0 {
   202  		dnsSearch = daemonCfg.DNSSearch
   203  	}
   204  
   205  	s.Windows.Network = &specs.WindowsNetwork{
   206  		AllowUnqualifiedDNSQuery:   AllowUnqualifiedDNSQuery,
   207  		DNSSearchList:              dnsSearch,
   208  		EndpointList:               epList,
   209  		NetworkSharedContainerName: networkSharedContainerID,
   210  	}
   211  
   212  	if err := daemon.createSpecWindowsFields(c, &s, isHyperV); err != nil {
   213  		return nil, err
   214  	}
   215  
   216  	if log.G(ctx).Level >= log.DebugLevel {
   217  		if b, err := json.Marshal(&s); err == nil {
   218  			log.G(ctx).Debugf("Generated spec: %s", string(b))
   219  		}
   220  	}
   221  
   222  	return &s, nil
   223  }
   224  
   225  // Sets the Windows-specific fields of the OCI spec
   226  func (daemon *Daemon) createSpecWindowsFields(c *container.Container, s *specs.Spec, isHyperV bool) error {
   227  	s.Hostname = c.FullHostname()
   228  
   229  	if len(s.Process.Cwd) == 0 {
   230  		// We default to C:\ to workaround the oddity of the case that the
   231  		// default directory for cmd running as LocalSystem (or
   232  		// ContainerAdministrator) is c:\windows\system32. Hence docker run
   233  		// <image> cmd will by default end in c:\windows\system32, rather
   234  		// than 'root' (/) on Linux. The oddity is that if you have a dockerfile
   235  		// which has no WORKDIR and has a COPY file ., . will be interpreted
   236  		// as c:\. Hence, setting it to default of c:\ makes for consistency.
   237  		s.Process.Cwd = `C:\`
   238  	}
   239  
   240  	if c.Config.ArgsEscaped {
   241  		s.Process.CommandLine = c.Path
   242  		if len(c.Args) > 0 {
   243  			s.Process.CommandLine += " " + system.EscapeArgs(c.Args)
   244  		}
   245  	} else {
   246  		s.Process.Args = append([]string{c.Path}, c.Args...)
   247  	}
   248  	s.Root.Readonly = false // Windows does not support a read-only root filesystem
   249  	if !isHyperV {
   250  		if c.BaseFS == "" {
   251  			return errors.New("createSpecWindowsFields: BaseFS of container " + c.ID + " is unexpectedly empty")
   252  		}
   253  
   254  		if daemon.UsesSnapshotter() {
   255  			// daemon.Mount() for the snapshotters actually mounts the filesystem to the host
   256  			// using containerd/mount.All and BaseFS is the directory where this is mounted.
   257  			// This is consistent with Linux-based graphdriver implementations.
   258  			// For the windowsfilter graphdriver, the underlying Get() call does not actually mount
   259  			// the filesystem to a path, and BaseFS is the Volume GUID of the prepared/activated
   260  			// filesystem.
   261  
   262  			// The spec for Root.Path for Windows specifies that for Process-isolated containers,
   263  			// it must be in the Volume GUID (\\?\\Volume{GUID} style), not a host-mounted directory.
   264  			backingDevicePath, err := getBackingDeviceForContainerdMount(c.BaseFS)
   265  			if err != nil {
   266  				return errors.Wrapf(err, "createSpecWindowsFields: Failed to get backing device of BaseFS of container %s", c.ID)
   267  			}
   268  			s.Root.Path = backingDevicePath
   269  		} else {
   270  			s.Root.Path = c.BaseFS // This is not set for Hyper-V containers
   271  		}
   272  		if !strings.HasSuffix(s.Root.Path, `\`) {
   273  			s.Root.Path = s.Root.Path + `\` // Ensure a correctly formatted volume GUID path \\?\Volume{GUID}\
   274  		}
   275  	}
   276  
   277  	// First boot optimization
   278  	s.Windows.IgnoreFlushesDuringBoot = !c.HasBeenStartedBefore
   279  
   280  	setResourcesInSpec(c, s, isHyperV)
   281  
   282  	// Read and add credentials from the security options if a credential spec has been provided.
   283  	if err := daemon.setWindowsCredentialSpec(c, s); err != nil {
   284  		return err
   285  	}
   286  
   287  	devices, err := setupWindowsDevices(c.HostConfig.Devices)
   288  	if err != nil {
   289  		return err
   290  	}
   291  
   292  	s.Windows.Devices = append(s.Windows.Devices, devices...)
   293  
   294  	return nil
   295  }
   296  
   297  // getBackingDeviceForContainerdMount extracts the backing device or directory mounted at mountPoint
   298  // by containerd's mount.Mount implementation for Windows.
   299  func getBackingDeviceForContainerdMount(mountPoint string) (string, error) {
   300  	// NOTE: This relies on details of the behaviour of containerd's mount implementation for Windows,
   301  	// and so is somewhat fragile.
   302  	// TODO: Upstream this into the mount package.
   303  	// The implementation would be the same, but it'll be better-encapsulated.
   304  
   305  	// See containerd/containerd/mount/mount_windows.go
   306  	// This is mostly just copied from mount.Unmount
   307  
   308  	const sourceStreamName = "containerd.io-source"
   309  
   310  	mountPoint = filepath.Clean(mountPoint)
   311  	adsFile := mountPoint + ":" + sourceStreamName
   312  	var layerPath string
   313  
   314  	if _, err := os.Lstat(adsFile); err == nil {
   315  		layerPathb, err := os.ReadFile(mountPoint + ":" + sourceStreamName)
   316  		if err != nil {
   317  			return "", fmt.Errorf("failed to retrieve layer source for mount %s: %w", mountPoint, err)
   318  		}
   319  		layerPath = string(layerPathb)
   320  	}
   321  
   322  	if layerPath == "" {
   323  		return "", fmt.Errorf("no layer source for mount %s", mountPoint)
   324  	}
   325  
   326  	home, layerID := filepath.Split(layerPath)
   327  	di := hcsshim.DriverInfo{
   328  		HomeDir: home,
   329  	}
   330  
   331  	backingDevice, err := hcsshim.GetLayerMountPath(di, layerID)
   332  	if err != nil {
   333  		return "", fmt.Errorf("failed to retrieve backing device for layer %s: %w", mountPoint, err)
   334  	}
   335  
   336  	return backingDevice, nil
   337  }
   338  
// errInvalidCredentialSpecSecOpt is the invalid-parameter error returned by
// setWindowsCredentialSpec when a credentialspec security option is not of the
// form "<scheme>://<non-empty value>" or uses a scheme it does not accept.
var errInvalidCredentialSpecSecOpt = errdefs.InvalidParameter(fmt.Errorf("invalid credential spec security option - value must be prefixed by 'file://', 'registry://', or 'raw://' followed by a non-empty value"))
   340  
   341  // setWindowsCredentialSpec sets the spec's `Windows.CredentialSpec`
   342  // field if relevant
   343  func (daemon *Daemon) setWindowsCredentialSpec(c *container.Container, s *specs.Spec) error {
   344  	if c.HostConfig == nil || c.HostConfig.SecurityOpt == nil {
   345  		return nil
   346  	}
   347  
   348  	// TODO (jrouge/wk8): if provided with several security options, we silently ignore
   349  	// all but the last one (provided they're all valid, otherwise we do return an error);
   350  	// this doesn't seem like a great idea?
   351  	credentialSpec := ""
   352  
   353  	// TODO(thaJeztah): extract validating and parsing SecurityOpt to a reusable function.
   354  	for _, secOpt := range c.HostConfig.SecurityOpt {
   355  		k, v, ok := strings.Cut(secOpt, "=")
   356  		if !ok {
   357  			return errdefs.InvalidParameter(fmt.Errorf("invalid security option: no equals sign in supplied value %s", secOpt))
   358  		}
   359  		// FIXME(thaJeztah): options should not be case-insensitive
   360  		if !strings.EqualFold(k, "credentialspec") {
   361  			return errdefs.InvalidParameter(fmt.Errorf("security option not supported: %s", k))
   362  		}
   363  
   364  		scheme, value, ok := strings.Cut(v, "://")
   365  		if !ok || value == "" {
   366  			return errInvalidCredentialSpecSecOpt
   367  		}
   368  		var err error
   369  		switch strings.ToLower(scheme) {
   370  		case "file":
   371  			credentialSpec, err = readCredentialSpecFile(c.ID, daemon.root, filepath.Clean(value))
   372  			if err != nil {
   373  				return errdefs.InvalidParameter(err)
   374  			}
   375  		case "registry":
   376  			credentialSpec, err = readCredentialSpecRegistry(c.ID, value)
   377  			if err != nil {
   378  				return errdefs.InvalidParameter(err)
   379  			}
   380  		case "config":
   381  			// if the container does not have a DependencyStore, then it
   382  			// isn't swarmkit managed. In order to avoid creating any
   383  			// impression that `config://` is a valid API, return the same
   384  			// error as if you'd passed any other random word.
   385  			if c.DependencyStore == nil {
   386  				return errInvalidCredentialSpecSecOpt
   387  			}
   388  
   389  			csConfig, err := c.DependencyStore.Configs().Get(value)
   390  			if err != nil {
   391  				return errdefs.System(errors.Wrap(err, "error getting value from config store"))
   392  			}
   393  			// stuff the resulting secret data into a string to use as the
   394  			// CredentialSpec
   395  			credentialSpec = string(csConfig.Spec.Data)
   396  		case "raw":
   397  			credentialSpec = value
   398  		default:
   399  			return errInvalidCredentialSpecSecOpt
   400  		}
   401  	}
   402  
   403  	if credentialSpec != "" {
   404  		if s.Windows == nil {
   405  			s.Windows = &specs.Windows{}
   406  		}
   407  		s.Windows.CredentialSpec = credentialSpec
   408  	}
   409  
   410  	return nil
   411  }
   412  
   413  func setResourcesInSpec(c *container.Container, s *specs.Spec, isHyperV bool) {
   414  	// In s.Windows.Resources
   415  	cpuShares := uint16(c.HostConfig.CPUShares)
   416  	cpuMaximum := uint16(c.HostConfig.CPUPercent) * 100
   417  	cpuCount := uint64(c.HostConfig.CPUCount)
   418  	if c.HostConfig.NanoCPUs > 0 {
   419  		if isHyperV {
   420  			cpuCount = uint64(c.HostConfig.NanoCPUs / 1e9)
   421  			leftoverNanoCPUs := c.HostConfig.NanoCPUs % 1e9
   422  			if leftoverNanoCPUs != 0 {
   423  				cpuCount++
   424  				cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(cpuCount) / (1e9 / 10000))
   425  				if cpuMaximum < 1 {
   426  					// The requested NanoCPUs is so small that we rounded to 0, use 1 instead
   427  					cpuMaximum = 1
   428  				}
   429  			}
   430  		} else {
   431  			cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(sysinfo.NumCPU()) / (1e9 / 10000))
   432  			if cpuMaximum < 1 {
   433  				// The requested NanoCPUs is so small that we rounded to 0, use 1 instead
   434  				cpuMaximum = 1
   435  			}
   436  		}
   437  	}
   438  
   439  	if cpuMaximum != 0 || cpuShares != 0 || cpuCount != 0 {
   440  		if s.Windows.Resources == nil {
   441  			s.Windows.Resources = &specs.WindowsResources{}
   442  		}
   443  		s.Windows.Resources.CPU = &specs.WindowsCPUResources{
   444  			Maximum: &cpuMaximum,
   445  			Shares:  &cpuShares,
   446  			Count:   &cpuCount,
   447  		}
   448  	}
   449  
   450  	memoryLimit := uint64(c.HostConfig.Memory)
   451  	if memoryLimit != 0 {
   452  		if s.Windows.Resources == nil {
   453  			s.Windows.Resources = &specs.WindowsResources{}
   454  		}
   455  		s.Windows.Resources.Memory = &specs.WindowsMemoryResources{
   456  			Limit: &memoryLimit,
   457  		}
   458  	}
   459  
   460  	if c.HostConfig.IOMaximumBandwidth != 0 || c.HostConfig.IOMaximumIOps != 0 {
   461  		if s.Windows.Resources == nil {
   462  			s.Windows.Resources = &specs.WindowsResources{}
   463  		}
   464  		s.Windows.Resources.Storage = &specs.WindowsStorageResources{
   465  			Bps:  &c.HostConfig.IOMaximumBandwidth,
   466  			Iops: &c.HostConfig.IOMaximumIOps,
   467  		}
   468  	}
   469  }
   470  
   471  // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig
   472  // It will do nothing on non-Linux platform
   473  func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig, daemonCfg *config.Config) {
   474  	return
   475  }
   476  
// registryKey is an interface wrapper around `registry.Key`,
// listing only the methods we care about here.
// It's mainly useful to easily allow mocking the registry in tests.
type registryKey interface {
	// GetStringValue returns the string value stored under name, together
	// with its registry value type.
	GetStringValue(name string) (val string, valtype uint32, err error)
	// Close releases the key; callers in this file defer it after a
	// successful open.
	Close() error
}
   484  
// registryOpenKeyFunc opens the registry key at path below baseKey with the
// requested access and returns it as a registryKey. By default it forwards to
// registry.OpenKey; it is a variable rather than a plain function, presumably
// so that tests can substitute a mock registry.
var registryOpenKeyFunc = func(baseKey registry.Key, path string, access uint32) (registryKey, error) {
	return registry.OpenKey(baseKey, path, access)
}
   488  
   489  // readCredentialSpecRegistry is a helper function to read a credential spec from
   490  // the registry. If not found, we return an empty string and warn in the log.
   491  // This allows for staging on machines which do not have the necessary components.
   492  func readCredentialSpecRegistry(id, name string) (string, error) {
   493  	key, err := registryOpenKeyFunc(registry.LOCAL_MACHINE, credentialSpecRegistryLocation, registry.QUERY_VALUE)
   494  	if err != nil {
   495  		return "", errors.Wrapf(err, "failed handling spec %q for container %s - registry key %s could not be opened", name, id, credentialSpecRegistryLocation)
   496  	}
   497  	defer key.Close()
   498  
   499  	value, _, err := key.GetStringValue(name)
   500  	if err != nil {
   501  		if err == registry.ErrNotExist {
   502  			return "", fmt.Errorf("registry credential spec %q for container %s was not found", name, id)
   503  		}
   504  		return "", errors.Wrapf(err, "error reading credential spec %q from registry for container %s", name, id)
   505  	}
   506  
   507  	return value, nil
   508  }
   509  
   510  // readCredentialSpecFile is a helper function to read a credential spec from
   511  // a file. If not found, we return an empty string and warn in the log.
   512  // This allows for staging on machines which do not have the necessary components.
   513  func readCredentialSpecFile(id, root, location string) (string, error) {
   514  	if filepath.IsAbs(location) {
   515  		return "", fmt.Errorf("invalid credential spec: file:// path cannot be absolute")
   516  	}
   517  	base := filepath.Join(root, credentialSpecFileLocation)
   518  	full := filepath.Join(base, location)
   519  	if !strings.HasPrefix(full, base) {
   520  		return "", fmt.Errorf("invalid credential spec: file:// path must be under %s", base)
   521  	}
   522  	bcontents, err := os.ReadFile(full)
   523  	if err != nil {
   524  		return "", errors.Wrapf(err, "failed to load credential spec for container %s", id)
   525  	}
   526  	return string(bcontents[:]), nil
   527  }
   528  
   529  func setupWindowsDevices(devices []containertypes.DeviceMapping) (specDevices []specs.WindowsDevice, err error) {
   530  	for _, deviceMapping := range devices {
   531  		if strings.HasPrefix(deviceMapping.PathOnHost, "class/") {
   532  			specDevices = append(specDevices, specs.WindowsDevice{
   533  				ID:     strings.TrimPrefix(deviceMapping.PathOnHost, "class/"),
   534  				IDType: "class",
   535  			})
   536  		} else {
   537  			idType, id, ok := strings.Cut(deviceMapping.PathOnHost, "://")
   538  			if !ok {
   539  				return nil, errors.Errorf("invalid device assignment path: '%s', must be 'class/ID' or 'IDType://ID'", deviceMapping.PathOnHost)
   540  			}
   541  			if idType == "" {
   542  				return nil, errors.Errorf("invalid device assignment path: '%s', IDType cannot be empty", deviceMapping.PathOnHost)
   543  			}
   544  			specDevices = append(specDevices, specs.WindowsDevice{
   545  				ID:     id,
   546  				IDType: idType,
   547  			})
   548  		}
   549  	}
   550  
   551  	return specDevices, nil
   552  }