github.com/moby/docker@v26.1.3+incompatible/daemon/oci_windows.go (about) 1 package daemon // import "github.com/docker/docker/daemon" 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "os" 8 "path/filepath" 9 "strings" 10 11 "github.com/Microsoft/hcsshim" 12 coci "github.com/containerd/containerd/oci" 13 "github.com/containerd/log" 14 "github.com/docker/docker/api/types/backend" 15 containertypes "github.com/docker/docker/api/types/container" 16 "github.com/docker/docker/container" 17 "github.com/docker/docker/daemon/config" 18 "github.com/docker/docker/errdefs" 19 "github.com/docker/docker/image" 20 "github.com/docker/docker/oci" 21 "github.com/docker/docker/pkg/sysinfo" 22 "github.com/docker/docker/pkg/system" 23 specs "github.com/opencontainers/runtime-spec/specs-go" 24 "github.com/pkg/errors" 25 "golang.org/x/sys/windows/registry" 26 ) 27 28 const ( 29 credentialSpecRegistryLocation = `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Virtualization\Containers\CredentialSpecs` 30 credentialSpecFileLocation = "CredentialSpecs" 31 ) 32 33 // setupContainerDirs sets up base container directories (root, ipc, tmpfs and secrets). 34 func (daemon *Daemon) setupContainerDirs(c *container.Container) ([]container.Mount, error) { 35 // Note, unlike Unix, we do NOT call into SetupWorkingDirectory as 36 // this is done in VMCompute. Further, we couldn't do it for Hyper-V 37 // containers anyway. 38 if err := daemon.setupSecretDir(c); err != nil { 39 return nil, err 40 } 41 42 if err := daemon.setupConfigDir(c); err != nil { 43 return nil, err 44 } 45 46 // If the container has not been started, and has configs or secrets 47 // secrets, create symlinks to each config and secret. If it has been 48 // started before, the symlinks should have already been created. Also, it 49 // is important to not mount a Hyper-V container that has been started 50 // before, to protect the host from the container; for example, from 51 // malicious mutation of NTFS data structures. 52 if !c.HasBeenStartedBefore && (len(c.SecretReferences) > 0 || len(c.ConfigReferences) > 0) { 53 // The container file system is mounted before this function is called, 54 // except for Hyper-V containers, so mount it here in that case. 55 if daemon.isHyperV(c) { 56 if err := daemon.Mount(c); err != nil { 57 return nil, err 58 } 59 defer daemon.Unmount(c) 60 } 61 if err := c.CreateSecretSymlinks(); err != nil { 62 return nil, err 63 } 64 if err := c.CreateConfigSymlinks(); err != nil { 65 return nil, err 66 } 67 } 68 69 secretMounts, err := c.SecretMounts() 70 if err != nil { 71 return nil, err 72 } 73 74 var mounts []container.Mount 75 if secretMounts != nil { 76 mounts = append(mounts, secretMounts...) 77 } 78 79 if configMounts := c.ConfigMounts(); configMounts != nil { 80 mounts = append(mounts, configMounts...) 81 } 82 83 return mounts, nil 84 } 85 86 func (daemon *Daemon) isHyperV(c *container.Container) bool { 87 if c.HostConfig.Isolation.IsDefault() { 88 // Container using default isolation, so take the default from the daemon configuration 89 return daemon.defaultIsolation.IsHyperV() 90 } 91 // Container may be requesting an explicit isolation mode. 92 return c.HostConfig.Isolation.IsHyperV() 93 } 94 95 func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container, mounts []container.Mount) (*specs.Spec, error) { 96 img, err := daemon.imageService.GetImage(ctx, string(c.ImageID), backend.GetImageOpts{}) 97 if err != nil { 98 return nil, err 99 } 100 if err := image.CheckOS(img.OperatingSystem()); err != nil { 101 return nil, err 102 } 103 104 s := oci.DefaultSpec() 105 106 if err := coci.WithAnnotations(c.HostConfig.Annotations)(ctx, nil, nil, &s); err != nil { 107 return nil, err 108 } 109 110 for _, mount := range mounts { 111 m := specs.Mount{ 112 Source: mount.Source, 113 Destination: mount.Destination, 114 } 115 if !mount.Writable { 116 m.Options = append(m.Options, "ro") 117 } 118 s.Mounts = append(s.Mounts, m) 119 } 120 121 linkedEnv, err := daemon.setupLinkedContainers(c) 122 if err != nil { 123 return nil, err 124 } 125 126 isHyperV := daemon.isHyperV(c) 127 if isHyperV { 128 s.Windows.HyperV = &specs.WindowsHyperV{} 129 } 130 131 // In s.Process 132 s.Process.Cwd = c.Config.WorkingDir 133 s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv) 134 s.Process.Terminal = c.Config.Tty 135 136 if c.Config.Tty { 137 s.Process.ConsoleSize = &specs.Box{ 138 Height: c.HostConfig.ConsoleSize[0], 139 Width: c.HostConfig.ConsoleSize[1], 140 } 141 } 142 s.Process.User.Username = c.Config.User 143 s.Windows.LayerFolders, err = daemon.imageService.GetLayerFolders(img, c.RWLayer, c.ID) 144 if err != nil { 145 return nil, errors.Wrapf(err, "GetLayerFolders failed: container %s", c.ID) 146 } 147 148 // Get endpoints for the libnetwork allocated networks to the container 149 var epList []string 150 AllowUnqualifiedDNSQuery := false 151 gwHNSID := "" 152 if c.NetworkSettings != nil { 153 for n := range c.NetworkSettings.Networks { 154 sn, err := daemon.FindNetwork(n) 155 if err != nil { 156 continue 157 } 158 159 ep, err := getEndpointInNetwork(c.Name, sn) 160 if err != nil { 161 continue 162 } 163 164 data, err := ep.DriverInfo() 165 if err != nil { 166 continue 167 } 168 169 if data["GW_INFO"] != nil { 170 gwInfo := data["GW_INFO"].(map[string]interface{}) 171 if gwInfo["hnsid"] != nil { 172 gwHNSID = gwInfo["hnsid"].(string) 173 } 174 } 175 176 if data["hnsid"] != nil { 177 epList = append(epList, data["hnsid"].(string)) 178 } 179 180 if data["AllowUnqualifiedDNSQuery"] != nil { 181 AllowUnqualifiedDNSQuery = true 182 } 183 } 184 } 185 186 var networkSharedContainerID string 187 if c.HostConfig.NetworkMode.IsContainer() { 188 networkSharedContainerID = c.NetworkSharedContainerID 189 for _, ep := range c.SharedEndpointList { 190 epList = append(epList, ep) 191 } 192 } 193 194 if gwHNSID != "" { 195 epList = append(epList, gwHNSID) 196 } 197 198 var dnsSearch []string 199 if len(c.HostConfig.DNSSearch) > 0 { 200 dnsSearch = c.HostConfig.DNSSearch 201 } else if len(daemonCfg.DNSSearch) > 0 { 202 dnsSearch = daemonCfg.DNSSearch 203 } 204 205 s.Windows.Network = &specs.WindowsNetwork{ 206 AllowUnqualifiedDNSQuery: AllowUnqualifiedDNSQuery, 207 DNSSearchList: dnsSearch, 208 EndpointList: epList, 209 NetworkSharedContainerName: networkSharedContainerID, 210 } 211 212 if err := daemon.createSpecWindowsFields(c, &s, isHyperV); err != nil { 213 return nil, err 214 } 215 216 if log.G(ctx).Level >= log.DebugLevel { 217 if b, err := json.Marshal(&s); err == nil { 218 log.G(ctx).Debugf("Generated spec: %s", string(b)) 219 } 220 } 221 222 return &s, nil 223 } 224 225 // Sets the Windows-specific fields of the OCI spec 226 func (daemon *Daemon) createSpecWindowsFields(c *container.Container, s *specs.Spec, isHyperV bool) error { 227 s.Hostname = c.FullHostname() 228 229 if len(s.Process.Cwd) == 0 { 230 // We default to C:\ to workaround the oddity of the case that the 231 // default directory for cmd running as LocalSystem (or 232 // ContainerAdministrator) is c:\windows\system32. Hence docker run 233 // <image> cmd will by default end in c:\windows\system32, rather 234 // than 'root' (/) on Linux. The oddity is that if you have a dockerfile 235 // which has no WORKDIR and has a COPY file ., . will be interpreted 236 // as c:\. Hence, setting it to default of c:\ makes for consistency. 237 s.Process.Cwd = `C:\` 238 } 239 240 if c.Config.ArgsEscaped { 241 s.Process.CommandLine = c.Path 242 if len(c.Args) > 0 { 243 s.Process.CommandLine += " " + system.EscapeArgs(c.Args) 244 } 245 } else { 246 s.Process.Args = append([]string{c.Path}, c.Args...) 247 } 248 s.Root.Readonly = false // Windows does not support a read-only root filesystem 249 if !isHyperV { 250 if c.BaseFS == "" { 251 return errors.New("createSpecWindowsFields: BaseFS of container " + c.ID + " is unexpectedly empty") 252 } 253 254 if daemon.UsesSnapshotter() { 255 // daemon.Mount() for the snapshotters actually mounts the filesystem to the host 256 // using containerd/mount.All and BaseFS is the directory where this is mounted. 257 // This is consistent with Linux-based graphdriver implementations. 258 // For the windowsfilter graphdriver, the underlying Get() call does not actually mount 259 // the filesystem to a path, and BaseFS is the Volume GUID of the prepared/activated 260 // filesystem. 261 262 // The spec for Root.Path for Windows specifies that for Process-isolated containers, 263 // it must be in the Volume GUID (\\?\\Volume{GUID} style), not a host-mounted directory. 264 backingDevicePath, err := getBackingDeviceForContainerdMount(c.BaseFS) 265 if err != nil { 266 return errors.Wrapf(err, "createSpecWindowsFields: Failed to get backing device of BaseFS of container %s", c.ID) 267 } 268 s.Root.Path = backingDevicePath 269 } else { 270 s.Root.Path = c.BaseFS // This is not set for Hyper-V containers 271 } 272 if !strings.HasSuffix(s.Root.Path, `\`) { 273 s.Root.Path = s.Root.Path + `\` // Ensure a correctly formatted volume GUID path \\?\Volume{GUID}\ 274 } 275 } 276 277 // First boot optimization 278 s.Windows.IgnoreFlushesDuringBoot = !c.HasBeenStartedBefore 279 280 setResourcesInSpec(c, s, isHyperV) 281 282 // Read and add credentials from the security options if a credential spec has been provided. 283 if err := daemon.setWindowsCredentialSpec(c, s); err != nil { 284 return err 285 } 286 287 devices, err := setupWindowsDevices(c.HostConfig.Devices) 288 if err != nil { 289 return err 290 } 291 292 s.Windows.Devices = append(s.Windows.Devices, devices...) 293 294 return nil 295 } 296 297 // getBackingDeviceForContainerdMount extracts the backing device or directory mounted at mountPoint 298 // by containerd's mount.Mount implementation for Windows. 299 func getBackingDeviceForContainerdMount(mountPoint string) (string, error) { 300 // NOTE: This relies on details of the behaviour of containerd's mount implementation for Windows, 301 // and so is somewhat fragile. 302 // TODO: Upstream this into the mount package. 303 // The implementation would be the same, but it'll be better-encapsulated. 304 305 // See containerd/containerd/mount/mount_windows.go 306 // This is mostly just copied from mount.Unmount 307 308 const sourceStreamName = "containerd.io-source" 309 310 mountPoint = filepath.Clean(mountPoint) 311 adsFile := mountPoint + ":" + sourceStreamName 312 var layerPath string 313 314 if _, err := os.Lstat(adsFile); err == nil { 315 layerPathb, err := os.ReadFile(mountPoint + ":" + sourceStreamName) 316 if err != nil { 317 return "", fmt.Errorf("failed to retrieve layer source for mount %s: %w", mountPoint, err) 318 } 319 layerPath = string(layerPathb) 320 } 321 322 if layerPath == "" { 323 return "", fmt.Errorf("no layer source for mount %s", mountPoint) 324 } 325 326 home, layerID := filepath.Split(layerPath) 327 di := hcsshim.DriverInfo{ 328 HomeDir: home, 329 } 330 331 backingDevice, err := hcsshim.GetLayerMountPath(di, layerID) 332 if err != nil { 333 return "", fmt.Errorf("failed to retrieve backing device for layer %s: %w", mountPoint, err) 334 } 335 336 return backingDevice, nil 337 } 338 339 var errInvalidCredentialSpecSecOpt = errdefs.InvalidParameter(fmt.Errorf("invalid credential spec security option - value must be prefixed by 'file://', 'registry://', or 'raw://' followed by a non-empty value")) 340 341 // setWindowsCredentialSpec sets the spec's `Windows.CredentialSpec` 342 // field if relevant 343 func (daemon *Daemon) setWindowsCredentialSpec(c *container.Container, s *specs.Spec) error { 344 if c.HostConfig == nil || c.HostConfig.SecurityOpt == nil { 345 return nil 346 } 347 348 // TODO (jrouge/wk8): if provided with several security options, we silently ignore 349 // all but the last one (provided they're all valid, otherwise we do return an error); 350 // this doesn't seem like a great idea? 351 credentialSpec := "" 352 353 // TODO(thaJeztah): extract validating and parsing SecurityOpt to a reusable function. 354 for _, secOpt := range c.HostConfig.SecurityOpt { 355 k, v, ok := strings.Cut(secOpt, "=") 356 if !ok { 357 return errdefs.InvalidParameter(fmt.Errorf("invalid security option: no equals sign in supplied value %s", secOpt)) 358 } 359 // FIXME(thaJeztah): options should not be case-insensitive 360 if !strings.EqualFold(k, "credentialspec") { 361 return errdefs.InvalidParameter(fmt.Errorf("security option not supported: %s", k)) 362 } 363 364 scheme, value, ok := strings.Cut(v, "://") 365 if !ok || value == "" { 366 return errInvalidCredentialSpecSecOpt 367 } 368 var err error 369 switch strings.ToLower(scheme) { 370 case "file": 371 credentialSpec, err = readCredentialSpecFile(c.ID, daemon.root, filepath.Clean(value)) 372 if err != nil { 373 return errdefs.InvalidParameter(err) 374 } 375 case "registry": 376 credentialSpec, err = readCredentialSpecRegistry(c.ID, value) 377 if err != nil { 378 return errdefs.InvalidParameter(err) 379 } 380 case "config": 381 // if the container does not have a DependencyStore, then it 382 // isn't swarmkit managed. In order to avoid creating any 383 // impression that `config://` is a valid API, return the same 384 // error as if you'd passed any other random word. 385 if c.DependencyStore == nil { 386 return errInvalidCredentialSpecSecOpt 387 } 388 389 csConfig, err := c.DependencyStore.Configs().Get(value) 390 if err != nil { 391 return errdefs.System(errors.Wrap(err, "error getting value from config store")) 392 } 393 // stuff the resulting secret data into a string to use as the 394 // CredentialSpec 395 credentialSpec = string(csConfig.Spec.Data) 396 case "raw": 397 credentialSpec = value 398 default: 399 return errInvalidCredentialSpecSecOpt 400 } 401 } 402 403 if credentialSpec != "" { 404 if s.Windows == nil { 405 s.Windows = &specs.Windows{} 406 } 407 s.Windows.CredentialSpec = credentialSpec 408 } 409 410 return nil 411 } 412 413 func setResourcesInSpec(c *container.Container, s *specs.Spec, isHyperV bool) { 414 // In s.Windows.Resources 415 cpuShares := uint16(c.HostConfig.CPUShares) 416 cpuMaximum := uint16(c.HostConfig.CPUPercent) * 100 417 cpuCount := uint64(c.HostConfig.CPUCount) 418 if c.HostConfig.NanoCPUs > 0 { 419 if isHyperV { 420 cpuCount = uint64(c.HostConfig.NanoCPUs / 1e9) 421 leftoverNanoCPUs := c.HostConfig.NanoCPUs % 1e9 422 if leftoverNanoCPUs != 0 { 423 cpuCount++ 424 cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(cpuCount) / (1e9 / 10000)) 425 if cpuMaximum < 1 { 426 // The requested NanoCPUs is so small that we rounded to 0, use 1 instead 427 cpuMaximum = 1 428 } 429 } 430 } else { 431 cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(sysinfo.NumCPU()) / (1e9 / 10000)) 432 if cpuMaximum < 1 { 433 // The requested NanoCPUs is so small that we rounded to 0, use 1 instead 434 cpuMaximum = 1 435 } 436 } 437 } 438 439 if cpuMaximum != 0 || cpuShares != 0 || cpuCount != 0 { 440 if s.Windows.Resources == nil { 441 s.Windows.Resources = &specs.WindowsResources{} 442 } 443 s.Windows.Resources.CPU = &specs.WindowsCPUResources{ 444 Maximum: &cpuMaximum, 445 Shares: &cpuShares, 446 Count: &cpuCount, 447 } 448 } 449 450 memoryLimit := uint64(c.HostConfig.Memory) 451 if memoryLimit != 0 { 452 if s.Windows.Resources == nil { 453 s.Windows.Resources = &specs.WindowsResources{} 454 } 455 s.Windows.Resources.Memory = &specs.WindowsMemoryResources{ 456 Limit: &memoryLimit, 457 } 458 } 459 460 if c.HostConfig.IOMaximumBandwidth != 0 || c.HostConfig.IOMaximumIOps != 0 { 461 if s.Windows.Resources == nil { 462 s.Windows.Resources = &specs.WindowsResources{} 463 } 464 s.Windows.Resources.Storage = &specs.WindowsStorageResources{ 465 Bps: &c.HostConfig.IOMaximumBandwidth, 466 Iops: &c.HostConfig.IOMaximumIOps, 467 } 468 } 469 } 470 471 // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig 472 // It will do nothing on non-Linux platform 473 func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig, daemonCfg *config.Config) { 474 return 475 } 476 477 // registryKey is an interface wrapper around `registry.Key`, 478 // listing only the methods we care about here. 479 // It's mainly useful to easily allow mocking the registry in tests. 480 type registryKey interface { 481 GetStringValue(name string) (val string, valtype uint32, err error) 482 Close() error 483 } 484 485 var registryOpenKeyFunc = func(baseKey registry.Key, path string, access uint32) (registryKey, error) { 486 return registry.OpenKey(baseKey, path, access) 487 } 488 489 // readCredentialSpecRegistry is a helper function to read a credential spec from 490 // the registry. If not found, we return an empty string and warn in the log. 491 // This allows for staging on machines which do not have the necessary components. 492 func readCredentialSpecRegistry(id, name string) (string, error) { 493 key, err := registryOpenKeyFunc(registry.LOCAL_MACHINE, credentialSpecRegistryLocation, registry.QUERY_VALUE) 494 if err != nil { 495 return "", errors.Wrapf(err, "failed handling spec %q for container %s - registry key %s could not be opened", name, id, credentialSpecRegistryLocation) 496 } 497 defer key.Close() 498 499 value, _, err := key.GetStringValue(name) 500 if err != nil { 501 if err == registry.ErrNotExist { 502 return "", fmt.Errorf("registry credential spec %q for container %s was not found", name, id) 503 } 504 return "", errors.Wrapf(err, "error reading credential spec %q from registry for container %s", name, id) 505 } 506 507 return value, nil 508 } 509 510 // readCredentialSpecFile is a helper function to read a credential spec from 511 // a file. If not found, we return an empty string and warn in the log. 512 // This allows for staging on machines which do not have the necessary components. 513 func readCredentialSpecFile(id, root, location string) (string, error) { 514 if filepath.IsAbs(location) { 515 return "", fmt.Errorf("invalid credential spec: file:// path cannot be absolute") 516 } 517 base := filepath.Join(root, credentialSpecFileLocation) 518 full := filepath.Join(base, location) 519 if !strings.HasPrefix(full, base) { 520 return "", fmt.Errorf("invalid credential spec: file:// path must be under %s", base) 521 } 522 bcontents, err := os.ReadFile(full) 523 if err != nil { 524 return "", errors.Wrapf(err, "failed to load credential spec for container %s", id) 525 } 526 return string(bcontents[:]), nil 527 } 528 529 func setupWindowsDevices(devices []containertypes.DeviceMapping) (specDevices []specs.WindowsDevice, err error) { 530 for _, deviceMapping := range devices { 531 if strings.HasPrefix(deviceMapping.PathOnHost, "class/") { 532 specDevices = append(specDevices, specs.WindowsDevice{ 533 ID: strings.TrimPrefix(deviceMapping.PathOnHost, "class/"), 534 IDType: "class", 535 }) 536 } else { 537 idType, id, ok := strings.Cut(deviceMapping.PathOnHost, "://") 538 if !ok { 539 return nil, errors.Errorf("invalid device assignment path: '%s', must be 'class/ID' or 'IDType://ID'", deviceMapping.PathOnHost) 540 } 541 if idType == "" { 542 return nil, errors.Errorf("invalid device assignment path: '%s', IDType cannot be empty", deviceMapping.PathOnHost) 543 } 544 specDevices = append(specDevices, specs.WindowsDevice{ 545 ID: id, 546 IDType: idType, 547 }) 548 } 549 } 550 551 return specDevices, nil 552 }