github.com/wozhu6104/docker@v20.10.10+incompatible/daemon/oci_windows.go (about) 1 package daemon // import "github.com/docker/docker/daemon" 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io/ioutil" 7 "path/filepath" 8 "runtime" 9 "strings" 10 11 "github.com/Microsoft/hcsshim/osversion" 12 containertypes "github.com/docker/docker/api/types/container" 13 "github.com/docker/docker/container" 14 "github.com/docker/docker/errdefs" 15 "github.com/docker/docker/oci" 16 "github.com/docker/docker/oci/caps" 17 "github.com/docker/docker/pkg/sysinfo" 18 "github.com/docker/docker/pkg/system" 19 specs "github.com/opencontainers/runtime-spec/specs-go" 20 "github.com/pkg/errors" 21 "github.com/sirupsen/logrus" 22 "golang.org/x/sys/windows/registry" 23 ) 24 25 const ( 26 credentialSpecRegistryLocation = `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Virtualization\Containers\CredentialSpecs` 27 credentialSpecFileLocation = "CredentialSpecs" 28 ) 29 30 func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) { 31 32 img, err := daemon.imageService.GetImage(string(c.ImageID), nil) 33 if err != nil { 34 return nil, err 35 } 36 37 s := oci.DefaultOSSpec(img.OS) 38 39 linkedEnv, err := daemon.setupLinkedContainers(c) 40 if err != nil { 41 return nil, err 42 } 43 44 // Note, unlike Unix, we do NOT call into SetupWorkingDirectory as 45 // this is done in VMCompute. Further, we couldn't do it for Hyper-V 46 // containers anyway. 47 48 if err := daemon.setupSecretDir(c); err != nil { 49 return nil, err 50 } 51 52 if err := daemon.setupConfigDir(c); err != nil { 53 return nil, err 54 } 55 56 // In s.Mounts 57 mounts, err := daemon.setupMounts(c) 58 if err != nil { 59 return nil, err 60 } 61 62 var isHyperV bool 63 if c.HostConfig.Isolation.IsDefault() { 64 // Container using default isolation, so take the default from the daemon configuration 65 isHyperV = daemon.defaultIsolation.IsHyperV() 66 } else { 67 // Container may be requesting an explicit isolation mode. 68 isHyperV = c.HostConfig.Isolation.IsHyperV() 69 } 70 71 if isHyperV { 72 s.Windows.HyperV = &specs.WindowsHyperV{} 73 } 74 75 // If the container has not been started, and has configs or secrets 76 // secrets, create symlinks to each config and secret. If it has been 77 // started before, the symlinks should have already been created. Also, it 78 // is important to not mount a Hyper-V container that has been started 79 // before, to protect the host from the container; for example, from 80 // malicious mutation of NTFS data structures. 81 if !c.HasBeenStartedBefore && (len(c.SecretReferences) > 0 || len(c.ConfigReferences) > 0) { 82 // The container file system is mounted before this function is called, 83 // except for Hyper-V containers, so mount it here in that case. 84 if isHyperV { 85 if err := daemon.Mount(c); err != nil { 86 return nil, err 87 } 88 defer daemon.Unmount(c) 89 } 90 if err := c.CreateSecretSymlinks(); err != nil { 91 return nil, err 92 } 93 if err := c.CreateConfigSymlinks(); err != nil { 94 return nil, err 95 } 96 } 97 98 secretMounts, err := c.SecretMounts() 99 if err != nil { 100 return nil, err 101 } 102 if secretMounts != nil { 103 mounts = append(mounts, secretMounts...) 104 } 105 106 configMounts := c.ConfigMounts() 107 if configMounts != nil { 108 mounts = append(mounts, configMounts...) 109 } 110 111 for _, mount := range mounts { 112 m := specs.Mount{ 113 Source: mount.Source, 114 Destination: mount.Destination, 115 } 116 if !mount.Writable { 117 m.Options = append(m.Options, "ro") 118 } 119 if img.OS != runtime.GOOS { 120 m.Type = "bind" 121 m.Options = append(m.Options, "rbind") 122 m.Options = append(m.Options, fmt.Sprintf("uvmpath=/tmp/gcs/%s/binds", c.ID)) 123 } 124 s.Mounts = append(s.Mounts, m) 125 } 126 127 // In s.Process 128 s.Process.Cwd = c.Config.WorkingDir 129 s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv) 130 s.Process.Terminal = c.Config.Tty 131 132 if c.Config.Tty { 133 s.Process.ConsoleSize = &specs.Box{ 134 Height: c.HostConfig.ConsoleSize[0], 135 Width: c.HostConfig.ConsoleSize[1], 136 } 137 } 138 s.Process.User.Username = c.Config.User 139 s.Windows.LayerFolders, err = daemon.imageService.GetLayerFolders(img, c.RWLayer) 140 if err != nil { 141 return nil, errors.Wrapf(err, "container %s", c.ID) 142 } 143 144 dnsSearch := daemon.getDNSSearchSettings(c) 145 146 // Get endpoints for the libnetwork allocated networks to the container 147 var epList []string 148 AllowUnqualifiedDNSQuery := false 149 gwHNSID := "" 150 if c.NetworkSettings != nil { 151 for n := range c.NetworkSettings.Networks { 152 sn, err := daemon.FindNetwork(n) 153 if err != nil { 154 continue 155 } 156 157 ep, err := getEndpointInNetwork(c.Name, sn) 158 if err != nil { 159 continue 160 } 161 162 data, err := ep.DriverInfo() 163 if err != nil { 164 continue 165 } 166 167 if data["GW_INFO"] != nil { 168 gwInfo := data["GW_INFO"].(map[string]interface{}) 169 if gwInfo["hnsid"] != nil { 170 gwHNSID = gwInfo["hnsid"].(string) 171 } 172 } 173 174 if data["hnsid"] != nil { 175 epList = append(epList, data["hnsid"].(string)) 176 } 177 178 if data["AllowUnqualifiedDNSQuery"] != nil { 179 AllowUnqualifiedDNSQuery = true 180 } 181 } 182 } 183 184 var networkSharedContainerID string 185 if c.HostConfig.NetworkMode.IsContainer() { 186 networkSharedContainerID = c.NetworkSharedContainerID 187 for _, ep := range c.SharedEndpointList { 188 epList = append(epList, ep) 189 } 190 } 191 192 if gwHNSID != "" { 193 epList = append(epList, gwHNSID) 194 } 195 196 s.Windows.Network = &specs.WindowsNetwork{ 197 AllowUnqualifiedDNSQuery: AllowUnqualifiedDNSQuery, 198 DNSSearchList: dnsSearch, 199 EndpointList: epList, 200 NetworkSharedContainerName: networkSharedContainerID, 201 } 202 203 switch img.OS { 204 case "windows": 205 if err := daemon.createSpecWindowsFields(c, &s, isHyperV); err != nil { 206 return nil, err 207 } 208 case "linux": 209 if !system.LCOWSupported() { 210 return nil, fmt.Errorf("Linux containers on Windows are not supported") 211 } 212 if err := daemon.createSpecLinuxFields(c, &s); err != nil { 213 return nil, err 214 } 215 default: 216 return nil, fmt.Errorf("Unsupported platform %q", img.OS) 217 } 218 219 if logrus.IsLevelEnabled(logrus.DebugLevel) { 220 if b, err := json.Marshal(&s); err == nil { 221 logrus.Debugf("Generated spec: %s", string(b)) 222 } 223 } 224 225 return (*specs.Spec)(&s), nil 226 } 227 228 // Sets the Windows-specific fields of the OCI spec 229 func (daemon *Daemon) createSpecWindowsFields(c *container.Container, s *specs.Spec, isHyperV bool) error { 230 231 s.Hostname = c.FullHostname() 232 233 if len(s.Process.Cwd) == 0 { 234 // We default to C:\ to workaround the oddity of the case that the 235 // default directory for cmd running as LocalSystem (or 236 // ContainerAdministrator) is c:\windows\system32. Hence docker run 237 // <image> cmd will by default end in c:\windows\system32, rather 238 // than 'root' (/) on Linux. The oddity is that if you have a dockerfile 239 // which has no WORKDIR and has a COPY file ., . will be interpreted 240 // as c:\. Hence, setting it to default of c:\ makes for consistency. 241 s.Process.Cwd = `C:\` 242 } 243 244 if c.Config.ArgsEscaped { 245 s.Process.CommandLine = c.Path 246 if len(c.Args) > 0 { 247 s.Process.CommandLine += " " + system.EscapeArgs(c.Args) 248 } 249 } else { 250 s.Process.Args = append([]string{c.Path}, c.Args...) 251 } 252 s.Root.Readonly = false // Windows does not support a read-only root filesystem 253 if !isHyperV { 254 if c.BaseFS == nil { 255 return errors.New("createSpecWindowsFields: BaseFS of container " + c.ID + " is unexpectedly nil") 256 } 257 258 s.Root.Path = c.BaseFS.Path() // This is not set for Hyper-V containers 259 if !strings.HasSuffix(s.Root.Path, `\`) { 260 s.Root.Path = s.Root.Path + `\` // Ensure a correctly formatted volume GUID path \\?\Volume{GUID}\ 261 } 262 } 263 264 // First boot optimization 265 s.Windows.IgnoreFlushesDuringBoot = !c.HasBeenStartedBefore 266 267 setResourcesInSpec(c, s, isHyperV) 268 269 // Read and add credentials from the security options if a credential spec has been provided. 270 if err := daemon.setWindowsCredentialSpec(c, s); err != nil { 271 return err 272 } 273 274 // Do we have any assigned devices? 275 if len(c.HostConfig.Devices) > 0 { 276 if isHyperV { 277 return errors.New("device assignment is not supported for HyperV containers") 278 } 279 if osversion.Build() < osversion.RS5 { 280 return errors.New("device assignment requires Windows builds RS5 (17763+) or later") 281 } 282 for _, deviceMapping := range c.HostConfig.Devices { 283 srcParts := strings.SplitN(deviceMapping.PathOnHost, "/", 2) 284 if len(srcParts) != 2 { 285 return errors.New("invalid device assignment path") 286 } 287 if srcParts[0] != "class" { 288 return errors.Errorf("invalid device assignment type: '%s' should be 'class'", srcParts[0]) 289 } 290 wd := specs.WindowsDevice{ 291 ID: srcParts[1], 292 IDType: srcParts[0], 293 } 294 s.Windows.Devices = append(s.Windows.Devices, wd) 295 } 296 } 297 298 return nil 299 } 300 301 var errInvalidCredentialSpecSecOpt = errdefs.InvalidParameter(fmt.Errorf("invalid credential spec security option - value must be prefixed by 'file://', 'registry://', or 'raw://' followed by a non-empty value")) 302 303 // setWindowsCredentialSpec sets the spec's `Windows.CredentialSpec` 304 // field if relevant 305 func (daemon *Daemon) setWindowsCredentialSpec(c *container.Container, s *specs.Spec) error { 306 if c.HostConfig == nil || c.HostConfig.SecurityOpt == nil { 307 return nil 308 } 309 310 // TODO (jrouge/wk8): if provided with several security options, we silently ignore 311 // all but the last one (provided they're all valid, otherwise we do return an error); 312 // this doesn't seem like a great idea? 313 credentialSpec := "" 314 315 for _, secOpt := range c.HostConfig.SecurityOpt { 316 optSplits := strings.SplitN(secOpt, "=", 2) 317 if len(optSplits) != 2 { 318 return errdefs.InvalidParameter(fmt.Errorf("invalid security option: no equals sign in supplied value %s", secOpt)) 319 } 320 if !strings.EqualFold(optSplits[0], "credentialspec") { 321 return errdefs.InvalidParameter(fmt.Errorf("security option not supported: %s", optSplits[0])) 322 } 323 324 credSpecSplits := strings.SplitN(optSplits[1], "://", 2) 325 if len(credSpecSplits) != 2 || credSpecSplits[1] == "" { 326 return errInvalidCredentialSpecSecOpt 327 } 328 value := credSpecSplits[1] 329 330 var err error 331 switch strings.ToLower(credSpecSplits[0]) { 332 case "file": 333 if credentialSpec, err = readCredentialSpecFile(c.ID, daemon.root, filepath.Clean(value)); err != nil { 334 return errdefs.InvalidParameter(err) 335 } 336 case "registry": 337 if credentialSpec, err = readCredentialSpecRegistry(c.ID, value); err != nil { 338 return errdefs.InvalidParameter(err) 339 } 340 case "config": 341 // if the container does not have a DependencyStore, then it 342 // isn't swarmkit managed. In order to avoid creating any 343 // impression that `config://` is a valid API, return the same 344 // error as if you'd passed any other random word. 345 if c.DependencyStore == nil { 346 return errInvalidCredentialSpecSecOpt 347 } 348 349 csConfig, err := c.DependencyStore.Configs().Get(value) 350 if err != nil { 351 return errdefs.System(errors.Wrap(err, "error getting value from config store")) 352 } 353 // stuff the resulting secret data into a string to use as the 354 // CredentialSpec 355 credentialSpec = string(csConfig.Spec.Data) 356 case "raw": 357 credentialSpec = value 358 default: 359 return errInvalidCredentialSpecSecOpt 360 } 361 } 362 363 if credentialSpec != "" { 364 if s.Windows == nil { 365 s.Windows = &specs.Windows{} 366 } 367 s.Windows.CredentialSpec = credentialSpec 368 } 369 370 return nil 371 } 372 373 // Sets the Linux-specific fields of the OCI spec 374 // TODO: LCOW Support. We need to do a lot more pulling in what can 375 // be pulled in from oci_linux.go. 376 func (daemon *Daemon) createSpecLinuxFields(c *container.Container, s *specs.Spec) error { 377 s.Root = &specs.Root{ 378 Path: "rootfs", 379 Readonly: c.HostConfig.ReadonlyRootfs, 380 } 381 382 s.Hostname = c.Config.Hostname 383 setLinuxDomainname(c, s) 384 385 if len(s.Process.Cwd) == 0 { 386 s.Process.Cwd = `/` 387 } 388 s.Process.Args = append([]string{c.Path}, c.Args...) 389 390 // Note these are against the UVM. 391 setResourcesInSpec(c, s, true) // LCOW is Hyper-V only 392 393 capabilities, err := caps.TweakCapabilities(caps.DefaultCapabilities(), c.HostConfig.CapAdd, c.HostConfig.CapDrop, c.HostConfig.Privileged) 394 if err != nil { 395 return fmt.Errorf("linux spec capabilities: %v", err) 396 } 397 if err := oci.SetCapabilities(s, capabilities); err != nil { 398 return fmt.Errorf("linux spec capabilities: %v", err) 399 } 400 devPermissions, err := oci.AppendDevicePermissionsFromCgroupRules(nil, c.HostConfig.DeviceCgroupRules) 401 if err != nil { 402 return fmt.Errorf("linux runtime spec devices: %v", err) 403 } 404 s.Linux.Resources.Devices = devPermissions 405 return nil 406 } 407 408 func setResourcesInSpec(c *container.Container, s *specs.Spec, isHyperV bool) { 409 // In s.Windows.Resources 410 cpuShares := uint16(c.HostConfig.CPUShares) 411 cpuMaximum := uint16(c.HostConfig.CPUPercent) * 100 412 cpuCount := uint64(c.HostConfig.CPUCount) 413 if c.HostConfig.NanoCPUs > 0 { 414 if isHyperV { 415 cpuCount = uint64(c.HostConfig.NanoCPUs / 1e9) 416 leftoverNanoCPUs := c.HostConfig.NanoCPUs % 1e9 417 if leftoverNanoCPUs != 0 { 418 cpuCount++ 419 cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(cpuCount) / (1e9 / 10000)) 420 if cpuMaximum < 1 { 421 // The requested NanoCPUs is so small that we rounded to 0, use 1 instead 422 cpuMaximum = 1 423 } 424 } 425 } else { 426 cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(sysinfo.NumCPU()) / (1e9 / 10000)) 427 if cpuMaximum < 1 { 428 // The requested NanoCPUs is so small that we rounded to 0, use 1 instead 429 cpuMaximum = 1 430 } 431 } 432 } 433 434 if cpuMaximum != 0 || cpuShares != 0 || cpuCount != 0 { 435 if s.Windows.Resources == nil { 436 s.Windows.Resources = &specs.WindowsResources{} 437 } 438 s.Windows.Resources.CPU = &specs.WindowsCPUResources{ 439 Maximum: &cpuMaximum, 440 Shares: &cpuShares, 441 Count: &cpuCount, 442 } 443 } 444 445 memoryLimit := uint64(c.HostConfig.Memory) 446 if memoryLimit != 0 { 447 if s.Windows.Resources == nil { 448 s.Windows.Resources = &specs.WindowsResources{} 449 } 450 s.Windows.Resources.Memory = &specs.WindowsMemoryResources{ 451 Limit: &memoryLimit, 452 } 453 } 454 455 if c.HostConfig.IOMaximumBandwidth != 0 || c.HostConfig.IOMaximumIOps != 0 { 456 if s.Windows.Resources == nil { 457 s.Windows.Resources = &specs.WindowsResources{} 458 } 459 s.Windows.Resources.Storage = &specs.WindowsStorageResources{ 460 Bps: &c.HostConfig.IOMaximumBandwidth, 461 Iops: &c.HostConfig.IOMaximumIOps, 462 } 463 } 464 } 465 466 // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig 467 // It will do nothing on non-Linux platform 468 func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) { 469 return 470 } 471 472 // registryKey is an interface wrapper around `registry.Key`, 473 // listing only the methods we care about here. 474 // It's mainly useful to easily allow mocking the registry in tests. 475 type registryKey interface { 476 GetStringValue(name string) (val string, valtype uint32, err error) 477 Close() error 478 } 479 480 var registryOpenKeyFunc = func(baseKey registry.Key, path string, access uint32) (registryKey, error) { 481 return registry.OpenKey(baseKey, path, access) 482 } 483 484 // readCredentialSpecRegistry is a helper function to read a credential spec from 485 // the registry. If not found, we return an empty string and warn in the log. 486 // This allows for staging on machines which do not have the necessary components. 487 func readCredentialSpecRegistry(id, name string) (string, error) { 488 key, err := registryOpenKeyFunc(registry.LOCAL_MACHINE, credentialSpecRegistryLocation, registry.QUERY_VALUE) 489 if err != nil { 490 return "", errors.Wrapf(err, "failed handling spec %q for container %s - registry key %s could not be opened", name, id, credentialSpecRegistryLocation) 491 } 492 defer key.Close() 493 494 value, _, err := key.GetStringValue(name) 495 if err != nil { 496 if err == registry.ErrNotExist { 497 return "", fmt.Errorf("registry credential spec %q for container %s was not found", name, id) 498 } 499 return "", errors.Wrapf(err, "error reading credential spec %q from registry for container %s", name, id) 500 } 501 502 return value, nil 503 } 504 505 // readCredentialSpecFile is a helper function to read a credential spec from 506 // a file. If not found, we return an empty string and warn in the log. 507 // This allows for staging on machines which do not have the necessary components. 508 func readCredentialSpecFile(id, root, location string) (string, error) { 509 if filepath.IsAbs(location) { 510 return "", fmt.Errorf("invalid credential spec - file:// path cannot be absolute") 511 } 512 base := filepath.Join(root, credentialSpecFileLocation) 513 full := filepath.Join(base, location) 514 if !strings.HasPrefix(full, base) { 515 return "", fmt.Errorf("invalid credential spec - file:// path must be under %s", base) 516 } 517 bcontents, err := ioutil.ReadFile(full) 518 if err != nil { 519 return "", errors.Wrapf(err, "credential spec for container %s could not be read from file %q", id, full) 520 } 521 return string(bcontents[:]), nil 522 }