github.com/lazyboychen7/engine@v17.12.1-ce-rc2+incompatible/daemon/graphdriver/lcow/lcow.go

// +build windows

// Maintainer: jhowardmsft
// Locale: en-gb
// About: Graph-driver for Linux Containers On Windows (LCOW)
//
// This graphdriver runs in two modes. It is yet to be determined which one
// will be the shipping mode. In global mode, a single utility VM is used for
// all service VM tool operations. This isn't safe security-wise, as the
// sandboxes of multiple containers, containing untrusted data, are attached
// to it. That may be fine for client devops scenarios. In safe mode, a unique
// utility VM is instantiated for each service VM tool operation. The downside
// of safe mode is that operations are slower, as a new service utility VM has
// to be started and torn down whenever needed.
//
// Options:
//
// The following options are read by the graphdriver itself:
//
//   * lcow.globalmode - Enables global service VM mode
//        -- Possible values:    true/false
//        -- Default if omitted: false
//
//   * lcow.sandboxsize - Specifies a custom sandbox size in GB for starting a container
//        -- Possible values:    >= default sandbox size (opengcs defined, currently 20)
//        -- Default if omitted: 20
//
// The following options are read by opengcs:
//
//   * lcow.kirdpath - Specifies a custom path to a kernel/initrd pair
//        -- Possible values:    Any local path that is not a mapped drive
//        -- Default if omitted: %ProgramFiles%\Linux Containers
//
//   * lcow.kernel - Specifies a custom kernel file located in the `lcow.kirdpath` path
//        -- Possible values:    Any valid filename
//        -- Default if omitted: bootx64.efi
//
//   * lcow.initrd - Specifies a custom initrd file located in the `lcow.kirdpath` path
//        -- Possible values:    Any valid filename
//        -- Default if omitted: initrd.img
//
//   * lcow.bootparameters - Specifies additional boot parameters for booting in kernel+initrd mode
//        -- Possible values:    Any valid linux kernel boot options
//        -- Default if omitted: <nil>
//
//   * lcow.vhdx - Specifies a custom vhdx file to boot (instead of a kernel+initrd)
//        -- Possible values:    Any valid filename
//        -- Default if omitted: uvm.vhdx under `lcow.kirdpath`
//
//   * lcow.timeout - Specifies a timeout for utility VM operations in seconds
//        -- Possible values:    >=0
//        -- Default if omitted: 300
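//
// For illustration only: graphdriver options are typically supplied via the
// daemon's --storage-opt flags (or the "storage-opts" array in daemon.json).
// The values below are hypothetical:
//
//   dockerd --storage-driver lcow ^
//           --storage-opt lcow.globalmode=false ^
//           --storage-opt lcow.sandboxsize=30 ^
//           --storage-opt lcow.timeout=600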

// TODO: Grab logs from SVM at terminate or errors

package lcow

import (
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Microsoft/hcsshim"
	"github.com/Microsoft/opengcs/client"
	"github.com/docker/docker/daemon/graphdriver"
	"github.com/docker/docker/pkg/archive"
	"github.com/docker/docker/pkg/containerfs"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/ioutils"
	"github.com/docker/docker/pkg/system"
	"github.com/sirupsen/logrus"
)

// init registers this driver with the graphdriver registry. The driver is
// initialised by the function passed as the second parameter, implemented
// in this file.
func init() {
	graphdriver.Register("lcow", InitDriver)
}

const (
	// sandboxFilename is the name of the file containing a layer's sandbox (read-write layer).
	sandboxFilename = "sandbox.vhdx"

	// scratchFilename is the name of the scratch-space used by an SVM to avoid running out of memory.
	scratchFilename = "scratch.vhdx"

	// layerFilename is the name of the file containing a layer's read-only contents.
	// Note this really is VHD format, not VHDX.
	layerFilename = "layer.vhd"

	// toolsScratchPath is a location in a service utility VM that the tools can use as a
	// scratch space to avoid running out of memory.
	toolsScratchPath = "/tmp/scratch"

	// svmGlobalID is the ID used in the serviceVMs map for the global service VM when running in "global" mode.
	svmGlobalID = "_lcow_global_svm_"

	// cacheDirectory is the sub-folder under the driver's data-root used to cache blank sandbox and scratch VHDs.
	cacheDirectory = "cache"

	// scratchDirectory is the sub-folder under the driver's data-root used for scratch VHDs in service VMs.
	scratchDirectory = "scratch"

	// errOperationPending is the HRESULT returned by the HCS when the VM termination operation is still pending.
	errOperationPending syscall.Errno = 0xc0370103
)

// Driver represents an LCOW graph driver.
type Driver struct {
	dataRoot           string     // Root path on the host where we are storing everything.
	cachedSandboxFile  string     // Location of the local default-sized cached sandbox.
	cachedSandboxMutex sync.Mutex // Protects race conditions from multiple threads creating the cached sandbox.
	cachedScratchFile  string     // Location of the local cached empty scratch space.
	cachedScratchMutex sync.Mutex // Protects race conditions from multiple threads creating the cached scratch.
	options            []string   // Graphdriver options we are initialised with.
	globalMode         bool       // Indicates if running in an unsafe/global service VM mode.

	// NOTE: It is OK to use a cache here because Windows does not support
	// restoring containers when the daemon dies.
	serviceVms *serviceVMMap // Map of the configs representing the service VM(s) we are running.
}

// layerDetails is the structure returned by the helper function
// getLayerDetails for getting information about a layer folder.
type layerDetails struct {
	filename  string // \path\to\sandbox.vhdx or \path\to\layer.vhd
	size      int64  // size of the above file
	isSandbox bool   // true if sandbox.vhdx
}

// deletefiles is a helper function used at initialisation to delete any
// left-over scratch files in case we were previously forcibly terminated.
func deletefiles(path string, f os.FileInfo, err error) error {
	if err != nil {
		// The walk itself failed for this path; propagate the error rather
		// than dereferencing a nil FileInfo.
		return err
	}
	if strings.HasSuffix(f.Name(), ".vhdx") {
		logrus.Warnf("lcowdriver: init: deleting stale scratch file %s", path)
		return os.Remove(path)
	}
	return nil
}

// InitDriver returns a new LCOW storage driver.
func InitDriver(dataRoot string, options []string, _, _ []idtools.IDMap) (graphdriver.Driver, error) {
	title := "lcowdriver: init:"

	cd := filepath.Join(dataRoot, cacheDirectory)
	sd := filepath.Join(dataRoot, scratchDirectory)

	d := &Driver{
		dataRoot:          dataRoot,
		options:           options,
		cachedSandboxFile: filepath.Join(cd, sandboxFilename),
		cachedScratchFile: filepath.Join(cd, scratchFilename),
		serviceVms: &serviceVMMap{
			svms: make(map[string]*serviceVMMapItem),
		},
		globalMode: false,
	}

	// Look for relevant options.
	for _, v := range options {
		opt := strings.SplitN(v, "=", 2)
		if len(opt) == 2 {
			switch strings.ToLower(opt[0]) {
			case "lcow.globalmode":
				var err error
				d.globalMode, err = strconv.ParseBool(opt[1])
				if err != nil {
					return nil, fmt.Errorf("%s failed to parse value for 'lcow.globalmode' - must be 'true' or 'false'", title)
				}
			}
		}
	}

	// Make sure the dataRoot directory is created.
	if err := idtools.MkdirAllAndChown(dataRoot, 0700, idtools.IDPair{UID: 0, GID: 0}); err != nil {
		return nil, fmt.Errorf("%s failed to create '%s': %v", title, dataRoot, err)
	}

	// Make sure the cache directory is created under dataRoot.
	if err := idtools.MkdirAllAndChown(cd, 0700, idtools.IDPair{UID: 0, GID: 0}); err != nil {
		return nil, fmt.Errorf("%s failed to create '%s': %v", title, cd, err)
	}

	// Make sure the scratch directory is created under dataRoot.
	if err := idtools.MkdirAllAndChown(sd, 0700, idtools.IDPair{UID: 0, GID: 0}); err != nil {
		return nil, fmt.Errorf("%s failed to create '%s': %v", title, sd, err)
	}

	// Delete any stale scratch files left in the scratch directory. This is
	// best-effort, so a failure is logged rather than failing initialisation.
	if err := filepath.Walk(sd, deletefiles); err != nil {
		logrus.Warnf("%s failed to clean up scratch directory %s: %v", title, sd, err)
	}

	logrus.Infof("%s dataRoot: %s globalMode: %t", title, dataRoot, d.globalMode)

	return d, nil
}

// getVMID returns the ID of the service VM used for the given layer ID: the
// shared global ID when running in global mode, otherwise the layer ID itself.
func (d *Driver) getVMID(id string) string {
	if d.globalMode {
		return svmGlobalID
	}
	return id
}
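// The service VM map is reference counted. The serviceVMMap type and its
// add/decrementRefCount/setRefCountZero/deleteID helpers are implemented
// elsewhere in this package; as a sketch of the contract relied on below:
//
//   svm, exists, err := d.serviceVms.add(id)               // take a ref; create an entry if needed
//   ...
//   svm, lastRef, err := d.serviceVms.decrementRefCount(id) // drop a ref
//   if lastRef {
//   	// actually terminate the utility VM
//   }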
// startServiceVMIfNotRunning starts a service utility VM if it is not
// currently running. It can optionally be started with mapped virtual disks.
// Returns an opengcs config structure representing the VM.
func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd []hcsshim.MappedVirtualDisk, context string) (_ *serviceVM, err error) {
	// Use the global ID if in global mode.
	id = d.getVMID(id)

	title := fmt.Sprintf("lcowdriver: startservicevmifnotrunning %s:", id)

	// Attempt to add the ID to the service vm map.
	logrus.Debugf("%s: adding entry to service vm map", title)
	svm, exists, err := d.serviceVms.add(id)
	if err != nil && err == errVMisTerminating {
		// VM is in the process of terminating. Wait until it's done and then try again.
		logrus.Debugf("%s: VM with current ID still in the process of terminating: %s", title, id)
		if err := svm.getStopError(); err != nil {
			logrus.Debugf("%s: VM %s did not stop successfully: %s", title, id, err)
			return nil, err
		}
		return d.startServiceVMIfNotRunning(id, mvdToAdd, context)
	} else if err != nil {
		logrus.Debugf("%s: failed to add service vm to map: %s", title, err)
		return nil, fmt.Errorf("%s: failed to add to service vm map: %s", title, err)
	}

	if exists {
		// Service VM is already up and running. In this case, just hot-add the vhds.
		logrus.Debugf("%s: service vm already exists. Just hot adding: %+v", title, mvdToAdd)
		if err := svm.hotAddVHDs(mvdToAdd...); err != nil {
			logrus.Debugf("%s: failed to hot add vhds on service vm creation: %s", title, err)
			return nil, fmt.Errorf("%s: failed to hot add vhds on service vm: %s", title, err)
		}
		return svm, nil
	}

	// We are the first service for this id, so we need to start it.
	logrus.Debugf("%s: service vm doesn't exist. Now starting it up: %s", title, id)

	defer func() {
		// Signal that start has finished, passing in the error if any.
		svm.signalStartFinished(err)
		if err != nil {
			// We added a ref to the VM; since we failed, we should delete the ref.
			d.terminateServiceVM(id, "error path on startServiceVMIfNotRunning", false)
		}
	}()

	// Generate a default configuration.
	if err := svm.config.GenerateDefault(d.options); err != nil {
		return nil, fmt.Errorf("%s failed to generate default gogcs configuration for global svm (%s): %s", title, context, err)
	}

	// For the name, we deliberately suffix in safe mode to ensure that it doesn't
	// clash with another utility VM which may be running for the container itself.
	// This also makes it easier to correlate through Get-ComputeProcess.
	if id == svmGlobalID {
		svm.config.Name = svmGlobalID
	} else {
		svm.config.Name = fmt.Sprintf("%s_svm", id)
	}

	// Ensure we take the cached scratch mutex around the check to ensure the file is complete
	// and not in the process of being created by another thread.
	scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))

	logrus.Debugf("%s locking cachedScratchMutex", title)
	d.cachedScratchMutex.Lock()
	if _, err := os.Stat(d.cachedScratchFile); err == nil {
		// Make a copy of the cached scratch into the scratch directory.
		logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) cloning cached scratch for mvd", context)
		if err := client.CopyFile(d.cachedScratchFile, scratchTargetFile, true); err != nil {
			logrus.Debugf("%s releasing cachedScratchMutex on err: %s", title, err)
			d.cachedScratchMutex.Unlock()
			return nil, err
		}

		// Add the cached clone as a mapped virtual disk.
		logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) adding cloned scratch as mvd", context)
		mvd := hcsshim.MappedVirtualDisk{
			HostPath:          scratchTargetFile,
			ContainerPath:     toolsScratchPath,
			CreateInUtilityVM: true,
		}
		svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvd)
		svm.scratchAttached = true
	}

	logrus.Debugf("%s releasing cachedScratchMutex", title)
	d.cachedScratchMutex.Unlock()

	// If requested to start it with mapped virtual disks, add them now.
	svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvdToAdd...)
	for _, mvd := range svm.config.MappedVirtualDisks {
		svm.attachedVHDs[mvd.HostPath] = 1
	}

	// Start it.
	logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) starting %s", context, svm.config.Name)
	if err := svm.config.StartUtilityVM(); err != nil {
		return nil, fmt.Errorf("failed to start service utility VM (%s): %s", context, err)
	}

	// Defer a function to terminate the VM if the next steps fail.
	defer func() {
		if err != nil {
			waitTerminate(svm, fmt.Sprintf("startServiceVmIfNotRunning: %s (%s)", id, context))
		}
	}()

	// Now that we have a running service VM, we can create the cached scratch file if it doesn't exist.
	logrus.Debugf("%s locking cachedScratchMutex", title)
	d.cachedScratchMutex.Lock()
	if _, err := os.Stat(d.cachedScratchFile); err != nil {
		logrus.Debugf("%s (%s): creating an SVM scratch", title, context)

		// Don't use svm.CreateExt4Vhdx since that only works when the service vm is set up,
		// but we're still in that process right now.
		if err := svm.config.CreateExt4Vhdx(scratchTargetFile, client.DefaultVhdxSizeGB, d.cachedScratchFile); err != nil {
			logrus.Debugf("%s (%s): releasing cachedScratchMutex on error path", title, context)
			d.cachedScratchMutex.Unlock()
			logrus.Debugf("%s: failed to create vm scratch %s: %s", title, scratchTargetFile, err)
			return nil, fmt.Errorf("failed to create SVM scratch VHDX (%s): %s", context, err)
		}
	}
	logrus.Debugf("%s (%s): releasing cachedScratchMutex", title, context)
	d.cachedScratchMutex.Unlock()

	// Hot-add the scratch-space if not already attached.
	if !svm.scratchAttached {
		logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) hot-adding scratch %s", context, scratchTargetFile)
		if err := svm.hotAddVHDsAtStart(hcsshim.MappedVirtualDisk{
			HostPath:          scratchTargetFile,
			ContainerPath:     toolsScratchPath,
			CreateInUtilityVM: true,
		}); err != nil {
			logrus.Debugf("%s: failed to hot-add scratch %s: %s", title, scratchTargetFile, err)
			return nil, fmt.Errorf("failed to hot-add %s: %s", scratchTargetFile, err)
		}
		svm.scratchAttached = true
	}

	logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) success", context)
	return svm, nil
}
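// As a usage sketch (with a hypothetical context string), this is the
// start/terminate pairing used by CreateReadWrite, Diff and ApplyDiff below:
//
//   svm, err := d.startServiceVMIfNotRunning(id, nil, "some-operation")
//   if err != nil {
//   	return err
//   }
//   defer d.terminateServiceVM(id, "some-operation", false)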
// terminateServiceVM terminates a service utility VM if it's running and no
// longer being used by any goroutine, but does nothing when in global mode as
// its lifetime is limited to that of the daemon. If the force flag is set,
// the VM will be killed regardless of the ref count or of global mode.
func (d *Driver) terminateServiceVM(id, context string, force bool) (err error) {
	// We don't do anything in global mode unless the force flag has been passed, which
	// is only the case for cleanup at driver termination.
	if d.globalMode && !force {
		logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - doing nothing as in global mode", id, context)
		return nil
	}

	id = d.getVMID(id)

	var svm *serviceVM
	var lastRef bool
	if !force {
		// In the non-force case, we ref count.
		svm, lastRef, err = d.serviceVms.decrementRefCount(id)
	} else {
		// In the force case, we ignore the ref count and just set it to 0.
		svm, err = d.serviceVms.setRefCountZero(id)
		lastRef = true
	}

	if err == errVMUnknown {
		return nil
	} else if err == errVMisTerminating {
		return svm.getStopError()
	} else if !lastRef {
		return nil
	}

	// We run the deletion of the scratch as a deferred function to at least attempt
	// clean-up in case of errors.
	defer func() {
		if svm.scratchAttached {
			scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
			logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - deleting scratch %s", id, context, scratchTargetFile)
			if errRemove := os.Remove(scratchTargetFile); errRemove != nil {
				logrus.Warnf("failed to remove scratch file %s (%s): %s", scratchTargetFile, context, errRemove)
				err = errRemove
			}
		}

		// This function shouldn't actually return an error unless there is a bug.
		if errDelete := d.serviceVms.deleteID(id); errDelete != nil {
			logrus.Warnf("failed to remove service vm from svm map %s (%s): %s", id, context, errDelete)
		}

		// Signal that this VM has stopped.
		svm.signalStopFinished(err)
	}()

	// It's possible that the service VM failed to start and we are now trying to terminate it.
	// In this case, we relay the error to the goroutines waiting for this vm to stop.
	if err := svm.getStartError(); err != nil {
		logrus.Debugf("lcowdriver: terminateservicevm: %s had failed to start up: %s", id, err)
		return err
	}

	if err := waitTerminate(svm, fmt.Sprintf("terminateservicevm: %s (%s)", id, context)); err != nil {
		return err
	}

	logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - success", id, context)
	return nil
}
func waitTerminate(svm *serviceVM, context string) error {
	if svm.config == nil {
		return fmt.Errorf("lcowdriver: waitTerminate: nil utility VM. %s", context)
	}

	logrus.Debugf("lcowdriver: waitTerminate: calling terminate: %s", context)
	if err := svm.config.Uvm.Terminate(); err != nil {
		// We might get "operation still pending" from the HCS. In that case, we shouldn't return
		// an error since we call wait right after.
		underlyingError := err
		if conterr, ok := err.(*hcsshim.ContainerError); ok {
			underlyingError = conterr.Err
		}

		if syscallErr, ok := underlyingError.(syscall.Errno); ok {
			underlyingError = syscallErr
		}

		if underlyingError != errOperationPending {
			return fmt.Errorf("failed to terminate utility VM (%s): %s", context, err)
		}
		logrus.Debugf("lcowdriver: waitTerminate: uvm.Terminate() returned operation pending (%s)", context)
	}

	logrus.Debugf("lcowdriver: waitTerminate: (%s) - waiting for utility VM to terminate", context)
	if err := svm.config.Uvm.WaitTimeout(time.Duration(svm.config.UvmTimeoutSeconds) * time.Second); err != nil {
		return fmt.Errorf("failed waiting for utility VM to terminate (%s): %s", context, err)
	}
	return nil
}

// String returns the string representation of a driver. This should match
// the name the graph driver has been registered with.
func (d *Driver) String() string {
	return "lcow"
}

// Status returns the status of the driver.
func (d *Driver) Status() [][2]string {
	return [][2]string{
		{"LCOW", ""},
		// TODO: Add some more info here - mode, home, ....
	}
}

// Exists returns true if the given id is registered with this driver.
func (d *Driver) Exists(id string) bool {
	_, err := os.Lstat(d.dir(id))
	logrus.Debugf("lcowdriver: exists: id %s %t", id, err == nil)
	return err == nil
}

// CreateReadWrite creates a layer that is writable for use as a container
// file system. That equates to creating a sandbox.
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
	title := fmt.Sprintf("lcowdriver: createreadwrite: id %s", id)
	logrus.Debugf(title)

	// First we need to create the folder.
	if err := d.Create(id, parent, opts); err != nil {
		return err
	}

	// Look for an explicit sandbox size option.
	sandboxSize := uint64(client.DefaultVhdxSizeGB)
	for k, v := range opts.StorageOpt {
		switch strings.ToLower(k) {
		case "lcow.sandboxsize":
			var err error
			sandboxSize, err = strconv.ParseUint(v, 10, 32)
			if err != nil {
				return fmt.Errorf("%s failed to parse value '%s' for 'lcow.sandboxsize'", title, v)
			}
			if sandboxSize < client.DefaultVhdxSizeGB {
				return fmt.Errorf("%s 'lcow.sandboxsize' option cannot be less than %d", title, client.DefaultVhdxSizeGB)
			}
		}
	}

	// Massive perf optimisation here. If we know that the RW layer is the default size,
	// the cached sandbox already exists, and we are running in safe mode, we
	// can just do a simple copy into the layer's sandbox file without needing to start a
	// unique service VM. For a global service VM, it doesn't really matter. Of course,
	// this only applies when the sandbox is the default size.
	//
	// Make sure we hold the sandbox mutex while we are examining it.
	if sandboxSize == client.DefaultVhdxSizeGB {
		logrus.Debugf("%s: locking cachedSandboxMutex", title)
		d.cachedSandboxMutex.Lock()
		_, err := os.Stat(d.cachedSandboxFile)
		logrus.Debugf("%s: releasing cachedSandboxMutex", title)
		d.cachedSandboxMutex.Unlock()
		if err == nil {
			logrus.Debugf("%s: using cached sandbox to populate", title)
			if err := client.CopyFile(d.cachedSandboxFile, filepath.Join(d.dir(id), sandboxFilename), true); err != nil {
				return err
			}
			return nil
		}
	}

	logrus.Debugf("%s: creating SVM to create sandbox", title)
	svm, err := d.startServiceVMIfNotRunning(id, nil, "createreadwrite")
	if err != nil {
		return err
	}
	defer d.terminateServiceVM(id, "createreadwrite", false)

	// So the sandbox needs creating. If it's the default size, ensure we are the only
	// thread populating the cache. Non-default sizes are not cached; they are created
	// as one-offs, so there is no need to hold the cachedSandboxMutex.
	if sandboxSize == client.DefaultVhdxSizeGB {
		logrus.Debugf("%s: locking cachedSandboxMutex for creation", title)
		d.cachedSandboxMutex.Lock()
		defer func() {
			logrus.Debugf("%s: releasing cachedSandboxMutex for creation", title)
			d.cachedSandboxMutex.Unlock()
		}()
	}

	// Make sure we don't write to our local cached copy if this is for a non-default size request.
	targetCacheFile := d.cachedSandboxFile
	if sandboxSize != client.DefaultVhdxSizeGB {
		targetCacheFile = ""
	}

	// Create the ext4 vhdx.
	logrus.Debugf("%s: creating sandbox ext4 vhdx", title)
	if err := svm.createExt4VHDX(filepath.Join(d.dir(id), sandboxFilename), uint32(sandboxSize), targetCacheFile); err != nil {
		logrus.Debugf("%s: failed to create sandbox vhdx for %s: %s", title, id, err)
		return err
	}
	return nil
}
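// For illustration only: the per-container sandbox size in opts.StorageOpt
// typically originates from a storage option supplied at container creation,
// e.g. the hypothetical invocation:
//
//   docker run --storage-opt lcow.sandboxsize=30 ...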
// Create creates the folder for the layer with the given id, and
// adds it to the layer chain.
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
	logrus.Debugf("lcowdriver: create: id %s parent: %s", id, parent)

	parentChain, err := d.getLayerChain(parent)
	if err != nil {
		return err
	}

	var layerChain []string
	if parent != "" {
		if !d.Exists(parent) {
			return fmt.Errorf("lcowdriver: cannot create layer folder with missing parent %s", parent)
		}
		layerChain = []string{d.dir(parent)}
	}
	layerChain = append(layerChain, parentChain...)

	// Make sure layers are created with the correct ACL so that VMs can access them.
	layerPath := d.dir(id)
	logrus.Debugf("lcowdriver: create: id %s: creating %s", id, layerPath)
	if err := system.MkdirAllWithACL(layerPath, 0755, system.SddlNtvmAdministratorsLocalSystem); err != nil {
		return err
	}

	if err := d.setLayerChain(id, layerChain); err != nil {
		if err2 := os.RemoveAll(layerPath); err2 != nil {
			logrus.Warnf("failed to remove layer %s: %s", layerPath, err2)
		}
		return err
	}
	logrus.Debugf("lcowdriver: create: id %s: success", id)

	return nil
}

// Remove unmounts and removes the dir information.
func (d *Driver) Remove(id string) error {
	logrus.Debugf("lcowdriver: remove: id %s", id)
	tmpID := fmt.Sprintf("%s-removing", id)
	tmpLayerPath := d.dir(tmpID)
	layerPath := d.dir(id)

	logrus.Debugf("lcowdriver: remove: id %s: layerPath %s", id, layerPath)

	// Unmount all the layers.
	err := d.Put(id)
	if err != nil {
		logrus.Debugf("lcowdriver: remove id %s: failed to unmount: %s", id, err)
		return err
	}

	// For the non-global case, just kill the vm.
	if !d.globalMode {
		if err := d.terminateServiceVM(id, fmt.Sprintf("Remove %s", id), true); err != nil {
			return err
		}
	}

	if err := os.Rename(layerPath, tmpLayerPath); err != nil && !os.IsNotExist(err) {
		return err
	}

	if err := os.RemoveAll(tmpLayerPath); err != nil {
		return err
	}

	logrus.Debugf("lcowdriver: remove: id %s: layerPath %s succeeded", id, layerPath)
	return nil
}

// Get returns the rootfs path for the id. It is reference counted and
// effectively can be thought of as "mount the layer into the utility
// VM if it isn't already". The contract with the caller is that
// all Gets and Puts are matched. It -should- be the case that on cleanup,
// nothing is mounted.
//
// As an optimisation, we don't actually mount the filesystem (which in our
// case means [hot-]adding it to a service VM). Instead we track that, and
// defer the actual adding to the point we need to access it.
func (d *Driver) Get(id, mountLabel string) (containerfs.ContainerFS, error) {
	title := fmt.Sprintf("lcowdriver: get: %s", id)
	logrus.Debugf(title)

	// Generate the mounts needed for the deferred operation.
	disks, err := d.getAllMounts(id)
	if err != nil {
		logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
		return nil, fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
	}

	logrus.Debugf("%s: got layer mounts: %+v", title, disks)
	return &lcowfs{
		root:        unionMountName(disks),
		d:           d,
		mappedDisks: disks,
		vmID:        d.getVMID(id),
	}, nil
}
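// A minimal caller-side sketch of the Get/Put contract (hypothetical layer
// id; error handling elided):
//
//   fs, err := d.Get(id, "")
//   if err != nil {
//   	return err
//   }
//   defer d.Put(id) // every Get must be matched by exactly one Put
//   // ... work against fs.Path(), a path within the utility VM ...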
// Put does the reverse of Get. If there are no more references to
// the layer, it unmounts it from the utility VM.
func (d *Driver) Put(id string) error {
	title := fmt.Sprintf("lcowdriver: put: %s", id)

	// Get the service VM that we need to remove from.
	svm, err := d.serviceVms.get(d.getVMID(id))
	if err == errVMUnknown {
		return nil
	} else if err == errVMisTerminating {
		return svm.getStopError()
	}

	// Generate the mounts that Get() might have mounted.
	disks, err := d.getAllMounts(id)
	if err != nil {
		logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
		return fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
	}

	// Now we want to perform the unmounts, hot-remove and stop the service vm.
	// We want to go through all the steps even if we have an error, to clean up properly.
	err = svm.deleteUnionMount(unionMountName(disks), disks...)
	if err != nil {
		logrus.Debugf("%s failed to delete union mount %s: %s", title, id, err)
	}

	err1 := svm.hotRemoveVHDs(disks...)
	if err1 != nil {
		logrus.Debugf("%s failed to hot remove vhds %s: %s", title, id, err1)
		if err == nil {
			err = err1
		}
	}

	err1 = d.terminateServiceVM(id, fmt.Sprintf("Put %s", id), false)
	if err1 != nil {
		logrus.Debugf("%s failed to terminate service vm %s: %s", title, id, err1)
		if err == nil {
			err = err1
		}
	}
	logrus.Debugf("Put succeeded on id %s", id)
	return err
}

// Cleanup ensures the information the driver stores is properly removed.
// We use this opportunity to clean up any -removing folders which may
// still be left if the daemon was killed while it was removing a layer.
func (d *Driver) Cleanup() error {
	title := "lcowdriver: cleanup"

	items, err := ioutil.ReadDir(d.dataRoot)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	// Note we don't return an error below - it's possible the files
	// are locked. However, next time around after the daemon exits,
	// we likely will be able to clean up successfully. Instead we log
	// warnings if there are errors.
	for _, item := range items {
		if item.IsDir() && strings.HasSuffix(item.Name(), "-removing") {
			if err := os.RemoveAll(filepath.Join(d.dataRoot, item.Name())); err != nil {
				logrus.Warnf("%s failed to cleanup %s: %s", title, item.Name(), err)
			} else {
				logrus.Infof("%s cleaned up %s", title, item.Name())
			}
		}
	}

	// Cleanup any service VMs we have running, along with their scratch spaces.
	// We don't take the lock for this as it's taken in terminateServiceVm.
	for k, v := range d.serviceVms.svms {
		logrus.Debugf("%s svm entry: %s: %+v", title, k, v)
		d.terminateServiceVM(k, "cleanup", true)
	}

	return nil
}
// Diff takes a layer (and its parent layer, which may be null but
// is ignored by this implementation below) and returns a reader for
// a tarstream representing the layer's contents. The id could be
// a read-only "layer.vhd" or a read-write "sandbox.vhdx". The semantics
// of this function dictate that the layer is already mounted.
// However, as we do lazy mounting as a performance optimisation,
// this will likely not be the case.
func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
	title := fmt.Sprintf("lcowdriver: diff: %s", id)

	// Get VHDX info.
	ld, err := getLayerDetails(d.dir(id))
	if err != nil {
		logrus.Debugf("%s: failed to get vhdx information of %s: %s", title, d.dir(id), err)
		return nil, err
	}

	// Start the SVM with a mapped virtual disk. Note that if the SVM is
	// already running and we are in global mode, this will be
	// hot-added.
	mvd := hcsshim.MappedVirtualDisk{
		HostPath:          ld.filename,
		ContainerPath:     hostToGuest(ld.filename),
		CreateInUtilityVM: true,
		ReadOnly:          true,
	}

	logrus.Debugf("%s: starting service VM", title)
	svm, err := d.startServiceVMIfNotRunning(id, []hcsshim.MappedVirtualDisk{mvd}, fmt.Sprintf("diff %s", id))
	if err != nil {
		return nil, err
	}

	logrus.Debugf("lcowdriver: diff: waiting for svm to finish booting")
	err = svm.getStartError()
	if err != nil {
		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
		return nil, fmt.Errorf("lcowdriver: diff: svm failed to boot: %s", err)
	}

	// Obtain the tar stream for it.
	logrus.Debugf("%s: %s %s, size %d, ReadOnly %t", title, ld.filename, mvd.ContainerPath, ld.size, ld.isSandbox)
	tarReadCloser, err := svm.config.VhdToTar(mvd.HostPath, mvd.ContainerPath, ld.isSandbox, ld.size)
	if err != nil {
		svm.hotRemoveVHDs(mvd)
		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
		return nil, fmt.Errorf("%s failed to export layer to tar stream for id: %s, parent: %s : %s", title, id, parent, err)
	}

	logrus.Debugf("%s id %s parent %s completed successfully", title, id, parent)

	// In safe/non-global mode, we can't tear down the service VM until things have been read.
	return ioutils.NewReadCloserWrapper(tarReadCloser, func() error {
		tarReadCloser.Close()
		svm.hotRemoveVHDs(mvd)
		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
		return nil
	}), nil
}

// ApplyDiff extracts the changeset from the given diff into the
// layer with the specified id and parent, returning the size of the
// new layer in bytes. The layer should not be mounted when calling
// this function. Another way of describing this is that ApplyDiff writes
// to a new layer (a VHD in LCOW) the contents of a tarstream it's given.
func (d *Driver) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
	logrus.Debugf("lcowdriver: applydiff: id %s", id)

	svm, err := d.startServiceVMIfNotRunning(id, nil, fmt.Sprintf("applydiff %s", id))
	if err != nil {
		return 0, err
	}
	defer d.terminateServiceVM(id, fmt.Sprintf("applydiff %s", id), false)

	logrus.Debugf("lcowdriver: applydiff: waiting for svm to finish booting")
	err = svm.getStartError()
	if err != nil {
		return 0, fmt.Errorf("lcowdriver: applydiff: svm failed to boot: %s", err)
	}

	// TODO @jhowardmsft - the retries are temporary to overcome platform reliability issues.
	// Obviously this will be removed as platform bugs are fixed. Note also that
	// the diff reader is consumed by the first attempt, so a retry can only
	// succeed for failures that occur before any data has been read.
	retries := 0
	for {
		retries++
		size, err := svm.config.TarToVhd(filepath.Join(d.dataRoot, id, layerFilename), diff)
		if err != nil {
			if retries <= 10 {
				continue
			}
			return 0, err
		}
		return size, err
	}
}
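// A caller-side sketch of the export path (hypothetical ids): the stream
// returned by Diff stays valid until closed, and it is Close() that
// hot-removes the VHD and releases the service VM:
//
//   rc, err := d.Diff(id, parent)
//   if err != nil {
//   	return err
//   }
//   defer rc.Close()
//   _, err = io.Copy(dst, rc)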
// Changes produces a list of changes between the specified layer
// and its parent layer. If parent is "", then all changes will be ADD changes.
// The layer should not be mounted when calling this function.
func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
	logrus.Debugf("lcowdriver: changes: id %s parent %s", id, parent)
	// TODO @gupta-ak. Needs implementation with assistance from service VM.
	return nil, nil
}

// DiffSize calculates the changes between the specified layer
// and its parent and returns the size in bytes of the changes
// relative to its base filesystem directory.
func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
	logrus.Debugf("lcowdriver: diffsize: id %s", id)
	// TODO @gupta-ak. Needs implementation with assistance from service VM.
	return 0, nil
}

// GetMetadata returns custom driver information.
func (d *Driver) GetMetadata(id string) (map[string]string, error) {
	logrus.Debugf("lcowdriver: getmetadata: id %s", id)
	m := make(map[string]string)
	m["dir"] = d.dir(id)
	return m, nil
}

// GetLayerPath gets the layer path on the host (the path to the folder
// containing the VHD/VHDX).
func (d *Driver) GetLayerPath(id string) (string, error) {
	return d.dir(id), nil
}

// dir returns the absolute path to the layer.
func (d *Driver) dir(id string) string {
	return filepath.Join(d.dataRoot, filepath.Base(id))
}

// getLayerChain returns the layer chain information.
func (d *Driver) getLayerChain(id string) ([]string, error) {
	jPath := filepath.Join(d.dir(id), "layerchain.json")
	logrus.Debugf("lcowdriver: getlayerchain: id %s json %s", id, jPath)
	content, err := ioutil.ReadFile(jPath)
	if os.IsNotExist(err) {
		return nil, nil
	} else if err != nil {
		return nil, fmt.Errorf("lcowdriver: getlayerchain: %s unable to read layerchain file %s: %s", id, jPath, err)
	}

	var layerChain []string
	err = json.Unmarshal(content, &layerChain)
	if err != nil {
		return nil, fmt.Errorf("lcowdriver: getlayerchain: %s failed to unmarshal layerchain file %s: %s", id, jPath, err)
	}
	return layerChain, nil
}

// setLayerChain stores the layer chain information on disk.
func (d *Driver) setLayerChain(id string, chain []string) error {
	content, err := json.Marshal(&chain)
	if err != nil {
		return fmt.Errorf("lcowdriver: setlayerchain: %s failed to marshal layerchain json: %s", id, err)
	}

	jPath := filepath.Join(d.dir(id), "layerchain.json")
	logrus.Debugf("lcowdriver: setlayerchain: id %s json %s", id, jPath)
	err = ioutil.WriteFile(jPath, content, 0600)
	if err != nil {
		return fmt.Errorf("lcowdriver: setlayerchain: %s failed to write layerchain file: %s", id, err)
	}
	return nil
}
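// For illustration, layerchain.json is a flat JSON array of the parent layer
// folders on the host, ordered from the immediate parent downwards
// (hypothetical data-root and ids shown):
//
//   ["C:\\ProgramData\\docker\\lcow\\<parent-id>",
//    "C:\\ProgramData\\docker\\lcow\\<grandparent-id>"]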
// getLayerDetails is a utility for getting the file name, size, and an
// indication of whether a VHD(x) in a folder is a sandbox. A read-only layer
// will be layer.vhd. A read-write layer will be sandbox.vhdx.
func getLayerDetails(folder string) (*layerDetails, error) {
	var fileInfo os.FileInfo
	ld := &layerDetails{
		isSandbox: false,
		filename:  filepath.Join(folder, layerFilename),
	}

	fileInfo, err := os.Stat(ld.filename)
	if err != nil {
		ld.filename = filepath.Join(folder, sandboxFilename)
		if fileInfo, err = os.Stat(ld.filename); err != nil {
			return nil, fmt.Errorf("failed to locate layer or sandbox in %s", folder)
		}
		ld.isSandbox = true
	}
	ld.size = fileInfo.Size()

	return ld, nil
}

// getAllMounts returns the mapped virtual disks for the layer with the given
// id and all of its parents, with only the topmost sandbox writable.
func (d *Driver) getAllMounts(id string) ([]hcsshim.MappedVirtualDisk, error) {
	layerChain, err := d.getLayerChain(id)
	if err != nil {
		return nil, err
	}
	layerChain = append([]string{d.dir(id)}, layerChain...)

	logrus.Debugf("getting all layers: %v", layerChain)
	disks := make([]hcsshim.MappedVirtualDisk, len(layerChain))
	for i := range layerChain {
		ld, err := getLayerDetails(layerChain[i])
		if err != nil {
			logrus.Debugf("failed to get layer details from %s: %s", layerChain[i], err)
			return nil, err
		}
		disks[i].HostPath = ld.filename
		disks[i].ContainerPath = hostToGuest(ld.filename)
		disks[i].CreateInUtilityVM = true
		disks[i].ReadOnly = !ld.isSandbox
	}
	return disks, nil
}

// hostToGuest maps a host layer file to its mount point inside the utility
// VM. The guest path is derived from the name of the layer folder, not the
// file within it.
func hostToGuest(hostpath string) string {
	return fmt.Sprintf("/tmp/%s", filepath.Base(filepath.Dir(hostpath)))
}

// unionMountName returns the guest path of the union filesystem mount for the
// given set of disks, derived from the topmost (sandbox) layer.
func unionMountName(disks []hcsshim.MappedVirtualDisk) string {
	return fmt.Sprintf("%s-mount", disks[0].ContainerPath)
}
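// For illustration (hypothetical data-root and id): a host layer at
//
//   C:\ProgramData\docker\lcow\<id>\layer.vhd
//
// is attached inside the utility VM at /tmp/<id>, and the writable union
// filesystem for a container whose sandbox lives in <id> is mounted at
// /tmp/<id>-mount.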