github.com/ssdev-go/moby@v17.12.1-ce-rc2+incompatible/daemon/graphdriver/lcow/lcow.go

// +build windows

// Maintainer:  jhowardmsft
// Locale:      en-gb
// About:       Graph-driver for Linux Containers On Windows (LCOW)
//
// This graphdriver runs in two modes; it is yet to be determined which one
// will be the shipping mode. In global mode, a single utility VM is used for
// all service VM tool operations. This isn't safe security-wise, as the
// sandboxes of multiple containers, containing untrusted data, are attached
// to the one VM. That may be fine for client devops scenarios. In safe mode,
// a unique utility VM is instantiated for each service VM tool operation. The
// downside of safe mode is that operations are slower, as a new service
// utility VM has to be started and torn down whenever needed.
//
// Options:
//
// The following options are read by the graphdriver itself:
//
//   * lcow.globalmode - Enables global service VM mode
//        -- Possible values:     true/false
//        -- Default if omitted:  false
//
//   * lcow.sandboxsize - Specifies a custom sandbox size in GB for starting a container
//        -- Possible values:     >= default sandbox size (opengcs defined, currently 20)
//        -- Default if omitted:  20
//
// The following options are read by opengcs:
//
//   * lcow.kirdpath - Specifies a custom path to a kernel/initrd pair
//        -- Possible values:     Any local path that is not a mapped drive
//        -- Default if omitted:  %ProgramFiles%\Linux Containers
//
//   * lcow.kernel - Specifies a custom kernel file located in the `lcow.kirdpath` path
//        -- Possible values:     Any valid filename
//        -- Default if omitted:  bootx64.efi
//
//   * lcow.initrd - Specifies a custom initrd file located in the `lcow.kirdpath` path
//        -- Possible values:     Any valid filename
//        -- Default if omitted:  initrd.img
//
//   * lcow.bootparameters - Specifies additional boot parameters for booting in kernel+initrd mode
//        -- Possible values:     Any valid Linux kernel boot options
//        -- Default if omitted:  <nil>
//
//   * lcow.vhdx - Specifies a custom vhdx file to boot (instead of a kernel+initrd)
//        -- Possible values:     Any valid filename
//        -- Default if omitted:  uvm.vhdx under `lcow.kirdpath`
//
//   * lcow.timeout - Specifies a timeout for utility VM operations in seconds
//        -- Possible values:     >= 0
//        -- Default if omitted:  300
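//
// For illustration only: options of this shape reach InitDriver as "key=value"
// strings via the daemon's storage options, e.g. (hypothetical values):
//
//   dockerd --storage-opt lcow.globalmode=false \
//           --storage-opt lcow.sandboxsize=20 \
//           --storage-opt lcow.timeout=300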

// TODO: Grab logs from SVM at terminate or errors

package lcow

import (
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Microsoft/hcsshim"
	"github.com/Microsoft/opengcs/client"
	"github.com/docker/docker/daemon/graphdriver"
	"github.com/docker/docker/pkg/archive"
	"github.com/docker/docker/pkg/containerfs"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/ioutils"
	"github.com/docker/docker/pkg/system"
	"github.com/sirupsen/logrus"
)

// init registers this driver with the graphdriver registry. The driver is
// initialised via InitDriver, implemented below in this file.
func init() {
	graphdriver.Register("lcow", InitDriver)
}

const (
	// sandboxFilename is the name of the file containing a layer's sandbox (read-write layer).
	sandboxFilename = "sandbox.vhdx"

	// scratchFilename is the name of the scratch-space used by an SVM to avoid running out of memory.
	scratchFilename = "scratch.vhdx"

	// layerFilename is the name of the file containing a layer's read-only contents.
	// Note this really is VHD format, not VHDX.
	layerFilename = "layer.vhd"

	// toolsScratchPath is a location in a service utility VM that the tools can use as a
	// scratch space to avoid running out of memory.
	toolsScratchPath = "/tmp/scratch"

	// svmGlobalID is the ID used in the serviceVMs map for the global service VM when running in "global" mode.
	svmGlobalID = "_lcow_global_svm_"

	// cacheDirectory is the sub-folder under the driver's data-root used to cache blank sandbox and scratch VHDs.
	cacheDirectory = "cache"

	// scratchDirectory is the sub-folder under the driver's data-root used for scratch VHDs in service VMs
	scratchDirectory = "scratch"

	// errOperationPending is the HRESULT returned by the HCS when the VM termination operation is still pending.
	errOperationPending syscall.Errno = 0xc0370103
)
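
// For reference, the on-disk layout implied by the constants above (and the
// dir() helper below) looks roughly like this, with illustrative layer IDs:
//
//   <data-root>\cache\sandbox.vhdx         - cached blank default-sized sandbox
//   <data-root>\cache\scratch.vhdx         - cached blank scratch space
//   <data-root>\scratch\<id>.vhdx          - per-service-VM scratch space
//   <data-root>\<layer-id>\layer.vhd       - read-only layer contents
//   <data-root>\<layer-id>\sandbox.vhdx    - read-write (sandbox) layer
//   <data-root>\<layer-id>\layerchain.json - the layer's parent chain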

// Driver represents an LCOW graph driver.
type Driver struct {
	dataRoot           string     // Root path on the host where we are storing everything.
	cachedSandboxFile  string     // Location of the local default-sized cached sandbox.
	cachedSandboxMutex sync.Mutex // Protects race conditions from multiple threads creating the cached sandbox.
	cachedScratchFile  string     // Location of the local cached empty scratch space.
	cachedScratchMutex sync.Mutex // Protects race conditions from multiple threads creating the cached scratch.
	options            []string   // Graphdriver options we are initialised with.
	globalMode         bool       // Indicates if running in an unsafe/global service VM mode.

	// NOTE: It is OK to use a cache here because Windows does not support
	// restoring containers when the daemon dies.
	serviceVms *serviceVMMap // Map of the configs representing the service VM(s) we are running.
}

// layerDetails is the structure returned by a helper function `getLayerDetails`
// for getting information about a layer folder
type layerDetails struct {
	filename  string // \path\to\sandbox.vhdx or \path\to\layer.vhd
	size      int64  // size of the above file
	isSandbox bool   // true if sandbox.vhdx
}

// deletefiles is a helper used during initialisation to delete any left-over
// scratch files, in case we were previously forcibly terminated.
func deletefiles(path string, f os.FileInfo, err error) error {
	// Propagate any error from Walk itself; f may be nil in that case.
	if err != nil {
		return err
	}
	if strings.HasSuffix(f.Name(), ".vhdx") {
		logrus.Warnf("lcowdriver: init: deleting stale scratch file %s", path)
		return os.Remove(path)
	}
	return nil
}

// InitDriver returns a new LCOW storage driver.
func InitDriver(dataRoot string, options []string, _, _ []idtools.IDMap) (graphdriver.Driver, error) {
	title := "lcowdriver: init:"

	cd := filepath.Join(dataRoot, cacheDirectory)
	sd := filepath.Join(dataRoot, scratchDirectory)

	d := &Driver{
		dataRoot:          dataRoot,
		options:           options,
		cachedSandboxFile: filepath.Join(cd, sandboxFilename),
		cachedScratchFile: filepath.Join(cd, scratchFilename),
		serviceVms: &serviceVMMap{
			svms: make(map[string]*serviceVMMapItem),
		},
		globalMode: false,
	}

	// Look for relevant options.
	for _, v := range options {
		opt := strings.SplitN(v, "=", 2)
		if len(opt) == 2 {
			switch strings.ToLower(opt[0]) {
			case "lcow.globalmode":
				var err error
				d.globalMode, err = strconv.ParseBool(opt[1])
				if err != nil {
					return nil, fmt.Errorf("%s failed to parse value for 'lcow.globalmode' - must be 'true' or 'false'", title)
				}
			}
		}
	}

	// Make sure the dataRoot directory is created
	if err := idtools.MkdirAllAndChown(dataRoot, 0700, idtools.IDPair{UID: 0, GID: 0}); err != nil {
		return nil, fmt.Errorf("%s failed to create '%s': %v", title, dataRoot, err)
	}

	// Make sure the cache directory is created under dataRoot
	if err := idtools.MkdirAllAndChown(cd, 0700, idtools.IDPair{UID: 0, GID: 0}); err != nil {
		return nil, fmt.Errorf("%s failed to create '%s': %v", title, cd, err)
	}

	// Make sure the scratch directory is created under dataRoot
	if err := idtools.MkdirAllAndChown(sd, 0700, idtools.IDPair{UID: 0, GID: 0}); err != nil {
		return nil, fmt.Errorf("%s failed to create '%s': %v", title, sd, err)
	}

	// Delete any items in the scratch directory
	filepath.Walk(sd, deletefiles)

	logrus.Infof("%s dataRoot: %s globalMode: %t", title, dataRoot, d.globalMode)

	return d, nil
}

func (d *Driver) getVMID(id string) string {
	if d.globalMode {
		return svmGlobalID
	}
	return id
}

// startServiceVMIfNotRunning starts a service utility VM if it is not currently running.
// It can optionally be started with a mapped virtual disk. Returns an opengcs config structure
// representing the VM.
func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd []hcsshim.MappedVirtualDisk, context string) (_ *serviceVM, err error) {
	// Use the global ID if in global mode
	id = d.getVMID(id)

	title := fmt.Sprintf("lcowdriver: startservicevmifnotrunning %s:", id)

	// Attempt to add ID to the service vm map
	logrus.Debugf("%s: Adding entry to service vm map", title)
	svm, exists, err := d.serviceVms.add(id)
	if err == errVMisTerminating {
		// VM is in the process of terminating. Wait until it's done and then try again.
		logrus.Debugf("%s: VM with current ID still in the process of terminating: %s", title, id)
		if err := svm.getStopError(); err != nil {
			logrus.Debugf("%s: VM %s did not stop successfully: %s", title, id, err)
			return nil, err
		}
		return d.startServiceVMIfNotRunning(id, mvdToAdd, context)
	} else if err != nil {
		logrus.Debugf("%s: failed to add service vm to map: %s", title, err)
		return nil, fmt.Errorf("%s: failed to add to service vm map: %s", title, err)
	}

	if exists {
		// Service VM is already up and running. In this case, just hot add the vhds.
		logrus.Debugf("%s: service vm already exists. Just hot adding: %+v", title, mvdToAdd)
		if err := svm.hotAddVHDs(mvdToAdd...); err != nil {
			logrus.Debugf("%s: failed to hot add vhds on service vm creation: %s", title, err)
			return nil, fmt.Errorf("%s: failed to hot add vhds on service vm: %s", title, err)
		}
		return svm, nil
	}

	// We are the first service for this id, so we need to start it
	logrus.Debugf("%s: service vm doesn't exist. Now starting it up: %s", title, id)

	defer func() {
		// Signal that start has finished, passing in the error if any.
		svm.signalStartFinished(err)
		if err != nil {
			// We added a ref to the VM; since we failed, we should delete the ref.
			d.terminateServiceVM(id, "error path on startServiceVMIfNotRunning", false)
		}
	}()

	// Generate a default configuration
	if err := svm.config.GenerateDefault(d.options); err != nil {
		return nil, fmt.Errorf("%s failed to generate default gogcs configuration for global svm (%s): %s", title, context, err)
	}

	// For the name, we deliberately add a suffix in safe mode to ensure that it doesn't
	// clash with another utility VM which may be running for the container itself.
	// This also makes it easier to correlate through Get-ComputeProcess.
	if id == svmGlobalID {
		svm.config.Name = svmGlobalID
	} else {
		svm.config.Name = fmt.Sprintf("%s_svm", id)
	}

	// Ensure we take the cached scratch mutex around the check to ensure the file is complete
	// and not in the process of being created by another thread.
	scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))

	logrus.Debugf("%s locking cachedScratchMutex", title)
	d.cachedScratchMutex.Lock()
	if _, err := os.Stat(d.cachedScratchFile); err == nil {
		// Make a copy of cached scratch to the scratch directory
		logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) cloning cached scratch for mvd", context)
		if err := client.CopyFile(d.cachedScratchFile, scratchTargetFile, true); err != nil {
			logrus.Debugf("%s releasing cachedScratchMutex on err: %s", title, err)
			d.cachedScratchMutex.Unlock()
			return nil, err
		}

		// Add the cached clone as a mapped virtual disk
		logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) adding cloned scratch as mvd", context)
		mvd := hcsshim.MappedVirtualDisk{
			HostPath:          scratchTargetFile,
			ContainerPath:     toolsScratchPath,
			CreateInUtilityVM: true,
		}
		svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvd)
		svm.scratchAttached = true
	}

	logrus.Debugf("%s releasing cachedScratchMutex", title)
	d.cachedScratchMutex.Unlock()

	// If requested to start it with a mapped virtual disk, add it now.
	svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvdToAdd...)
	for _, mvd := range svm.config.MappedVirtualDisks {
		svm.attachedVHDs[mvd.HostPath] = 1
	}

	// Start it.
	logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) starting %s", context, svm.config.Name)
	if err := svm.config.StartUtilityVM(); err != nil {
		return nil, fmt.Errorf("failed to start service utility VM (%s): %s", context, err)
	}

	// defer function to terminate the VM if the next steps fail
	defer func() {
		if err != nil {
			waitTerminate(svm, fmt.Sprintf("startServiceVmIfNotRunning: %s (%s)", id, context))
		}
	}()

	// Now we have a running service VM, we can create the cached scratch file if it doesn't exist.
	logrus.Debugf("%s locking cachedScratchMutex", title)
	d.cachedScratchMutex.Lock()
	if _, err := os.Stat(d.cachedScratchFile); err != nil {
		logrus.Debugf("%s (%s): creating an SVM scratch", title, context)

		// Don't use svm.CreateExt4Vhdx since that only works when the service vm is set up,
		// but we're still in that process right now.
		if err := svm.config.CreateExt4Vhdx(scratchTargetFile, client.DefaultVhdxSizeGB, d.cachedScratchFile); err != nil {
			logrus.Debugf("%s (%s): releasing cachedScratchMutex on error path", title, context)
			d.cachedScratchMutex.Unlock()
			logrus.Debugf("%s: failed to create vm scratch %s: %s", title, scratchTargetFile, err)
			return nil, fmt.Errorf("failed to create SVM scratch VHDX (%s): %s", context, err)
		}
	}
	logrus.Debugf("%s (%s): releasing cachedScratchMutex", title, context)
	d.cachedScratchMutex.Unlock()

	// Hot-add the scratch-space if not already attached
	if !svm.scratchAttached {
		logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) hot-adding scratch %s", context, scratchTargetFile)
		if err := svm.hotAddVHDsAtStart(hcsshim.MappedVirtualDisk{
			HostPath:          scratchTargetFile,
			ContainerPath:     toolsScratchPath,
			CreateInUtilityVM: true,
		}); err != nil {
			logrus.Debugf("%s: failed to hot-add scratch %s: %s", title, scratchTargetFile, err)
			return nil, fmt.Errorf("failed to hot-add %s: %s", scratchTargetFile, err)
		}
		svm.scratchAttached = true
	}

	logrus.Debugf("lcowdriver: startServiceVmIfNotRunning: (%s) success", context)
	return svm, nil
}

// terminateServiceVM terminates a service utility VM if it's running and not
// being used by any other goroutine, but does nothing when in global mode as its
// lifetime is limited to that of the daemon. If the force flag is set, then
// the VM will be killed regardless of the ref count or if it's global.
func (d *Driver) terminateServiceVM(id, context string, force bool) (err error) {
	// We don't do anything in global mode unless the force flag has been passed,
	// which is only the case for cleanup at driver termination.
	if d.globalMode && !force {
		logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - doing nothing as in global mode", id, context)
		return nil
	}

	id = d.getVMID(id)

	var svm *serviceVM
	var lastRef bool
	if !force {
		// In the non-force case, we ref count
		svm, lastRef, err = d.serviceVms.decrementRefCount(id)
	} else {
		// In the force case, we ignore the ref count and just set it to 0
		svm, err = d.serviceVms.setRefCountZero(id)
		lastRef = true
	}

	if err == errVMUnknown {
		return nil
	} else if err == errVMisTerminating {
		return svm.getStopError()
	} else if !lastRef {
		return nil
	}

	// We run the deletion of the scratch as a deferred function to at least attempt
	// clean-up in case of errors.
	defer func() {
		if svm.scratchAttached {
			scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
			logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - deleting scratch %s", id, context, scratchTargetFile)
			if errRemove := os.Remove(scratchTargetFile); errRemove != nil {
				logrus.Warnf("failed to remove scratch file %s (%s): %s", scratchTargetFile, context, errRemove)
				err = errRemove
			}
		}

		// This function shouldn't actually return an error unless there is a bug
		if errDelete := d.serviceVms.deleteID(id); errDelete != nil {
			logrus.Warnf("failed to delete service vm from svm map %s (%s): %s", id, context, errDelete)
		}

		// Signal that this VM has stopped
		svm.signalStopFinished(err)
	}()

	// Now it's possible that the service VM failed to start and now we are trying to terminate it.
	// In this case, we will relay the error to the goroutines waiting for this vm to stop.
	if err := svm.getStartError(); err != nil {
		logrus.Debugf("lcowdriver: terminateservicevm: %s had failed to start up: %s", id, err)
		return err
	}

	if err := waitTerminate(svm, fmt.Sprintf("terminateservicevm: %s (%s)", id, context)); err != nil {
		return err
	}

	logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - success", id, context)
	return nil
}

func waitTerminate(svm *serviceVM, context string) error {
	if svm.config == nil {
		return fmt.Errorf("lcowdriver: waitTerminate: nil utility VM. %s", context)
	}

	logrus.Debugf("lcowdriver: waitTerminate: Calling terminate: %s", context)
	if err := svm.config.Uvm.Terminate(); err != nil {
		// We might get operation still pending from the HCS. In that case, we shouldn't return
		// an error since we call wait right after.
		underlyingError := err
		if conterr, ok := err.(*hcsshim.ContainerError); ok {
			underlyingError = conterr.Err
		}

		if syscallErr, ok := underlyingError.(syscall.Errno); ok {
			underlyingError = syscallErr
		}

		if underlyingError != errOperationPending {
			return fmt.Errorf("failed to terminate utility VM (%s): %s", context, err)
		}
		logrus.Debugf("lcowdriver: waitTerminate: uvm.Terminate() returned operation pending (%s)", context)
	}

	logrus.Debugf("lcowdriver: waitTerminate: (%s) - waiting for utility VM to terminate", context)
	if err := svm.config.Uvm.WaitTimeout(time.Duration(svm.config.UvmTimeoutSeconds) * time.Second); err != nil {
		return fmt.Errorf("failed waiting for utility VM to terminate (%s): %s", context, err)
	}
	return nil
}

// String returns the string representation of a driver. This should match
// the name the graph driver has been registered with.
func (d *Driver) String() string {
	return "lcow"
}

// Status returns the status of the driver.
func (d *Driver) Status() [][2]string {
	return [][2]string{
		{"LCOW", ""},
		// TODO: Add some more info here - mode, home, ....
	}
}

// Exists returns true if the given id is registered with this driver.
func (d *Driver) Exists(id string) bool {
	_, err := os.Lstat(d.dir(id))
	logrus.Debugf("lcowdriver: exists: id %s %t", id, err == nil)
	return err == nil
}

// CreateReadWrite creates a layer that is writable for use as a container
// file system. That equates to creating a sandbox.
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
	title := fmt.Sprintf("lcowdriver: createreadwrite: id %s", id)
	logrus.Debugf(title)

	// First we need to create the folder
	if err := d.Create(id, parent, opts); err != nil {
		return err
	}

	// Look for an explicit sandbox size option.
	sandboxSize := uint64(client.DefaultVhdxSizeGB)
	for k, v := range opts.StorageOpt {
		switch strings.ToLower(k) {
		case "lcow.sandboxsize":
			var err error
			sandboxSize, err = strconv.ParseUint(v, 10, 32)
			if err != nil {
				return fmt.Errorf("%s failed to parse value '%s' for 'lcow.sandboxsize'", title, v)
			}
			if sandboxSize < client.DefaultVhdxSizeGB {
				return fmt.Errorf("%s 'lcow.sandboxsize' option cannot be less than %d", title, client.DefaultVhdxSizeGB)
			}
		}
	}

	// Massive perf optimisation here. If we know that the RW layer is the default size,
	// and that the cached sandbox already exists, and we are running in safe mode, we
	// can just do a simple copy into the layer's sandbox file without needing to start a
	// unique service VM. For a global service VM, it doesn't really matter. Of course,
	// this is only the case where the sandbox is the default size.
	//
	// Make sure we have the sandbox mutex taken while we are examining it.
	if sandboxSize == client.DefaultVhdxSizeGB {
		logrus.Debugf("%s: locking cachedSandboxMutex", title)
		d.cachedSandboxMutex.Lock()
		_, err := os.Stat(d.cachedSandboxFile)
		logrus.Debugf("%s: releasing cachedSandboxMutex", title)
		d.cachedSandboxMutex.Unlock()
		if err == nil {
			logrus.Debugf("%s: using cached sandbox to populate", title)
			if err := client.CopyFile(d.cachedSandboxFile, filepath.Join(d.dir(id), sandboxFilename), true); err != nil {
				return err
			}
			return nil
		}
	}

	logrus.Debugf("%s: creating SVM to create sandbox", title)
	svm, err := d.startServiceVMIfNotRunning(id, nil, "createreadwrite")
	if err != nil {
		return err
	}
	defer d.terminateServiceVM(id, "createreadwrite", false)

	// So the sandbox needs creating. If it's the default size, ensure we are the only
	// thread populating the cache. We don't cache non-default sizes; they are created
	// one-off, so there is no need to lock the cachedSandboxMutex for them.
	if sandboxSize == client.DefaultVhdxSizeGB {
		logrus.Debugf("%s: locking cachedSandboxMutex for creation", title)
		d.cachedSandboxMutex.Lock()
		defer func() {
			logrus.Debugf("%s: releasing cachedSandboxMutex for creation", title)
			d.cachedSandboxMutex.Unlock()
		}()
	}

	// Make sure we don't write to our local cached copy if this is for a non-default size request.
	targetCacheFile := d.cachedSandboxFile
	if sandboxSize != client.DefaultVhdxSizeGB {
		targetCacheFile = ""
	}

	// Create the ext4 vhdx
	logrus.Debugf("%s: creating sandbox ext4 vhdx", title)
	if err := svm.createExt4VHDX(filepath.Join(d.dir(id), sandboxFilename), uint32(sandboxSize), targetCacheFile); err != nil {
		logrus.Debugf("%s: failed to create sandbox vhdx for %s: %s", title, id, err)
		return err
	}
	return nil
}

// Create creates the folder for the layer with the given id, and
// adds it to the layer chain.
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
	logrus.Debugf("lcowdriver: create: id %s parent: %s", id, parent)

	parentChain, err := d.getLayerChain(parent)
	if err != nil {
		return err
	}

	var layerChain []string
	if parent != "" {
		if !d.Exists(parent) {
			return fmt.Errorf("lcowdriver: cannot create layer folder with missing parent %s", parent)
		}
		layerChain = []string{d.dir(parent)}
	}
	layerChain = append(layerChain, parentChain...)

	// Make sure layers are created with the correct ACL so that VMs can access them.
	layerPath := d.dir(id)
	logrus.Debugf("lcowdriver: create: id %s: creating %s", id, layerPath)
	if err := system.MkdirAllWithACL(layerPath, 0755, system.SddlNtvmAdministratorsLocalSystem); err != nil {
		return err
	}

	if err := d.setLayerChain(id, layerChain); err != nil {
		if err2 := os.RemoveAll(layerPath); err2 != nil {
			logrus.Warnf("failed to remove layer %s: %s", layerPath, err2)
		}
		return err
	}
	logrus.Debugf("lcowdriver: create: id %s: success", id)

	return nil
}

// Remove unmounts and removes the dir information.
func (d *Driver) Remove(id string) error {
	logrus.Debugf("lcowdriver: remove: id %s", id)
	tmpID := fmt.Sprintf("%s-removing", id)
	tmpLayerPath := d.dir(tmpID)
	layerPath := d.dir(id)

	logrus.Debugf("lcowdriver: remove: id %s: layerPath %s", id, layerPath)

	// Unmount all the layers
	err := d.Put(id)
	if err != nil {
		logrus.Debugf("lcowdriver: remove id %s: failed to unmount: %s", id, err)
		return err
	}

	// For the non-global case, just kill the VM.
	if !d.globalMode {
		if err := d.terminateServiceVM(id, fmt.Sprintf("Remove %s", id), true); err != nil {
			return err
		}
	}

	if err := os.Rename(layerPath, tmpLayerPath); err != nil && !os.IsNotExist(err) {
		return err
	}

	if err := os.RemoveAll(tmpLayerPath); err != nil {
		return err
	}

	logrus.Debugf("lcowdriver: remove: id %s: layerPath %s succeeded", id, layerPath)
	return nil
}

// Get returns the rootfs path for the id. It is reference counted and
// effectively can be thought of as a "mount the layer into the utility
// VM if it isn't already". The contract from the caller of this is that
// all Gets and Puts are matched. It -should- be the case that on cleanup,
// nothing is mounted.
//
// For optimisation, we don't actually mount the filesystem (which in our
// case means [hot-]adding it to a service VM). But we track that and defer
// the actual adding to the point we need to access it.
func (d *Driver) Get(id, mountLabel string) (containerfs.ContainerFS, error) {
	title := fmt.Sprintf("lcowdriver: get: %s", id)
	logrus.Debugf(title)

	// Generate the mounts needed for the deferred operation.
	disks, err := d.getAllMounts(id)
	if err != nil {
		logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
		return nil, fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
	}

	logrus.Debugf("%s: got layer mounts: %+v", title, disks)
	return &lcowfs{
		root:        unionMountName(disks),
		d:           d,
		mappedDisks: disks,
		vmID:        d.getVMID(id),
	}, nil
}

// Put does the reverse of get. If there are no more references to
// the layer, it unmounts it from the utility VM.
func (d *Driver) Put(id string) error {
	title := fmt.Sprintf("lcowdriver: put: %s", id)

	// Get the service VM that we need to remove from
	svm, err := d.serviceVms.get(d.getVMID(id))
	if err == errVMUnknown {
		return nil
	} else if err == errVMisTerminating {
		return svm.getStopError()
	}

	// Generate the mounts that Get() might have mounted
	disks, err := d.getAllMounts(id)
	if err != nil {
		logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
		return fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
	}

	// Now we want to perform the unmounts, hot-remove and stop the service vm.
	// We want to go through all the steps even if we have an error, to clean up properly.
	err = svm.deleteUnionMount(unionMountName(disks), disks...)
	if err != nil {
		logrus.Debugf("%s failed to delete union mount %s: %s", title, id, err)
	}

	err1 := svm.hotRemoveVHDs(disks...)
	if err1 != nil {
		logrus.Debugf("%s failed to hot remove vhds %s: %s", title, id, err1)
		if err == nil {
			err = err1
		}
	}

	err1 = d.terminateServiceVM(id, fmt.Sprintf("Put %s", id), false)
	if err1 != nil {
		logrus.Debugf("%s failed to terminate service vm %s: %s", title, id, err1)
		if err == nil {
			err = err1
		}
	}
	logrus.Debugf("Put succeeded on id %s", id)
	return err
}

// Cleanup ensures the information the driver stores is properly removed.
// We use this opportunity to clean up any -removing folders which may still
// be left if the daemon was killed while it was removing a layer.
func (d *Driver) Cleanup() error {
	title := "lcowdriver: cleanup"

	items, err := ioutil.ReadDir(d.dataRoot)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	// Note we don't return an error below - it's possible the files
	// are locked. However, next time around after the daemon exits,
	// we likely will be able to clean up successfully. Instead we log
	// warnings if there are errors.
	for _, item := range items {
		if item.IsDir() && strings.HasSuffix(item.Name(), "-removing") {
			if err := os.RemoveAll(filepath.Join(d.dataRoot, item.Name())); err != nil {
				logrus.Warnf("%s failed to cleanup %s: %s", title, item.Name(), err)
			} else {
				logrus.Infof("%s cleaned up %s", title, item.Name())
			}
		}
	}

	// Cleanup any service VMs we have running, along with their scratch spaces.
	// We don't take the lock for this as it's taken in terminateServiceVM.
	for k, v := range d.serviceVms.svms {
		logrus.Debugf("%s svm entry: %s: %+v", title, k, v)
		d.terminateServiceVM(k, "cleanup", true)
	}

	return nil
}

// Diff takes a layer (and its parent layer which may be null, but
// is ignored by this implementation below) and returns a reader for
// a tarstream representing the layer's contents. The id could be
// a read-only "layer.vhd" or a read-write "sandbox.vhdx". The semantics
// of this function dictate that the layer is already mounted.
// However, as we do lazy mounting as a performance optimisation,
// this will likely not be the case.
func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
	title := fmt.Sprintf("lcowdriver: diff: %s", id)

	// Get VHDX info
	ld, err := getLayerDetails(d.dir(id))
	if err != nil {
		logrus.Debugf("%s: failed to get vhdx information of %s: %s", title, d.dir(id), err)
		return nil, err
	}

	// Start the SVM with a mapped virtual disk. Note that if the SVM is
	// already running and we are in global mode, this will be
	// hot-added.
	mvd := hcsshim.MappedVirtualDisk{
		HostPath:          ld.filename,
		ContainerPath:     hostToGuest(ld.filename),
		CreateInUtilityVM: true,
		ReadOnly:          true,
	}

	logrus.Debugf("%s: starting service VM", title)
	svm, err := d.startServiceVMIfNotRunning(id, []hcsshim.MappedVirtualDisk{mvd}, fmt.Sprintf("diff %s", id))
	if err != nil {
		return nil, err
	}

	logrus.Debugf("lcowdriver: diff: waiting for svm to finish booting")
	err = svm.getStartError()
	if err != nil {
		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
		return nil, fmt.Errorf("lcowdriver: diff: svm failed to boot: %s", err)
	}

	// Obtain the tar stream for it
	logrus.Debugf("%s: %s %s, size %d, isSandbox %t", title, ld.filename, mvd.ContainerPath, ld.size, ld.isSandbox)
	tarReadCloser, err := svm.config.VhdToTar(mvd.HostPath, mvd.ContainerPath, ld.isSandbox, ld.size)
	if err != nil {
		svm.hotRemoveVHDs(mvd)
		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
		return nil, fmt.Errorf("%s failed to export layer to tar stream for id: %s, parent: %s : %s", title, id, parent, err)
	}

	logrus.Debugf("%s id %s parent %s completed successfully", title, id, parent)

	// In safe/non-global mode, we can't tear down the service VM until things have been read.
	return ioutils.NewReadCloserWrapper(tarReadCloser, func() error {
		tarReadCloser.Close()
		svm.hotRemoveVHDs(mvd)
		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
		return nil
	}), nil
}

// ApplyDiff extracts the changeset from the given diff into the
// layer with the specified id and parent, returning the size of the
// new layer in bytes. The layer should not be mounted when calling
// this function. Another way of describing this is that ApplyDiff writes
// to a new layer (a VHD in LCOW) the contents of a tarstream it's given.
func (d *Driver) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
	logrus.Debugf("lcowdriver: applydiff: id %s", id)

	svm, err := d.startServiceVMIfNotRunning(id, nil, fmt.Sprintf("applydiff %s", id))
	if err != nil {
		return 0, err
	}
	defer d.terminateServiceVM(id, fmt.Sprintf("applydiff %s", id), false)

	logrus.Debugf("lcowdriver: applydiff: waiting for svm to finish booting")
	err = svm.getStartError()
	if err != nil {
		return 0, fmt.Errorf("lcowdriver: applydiff: svm failed to boot: %s", err)
	}

	// TODO @jhowardmsft - the retries are temporary to overcome platform reliability issues.
	// Obviously this will be removed as platform bugs are fixed.
	retries := 0
	for {
		retries++
		size, err := svm.config.TarToVhd(filepath.Join(d.dataRoot, id, layerFilename), diff)
		if err != nil {
			if retries <= 10 {
				continue
			}
			return 0, err
		}
		return size, nil
	}
}

// Changes produces a list of changes between the specified layer
// and its parent layer. If parent is "", then all changes will be ADD changes.
// The layer should not be mounted when calling this function.
func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
	logrus.Debugf("lcowdriver: changes: id %s parent %s", id, parent)
	// TODO @gupta-ak. Needs implementation with assistance from service VM
	return nil, nil
}

// DiffSize calculates the changes between the specified layer
// and its parent and returns the size in bytes of the changes
// relative to its base filesystem directory.
func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
	logrus.Debugf("lcowdriver: diffsize: id %s", id)
	// TODO @gupta-ak. Needs implementation with assistance from service VM
	return 0, nil
}

// GetMetadata returns custom driver information.
func (d *Driver) GetMetadata(id string) (map[string]string, error) {
	logrus.Debugf("lcowdriver: getmetadata: id %s", id)
	m := make(map[string]string)
	m["dir"] = d.dir(id)
	return m, nil
}

// GetLayerPath gets the layer path on host (path to VHD/VHDX)
func (d *Driver) GetLayerPath(id string) (string, error) {
	return d.dir(id), nil
}

// dir returns the absolute path to the layer.
func (d *Driver) dir(id string) string {
	return filepath.Join(d.dataRoot, filepath.Base(id))
}

// getLayerChain returns the layer chain information.
func (d *Driver) getLayerChain(id string) ([]string, error) {
	jPath := filepath.Join(d.dir(id), "layerchain.json")
	logrus.Debugf("lcowdriver: getlayerchain: id %s json %s", id, jPath)
	content, err := ioutil.ReadFile(jPath)
	if os.IsNotExist(err) {
		return nil, nil
	} else if err != nil {
		return nil, fmt.Errorf("lcowdriver: getlayerchain: %s unable to read layerchain file %s: %s", id, jPath, err)
	}

	var layerChain []string
	err = json.Unmarshal(content, &layerChain)
	if err != nil {
		return nil, fmt.Errorf("lcowdriver: getlayerchain: %s failed to unmarshal layerchain file %s: %s", id, jPath, err)
	}
	return layerChain, nil
}
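
// For illustration, layerchain.json as written by setLayerChain below holds a
// JSON array of the absolute host paths of the parent layer folders, closest
// parent first, e.g. (hypothetical IDs and data-root):
//
//   ["C:\\ProgramData\\docker\\lcow\\3aaa...", "C:\\ProgramData\\docker\\lcow\\2bbb..."]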

// setLayerChain stores the layer chain information on disk.
func (d *Driver) setLayerChain(id string, chain []string) error {
	content, err := json.Marshal(&chain)
	if err != nil {
		return fmt.Errorf("lcowdriver: setlayerchain: %s failed to marshal layerchain json: %s", id, err)
	}

	jPath := filepath.Join(d.dir(id), "layerchain.json")
	logrus.Debugf("lcowdriver: setlayerchain: id %s json %s", id, jPath)
	err = ioutil.WriteFile(jPath, content, 0600)
	if err != nil {
		return fmt.Errorf("lcowdriver: setlayerchain: %s failed to write layerchain file: %s", id, err)
	}
	return nil
}

// getLayerDetails is a utility for getting a file name, size and indication of
// sandbox for a VHD(x) in a folder. A read-only layer will be layer.vhd. A
// read-write layer will be sandbox.vhdx.
func getLayerDetails(folder string) (*layerDetails, error) {
	var fileInfo os.FileInfo
	ld := &layerDetails{
		isSandbox: false,
		filename:  filepath.Join(folder, layerFilename),
	}

	fileInfo, err := os.Stat(ld.filename)
	if err != nil {
		ld.filename = filepath.Join(folder, sandboxFilename)
		if fileInfo, err = os.Stat(ld.filename); err != nil {
			return nil, fmt.Errorf("failed to locate layer or sandbox in %s", folder)
		}
		ld.isSandbox = true
	}
	ld.size = fileInfo.Size()

	return ld, nil
}

func (d *Driver) getAllMounts(id string) ([]hcsshim.MappedVirtualDisk, error) {
	layerChain, err := d.getLayerChain(id)
	if err != nil {
		return nil, err
	}
	layerChain = append([]string{d.dir(id)}, layerChain...)

	logrus.Debugf("getting all layers: %v", layerChain)
	disks := make([]hcsshim.MappedVirtualDisk, len(layerChain))
	for i := range layerChain {
		ld, err := getLayerDetails(layerChain[i])
		if err != nil {
			logrus.Debugf("Failed to get LayerVhdDetails from %s: %s", layerChain[i], err)
			return nil, err
		}
		disks[i].HostPath = ld.filename
		disks[i].ContainerPath = hostToGuest(ld.filename)
		disks[i].CreateInUtilityVM = true
		disks[i].ReadOnly = !ld.isSandbox
	}
	return disks, nil
}

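// hostToGuest returns the path inside the utility VM at which a layer's VHD
// is mapped. For example (illustrative host path only), a layer at
// C:\ProgramData\docker\lcow\<layer-id>\layer.vhd maps to /tmp/<layer-id>
// in the guest.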
func hostToGuest(hostpath string) string {
	return fmt.Sprintf("/tmp/%s", filepath.Base(filepath.Dir(hostpath)))
}

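// unionMountName derives the name of a set of disks' union mount from the
// first (topmost) disk's guest path; continuing the illustrative example
// above, disks whose top layer sits at /tmp/<layer-id> would union-mount at
// /tmp/<layer-id>-mount.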
func unionMountName(disks []hcsshim.MappedVirtualDisk) string {
	return fmt.Sprintf("%s-mount", disks[0].ContainerPath)
}