github.com/jwhonce/docker@v0.6.7-0.20190327063223-da823cf3a5a3/daemon/graphdriver/lcow/lcow.go

     1  // +build windows
     2  
     3  // Maintainer:  jhowardmsft
     4  // Locale:      en-gb
     5  // About:       Graph-driver for Linux Containers On Windows (LCOW)
     6  //
     7  // This graphdriver runs in two modes; it is yet to be determined which one
     8  // will be the shipping mode. In global mode, a single utility VM is used for
     9  // all service VM tool operations. This isn't safe security-wise, as it
    10  // attaches the sandboxes of multiple containers, containing untrusted data,
    11  // to the same VM. This may be fine for client devops scenarios. In safe
    12  // mode, a unique utility VM is instantiated for each container's service VM
    13  // tool operations. The downside of safe mode is that operations are slower,
    14  // as a new service utility VM has to be started and torn down when needed.
    15  //
    16  // Options:
    17  //
    18  // The following options are read by the graphdriver itself:
    19  //
    20  //   * lcow.globalmode - Enables global service VM Mode
    21  //        -- Possible values:     true/false
    22  //        -- Default if omitted:  false
    23  //
    24  //   * lcow.sandboxsize - Specifies a custom sandbox size in GB for starting a container
    25  //        -- Possible values:      >= default sandbox size (opengcs defined, currently 20)
    26  //        -- Default if omitted:  20
    27  //
    28  // The following options are read by opengcs:
    29  //
    30  //   * lcow.kirdpath - Specifies a custom path to a kernel/initrd pair
    31  //        -- Possible values:      Any local path that is not a mapped drive
    32  //        -- Default if omitted:  %ProgramFiles%\Linux Containers
    33  //
    34  //   * lcow.bootparameters - Specifies additional boot parameters for booting in kernel+initrd mode
    35  //        -- Possible values:      Any valid linux kernel boot options
    36  //        -- Default if omitted:  <nil>
    37  //
    38  //   * lcow.timeout - Specifies a timeout for utility VM operations in seconds
    39  //        -- Possible values:      >=0
    40  //        -- Default if omitted:  300
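        //
        // Example daemon invocation (illustrative only - any of the options above can
        // be supplied as additional --storage-opt flags):
        //
        //   dockerd --storage-driver lcow --storage-opt lcow.globalmode=true --storage-opt lcow.timeout=600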
    41  
    42  // TODO: Grab logs from SVM at terminate or errors
    43  
    44  package lcow // import "github.com/docker/docker/daemon/graphdriver/lcow"
    45  
    46  import (
    47  	"bytes"
    48  	"encoding/json"
    49  	"fmt"
    50  	"io"
    51  	"io/ioutil"
    52  	"os"
    53  	"path"
    54  	"path/filepath"
    55  	"strconv"
    56  	"strings"
    57  	"sync"
    58  	"syscall"
    59  	"time"
    60  
    61  	"github.com/Microsoft/go-winio/pkg/security"
    62  	"github.com/Microsoft/hcsshim"
    63  	"github.com/Microsoft/hcsshim/ext4/tar2ext4"
    64  	"github.com/Microsoft/opengcs/client"
    65  	"github.com/docker/docker/daemon/graphdriver"
    66  	"github.com/docker/docker/pkg/archive"
    67  	"github.com/docker/docker/pkg/containerfs"
    68  	"github.com/docker/docker/pkg/idtools"
    69  	"github.com/docker/docker/pkg/ioutils"
    70  	"github.com/docker/docker/pkg/reexec"
    71  	"github.com/sirupsen/logrus"
    72  )
    73  
    74  // noreexec controls reexec functionality. Off by default, on for debugging purposes.
    75  var noreexec = false
    76  
    77  // init registers this driver to the register. It gets initialised by the
    78  // function passed in the second parameter, implemented in this file.
    79  func init() {
    80  	graphdriver.Register("lcow", InitDriver)
    81  	// DOCKER_LCOW_NOREEXEC allows for inline processing which makes
    82  	// debugging issues in the re-exec codepath significantly easier.
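        	// Any non-empty value (e.g. DOCKER_LCOW_NOREEXEC=1) set in the daemon's
        	// environment before start-up enables this. The tar2ext4 conversion then
        	// runs in-process rather than via re-exec (see ApplyDiff and tar2ext4Actual).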
    83  	if os.Getenv("DOCKER_LCOW_NOREEXEC") != "" {
    84  		logrus.Warnf("LCOW Graphdriver is set to not re-exec. This is intended for debugging purposes only.")
    85  		noreexec = true
    86  	} else {
    87  		reexec.Register("docker-lcow-tar2ext4", tar2ext4Reexec)
    88  	}
    89  }
    90  
    91  const (
    92  	// sandboxFilename is the name of the file containing a layer's sandbox (read-write layer).
    93  	sandboxFilename = "sandbox.vhdx"
    94  
    95  	// scratchFilename is the name of the scratch-space used by an SVM to avoid running out of memory.
    96  	scratchFilename = "scratch.vhdx"
    97  
    98  	// layerFilename is the name of the file containing a layer's read-only contents.
    99  	// Note this really is VHD format, not VHDX.
   100  	layerFilename = "layer.vhd"
   101  
   102  	// toolsScratchPath is a location in a service utility VM that the tools can use as a
   103  	// scratch space to avoid running out of memory.
   104  	toolsScratchPath = "/tmp/scratch"
   105  
   106  	// svmGlobalID is the ID used in the serviceVMs map for the global service VM when running in "global" mode.
   107  	svmGlobalID = "_lcow_global_svm_"
   108  
   109  	// cacheDirectory is the sub-folder under the driver's data-root used to cache blank sandbox and scratch VHDs.
   110  	cacheDirectory = "cache"
   111  
   112  	// scratchDirectory is the sub-folder under the driver's data-root used for scratch VHDs in service VMs
   113  	scratchDirectory = "scratch"
   114  
   115  	// errOperationPending is the HRESULT returned by the HCS when the VM termination operation is still pending.
   116  	errOperationPending syscall.Errno = 0xc0370103
   117  )
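
        // The resulting on-disk layout under the driver's data-root looks roughly as
        // follows (illustrative - the data-root itself is supplied by the daemon when
        // InitDriver is called):
        //
        //   <data-root>\cache\sandbox.vhdx          cached blank default-sized sandbox
        //   <data-root>\cache\scratch.vhdx          cached blank SVM scratch space
        //   <data-root>\scratch\<id>.vhdx           per-SVM scratch, removed at SVM termination
        //   <data-root>\<layer-id>\layer.vhd        read-only layer contents
        //   <data-root>\<layer-id>\sandbox.vhdx     read-write (sandbox) layer contents
        //   <data-root>\<layer-id>\layerchain.json  JSON list of parent layer folders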
   118  
   119  // Driver represents an LCOW graph driver.
   120  type Driver struct {
   121  	dataRoot           string     // Root path on the host where we are storing everything.
   122  	cachedSandboxFile  string     // Location of the local default-sized cached sandbox.
   123  	cachedSandboxMutex sync.Mutex // Protects race conditions from multiple threads creating the cached sandbox.
   124  	cachedScratchFile  string     // Location of the local cached empty scratch space.
   125  	cachedScratchMutex sync.Mutex // Protects race conditions from multiple threads creating the cached scratch.
   126  	options            []string   // Graphdriver options we are initialised with.
   127  	globalMode         bool       // Indicates if running in an unsafe/global service VM mode.
   128  
   129  	// NOTE: It is OK to use a cache here because Windows does not support
   130  	// restoring containers when the daemon dies.
   131  	serviceVms *serviceVMMap // Map of the configs representing the service VM(s) we are running.
   132  }
   133  
   134  // layerDetails is the structure returned by a helper function `getLayerDetails`
   135  // for getting information about a layer folder
   136  type layerDetails struct {
   137  	filename  string // \path\to\sandbox.vhdx or \path\to\layer.vhd
   138  	size      int64  // size of the above file
   139  	isSandbox bool   // true if sandbox.vhdx
   140  }
   141  
   142  // deletefiles is a helper function for initialisation where we delete any
   143  // left-over scratch files in case we were previously forcibly terminated.
   144  func deletefiles(path string, f os.FileInfo, err error) error {
    145  	if err == nil && strings.HasSuffix(f.Name(), ".vhdx") {
   146  		logrus.Warnf("lcowdriver: init: deleting stale scratch file %s", path)
   147  		return os.Remove(path)
   148  	}
   149  	return nil
   150  }
   151  
   152  // InitDriver returns a new LCOW storage driver.
   153  func InitDriver(dataRoot string, options []string, _, _ []idtools.IDMap) (graphdriver.Driver, error) {
   154  	title := "lcowdriver: init:"
   155  
   156  	cd := filepath.Join(dataRoot, cacheDirectory)
   157  	sd := filepath.Join(dataRoot, scratchDirectory)
   158  
   159  	d := &Driver{
   160  		dataRoot:          dataRoot,
   161  		options:           options,
   162  		cachedSandboxFile: filepath.Join(cd, sandboxFilename),
   163  		cachedScratchFile: filepath.Join(cd, scratchFilename),
   164  		serviceVms: &serviceVMMap{
   165  			svms: make(map[string]*serviceVMMapItem),
   166  		},
   167  		globalMode: false,
   168  	}
   169  
   170  	// Looks for relevant options
   171  	for _, v := range options {
   172  		opt := strings.SplitN(v, "=", 2)
   173  		if len(opt) == 2 {
   174  			switch strings.ToLower(opt[0]) {
   175  			case "lcow.globalmode":
   176  				var err error
   177  				d.globalMode, err = strconv.ParseBool(opt[1])
   178  				if err != nil {
   179  					return nil, fmt.Errorf("%s failed to parse value for 'lcow.globalmode' - must be 'true' or 'false'", title)
   180  				}
   181  				break
   182  			}
   183  		}
   184  	}
   185  
   186  	// Make sure the dataRoot directory is created
   187  	if err := idtools.MkdirAllAndChown(dataRoot, 0700, idtools.Identity{UID: 0, GID: 0}); err != nil {
   188  		return nil, fmt.Errorf("%s failed to create '%s': %v", title, dataRoot, err)
   189  	}
   190  
   191  	// Make sure the cache directory is created under dataRoot
   192  	if err := idtools.MkdirAllAndChown(cd, 0700, idtools.Identity{UID: 0, GID: 0}); err != nil {
   193  		return nil, fmt.Errorf("%s failed to create '%s': %v", title, cd, err)
   194  	}
   195  
   196  	// Make sure the scratch directory is created under dataRoot
   197  	if err := idtools.MkdirAllAndChown(sd, 0700, idtools.Identity{UID: 0, GID: 0}); err != nil {
   198  		return nil, fmt.Errorf("%s failed to create '%s': %v", title, sd, err)
   199  	}
   200  
   201  	// Delete any items in the scratch directory
   202  	filepath.Walk(sd, deletefiles)
   203  
   204  	logrus.Infof("%s dataRoot: %s globalMode: %t", title, dataRoot, d.globalMode)
   205  
   206  	return d, nil
   207  }
   208  
   209  func (d *Driver) getVMID(id string) string {
   210  	if d.globalMode {
   211  		return svmGlobalID
   212  	}
   213  	return id
   214  }
   215  
    216  // remapLongToShortContainerPath maps the long container path for a SCSI-attached
    217  // disk to the short container path where it's actually mounted.
   218  func remapLongToShortContainerPath(longContainerPath string, attachCounter uint64, svmName string) string {
   219  	shortContainerPath := longContainerPath
   220  	if shortContainerPath != "" && shortContainerPath != toolsScratchPath {
   221  		shortContainerPath = fmt.Sprintf("/tmp/d%d", attachCounter)
   222  		logrus.Debugf("lcowdriver: UVM %s: remapping %s --> %s", svmName, longContainerPath, shortContainerPath)
   223  	}
   224  	return shortContainerPath
   225  }
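
        // For example (illustrative values): a long path such as "/tmp/<layer-id>", as
        // produced by hostToGuest below, is remapped to "/tmp/d3" when it is the third
        // disk attached to that utility VM. The scratch path "/tmp/scratch" and empty
        // paths are left untouched.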
   226  
    227  // startServiceVMIfNotRunning starts a service utility VM if it is not currently running.
    228  // It can optionally be started with mapped virtual disks. Returns the serviceVM structure
    229  // (wrapping the opengcs configuration) representing the VM.
   230  func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd []hcsshim.MappedVirtualDisk, context string) (_ *serviceVM, err error) {
   231  	// Use the global ID if in global mode
   232  	id = d.getVMID(id)
   233  
   234  	title := "lcowdriver: startServiceVMIfNotRunning " + id
   235  
   236  	// Attempt to add ID to the service vm map
   237  	logrus.Debugf("%s: adding entry to service vm map", title)
   238  	svm, exists, err := d.serviceVms.add(id)
   239  	if err != nil && err == errVMisTerminating {
   240  		// VM is in the process of terminating. Wait until it's done and then try again
   241  		logrus.Debugf("%s: VM with current ID still in the process of terminating", title)
   242  		if err := svm.getStopError(); err != nil {
   243  			logrus.Debugf("%s: VM did not stop successfully: %s", title, err)
   244  			return nil, err
   245  		}
   246  		return d.startServiceVMIfNotRunning(id, mvdToAdd, context)
   247  	} else if err != nil {
   248  		logrus.Debugf("%s: failed to add service vm to map: %s", title, err)
   249  		return nil, fmt.Errorf("%s: failed to add to service vm map: %s", title, err)
   250  	}
   251  
   252  	if exists {
   253  		// Service VM is already up and running. In this case, just hot add the vhds.
   254  		// Note that hotAddVHDs will remap long to short container paths, so no need
    255  		// for us to do that here.
   256  		logrus.Debugf("%s: service vm already exists. Just hot adding: %+v", title, mvdToAdd)
   257  		if err := svm.hotAddVHDs(mvdToAdd...); err != nil {
   258  			logrus.Debugf("%s: failed to hot add vhds on service vm creation: %s", title, err)
   259  			return nil, fmt.Errorf("%s: failed to hot add vhds on service vm: %s", title, err)
   260  		}
   261  		return svm, nil
   262  	}
   263  
   264  	// We are the first service for this id, so we need to start it
   265  	logrus.Debugf("%s: service vm doesn't exist. Now starting it up", title)
   266  
   267  	defer func() {
   268  		// Signal that start has finished, passing in the error if any.
   269  		svm.signalStartFinished(err)
   270  		if err != nil {
   271  			// We added a ref to the VM, since we failed, we should delete the ref.
   272  			d.terminateServiceVM(id, "error path on startServiceVMIfNotRunning", false)
   273  		}
   274  	}()
   275  
   276  	// Generate a default configuration
   277  	if err := svm.config.GenerateDefault(d.options); err != nil {
   278  		return nil, fmt.Errorf("%s: failed to generate default gogcs configuration for global svm (%s): %s", title, context, err)
   279  	}
   280  
    281  	// For the name, we deliberately add a suffix in safe mode to ensure that it doesn't
   282  	// clash with another utility VM which may be running for the container itself.
   283  	// This also makes it easier to correlate through Get-ComputeProcess.
   284  	if id == svmGlobalID {
   285  		svm.config.Name = svmGlobalID
   286  	} else {
   287  		svm.config.Name = fmt.Sprintf("%s_svm", id)
   288  	}
   289  
   290  	// Ensure we take the cached scratch mutex around the check to ensure the file is complete
   291  	// and not in the process of being created by another thread.
   292  	scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
   293  
   294  	logrus.Debugf("%s: locking cachedScratchMutex", title)
   295  	d.cachedScratchMutex.Lock()
   296  	if _, err := os.Stat(d.cachedScratchFile); err == nil {
   297  		// Make a copy of cached scratch to the scratch directory
   298  		logrus.Debugf("%s: (%s) cloning cached scratch for mvd", title, context)
   299  		if err := client.CopyFile(d.cachedScratchFile, scratchTargetFile, true); err != nil {
   300  			logrus.Debugf("%s: releasing cachedScratchMutex on err: %s", title, err)
   301  			d.cachedScratchMutex.Unlock()
   302  			return nil, err
   303  		}
   304  
   305  		// Add the cached clone as a mapped virtual disk
   306  		logrus.Debugf("%s: (%s) adding cloned scratch as mvd", title, context)
   307  		mvd := hcsshim.MappedVirtualDisk{
   308  			HostPath:          scratchTargetFile,
   309  			ContainerPath:     toolsScratchPath,
   310  			CreateInUtilityVM: true,
   311  		}
   312  		svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvd)
   313  		svm.scratchAttached = true
   314  	}
   315  
   316  	logrus.Debugf("%s: releasing cachedScratchMutex", title)
   317  	d.cachedScratchMutex.Unlock()
   318  
   319  	// Add mapped virtual disks. First those that are already in the configuration. Generally,
    320  	// the only one that will be here is the service VM's scratch. The exception is when invoked
    321  	// via the graphdriver's DiffGetter implementation.
   322  	for i, mvd := range svm.config.MappedVirtualDisks {
   323  		svm.attachCounter++
   324  		svm.attachedVHDs[mvd.HostPath] = &attachedVHD{refCount: 1, attachCounter: svm.attachCounter}
   325  
    326  		// No-op for the service VM's scratch disk. Only applicable in the DiffGetter interface invocation.
   327  		svm.config.MappedVirtualDisks[i].ContainerPath = remapLongToShortContainerPath(mvd.ContainerPath, svm.attachCounter, svm.config.Name)
   328  	}
   329  
    330  	// Then add the remaining ones to the startup configuration.
   331  	for _, mvd := range mvdToAdd {
   332  		svm.attachCounter++
   333  		svm.attachedVHDs[mvd.HostPath] = &attachedVHD{refCount: 1, attachCounter: svm.attachCounter}
   334  		mvd.ContainerPath = remapLongToShortContainerPath(mvd.ContainerPath, svm.attachCounter, svm.config.Name)
   335  		svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvd)
   336  	}
   337  
   338  	// Start it.
   339  	logrus.Debugf("%s: (%s) starting %s", title, context, svm.config.Name)
   340  	if err := svm.config.StartUtilityVM(); err != nil {
   341  		return nil, fmt.Errorf("failed to start service utility VM (%s): %s", context, err)
   342  	}
   343  
   344  	// defer function to terminate the VM if the next steps fail
   345  	defer func() {
   346  		if err != nil {
   347  			waitTerminate(svm, fmt.Sprintf("%s: (%s)", title, context))
   348  		}
   349  	}()
   350  
   351  	// Now we have a running service VM, we can create the cached scratch file if it doesn't exist.
   352  	logrus.Debugf("%s: locking cachedScratchMutex", title)
   353  	d.cachedScratchMutex.Lock()
   354  	if _, err := os.Stat(d.cachedScratchFile); err != nil {
   355  		logrus.Debugf("%s: (%s) creating an SVM scratch", title, context)
   356  
    357  		// Don't use svm.CreateExt4Vhdx since that only works when the service vm is set up,
   358  		// but we're still in that process right now.
   359  		if err := svm.config.CreateExt4Vhdx(scratchTargetFile, client.DefaultVhdxSizeGB, d.cachedScratchFile); err != nil {
   360  			logrus.Debugf("%s: (%s) releasing cachedScratchMutex on error path", title, context)
   361  			d.cachedScratchMutex.Unlock()
   362  			logrus.Debugf("%s: failed to create vm scratch %s: %s", title, scratchTargetFile, err)
   363  			return nil, fmt.Errorf("failed to create SVM scratch VHDX (%s): %s", context, err)
   364  		}
   365  	}
   366  	logrus.Debugf("%s: (%s) releasing cachedScratchMutex", title, context)
   367  	d.cachedScratchMutex.Unlock()
   368  
   369  	// Hot-add the scratch-space if not already attached
   370  	if !svm.scratchAttached {
   371  		logrus.Debugf("%s: (%s) hot-adding scratch %s", title, context, scratchTargetFile)
   372  		if err := svm.hotAddVHDsAtStart(hcsshim.MappedVirtualDisk{
   373  			HostPath:          scratchTargetFile,
   374  			ContainerPath:     toolsScratchPath,
   375  			CreateInUtilityVM: true,
   376  		}); err != nil {
   377  			logrus.Debugf("%s: failed to hot-add scratch %s: %s", title, scratchTargetFile, err)
    378  			return nil, fmt.Errorf("failed to hot-add %s: %s", scratchTargetFile, err)
   379  		}
   380  		svm.scratchAttached = true
   381  		// Don't need to ref-count here as it will be done via hotAddVHDsAtStart() call above.
   382  	}
   383  
   384  	logrus.Debugf("%s: (%s) success", title, context)
   385  	return svm, nil
   386  }
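
        // A minimal sketch of the caller pattern used elsewhere in this file
        // (CreateReadWrite, Diff, DiffGetter): each successful call is balanced by a
        // terminateServiceVM call so the reference count can drop back to zero.
        //
        //   svm, err := d.startServiceVMIfNotRunning(id, nil, "some-context")
        //   if err != nil {
        //       return err
        //   }
        //   defer d.terminateServiceVM(id, "some-context", false)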
   387  
    388  // terminateServiceVM terminates a service utility VM if it's running and
    389  // not being used by any goroutine, but does nothing when in global mode as its
    390  // lifetime is limited to that of the daemon. If the force flag is set, then
    391  // the VM will be killed regardless of the ref count or if it's global.
   392  func (d *Driver) terminateServiceVM(id, context string, force bool) (err error) {
    393  	// We don't do anything in global mode unless the force flag has been passed, which
    394  	// is only the case for cleanup at driver termination.
   395  	if d.globalMode && !force {
   396  		logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - doing nothing as in global mode", id, context)
   397  		return nil
   398  	}
   399  
   400  	id = d.getVMID(id)
   401  
   402  	var svm *serviceVM
   403  	var lastRef bool
   404  	if !force {
    405  		// In the non-force case, we ref count
   406  		svm, lastRef, err = d.serviceVms.decrementRefCount(id)
   407  	} else {
   408  		// In the force case, we ignore the ref count and just set it to 0
   409  		svm, err = d.serviceVms.setRefCountZero(id)
   410  		lastRef = true
   411  	}
   412  
   413  	if err == errVMUnknown {
   414  		return nil
   415  	} else if err == errVMisTerminating {
   416  		return svm.getStopError()
   417  	} else if !lastRef {
   418  		return nil
   419  	}
   420  
   421  	// We run the deletion of the scratch as a deferred function to at least attempt
   422  	// clean-up in case of errors.
   423  	defer func() {
   424  		if svm.scratchAttached {
   425  			scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
   426  			logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - deleting scratch %s", id, context, scratchTargetFile)
   427  			if errRemove := os.Remove(scratchTargetFile); errRemove != nil {
   428  				logrus.Warnf("failed to remove scratch file %s (%s): %s", scratchTargetFile, context, errRemove)
   429  				err = errRemove
   430  			}
   431  		}
   432  
   433  		// This function shouldn't actually return error unless there is a bug
   434  		if errDelete := d.serviceVms.deleteID(id); errDelete != nil {
    435  			logrus.Warnf("failed to delete service vm from svm map %s (%s): %s", id, context, errDelete)
   436  		}
   437  
   438  		// Signal that this VM has stopped
   439  		svm.signalStopFinished(err)
   440  	}()
   441  
    442  	// It's possible that the service VM failed to start and we are now trying to terminate it.
   443  	// In this case, we will relay the error to the goroutines waiting for this vm to stop.
   444  	if err := svm.getStartError(); err != nil {
   445  		logrus.Debugf("lcowdriver: terminateservicevm: %s had failed to start up: %s", id, err)
   446  		return err
   447  	}
   448  
   449  	if err := waitTerminate(svm, fmt.Sprintf("terminateservicevm: %s (%s)", id, context)); err != nil {
   450  		return err
   451  	}
   452  
   453  	logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - success", id, context)
   454  	return nil
   455  }
   456  
   457  func waitTerminate(svm *serviceVM, context string) error {
   458  	if svm.config == nil {
    459  		return fmt.Errorf("lcowdriver: waitTerminate: Nil utility VM. %s", context)
   460  	}
   461  
   462  	logrus.Debugf("lcowdriver: waitTerminate: Calling terminate: %s", context)
   463  	if err := svm.config.Uvm.Terminate(); err != nil {
   464  		// We might get operation still pending from the HCS. In that case, we shouldn't return
   465  		// an error since we call wait right after.
   466  		underlyingError := err
   467  		if conterr, ok := err.(*hcsshim.ContainerError); ok {
   468  			underlyingError = conterr.Err
   469  		}
   470  
   471  		if syscallErr, ok := underlyingError.(syscall.Errno); ok {
   472  			underlyingError = syscallErr
   473  		}
   474  
   475  		if underlyingError != errOperationPending {
   476  			return fmt.Errorf("failed to terminate utility VM (%s): %s", context, err)
   477  		}
   478  		logrus.Debugf("lcowdriver: waitTerminate: uvm.Terminate() returned operation pending (%s)", context)
   479  	}
   480  
   481  	logrus.Debugf("lcowdriver: waitTerminate: (%s) - waiting for utility VM to terminate", context)
   482  	if err := svm.config.Uvm.WaitTimeout(time.Duration(svm.config.UvmTimeoutSeconds) * time.Second); err != nil {
   483  		return fmt.Errorf("failed waiting for utility VM to terminate (%s): %s", context, err)
   484  	}
   485  	return nil
   486  }
   487  
   488  // String returns the string representation of a driver. This should match
   489  // the name the graph driver has been registered with.
   490  func (d *Driver) String() string {
   491  	return "lcow"
   492  }
   493  
   494  // Status returns the status of the driver.
   495  func (d *Driver) Status() [][2]string {
   496  	return [][2]string{
   497  		{"LCOW", ""},
   498  		// TODO: Add some more info here - mode, home, ....
   499  	}
   500  }
   501  
   502  // Exists returns true if the given id is registered with this driver.
   503  func (d *Driver) Exists(id string) bool {
   504  	_, err := os.Lstat(d.dir(id))
   505  	logrus.Debugf("lcowdriver: exists: id %s %t", id, err == nil)
   506  	return err == nil
   507  }
   508  
   509  // CreateReadWrite creates a layer that is writable for use as a container
   510  // file system. That equates to creating a sandbox.
   511  func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
   512  	title := fmt.Sprintf("lcowdriver: createreadwrite: id %s", id)
   513  	logrus.Debugf(title)
   514  
   515  	// First we need to create the folder
   516  	if err := d.Create(id, parent, opts); err != nil {
   517  		return err
   518  	}
   519  
   520  	// Look for an explicit sandbox size option.
   521  	sandboxSize := uint64(client.DefaultVhdxSizeGB)
   522  	for k, v := range opts.StorageOpt {
   523  		switch strings.ToLower(k) {
   524  		case "lcow.sandboxsize":
   525  			var err error
   526  			sandboxSize, err = strconv.ParseUint(v, 10, 32)
   527  			if err != nil {
   528  				return fmt.Errorf("%s failed to parse value '%s' for 'lcow.sandboxsize'", title, v)
   529  			}
   530  			if sandboxSize < client.DefaultVhdxSizeGB {
   531  				return fmt.Errorf("%s 'lcow.sandboxsize' option cannot be less than %d", title, client.DefaultVhdxSizeGB)
   532  			}
   533  			break
   534  		}
   535  	}
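
        	// An illustrative way a caller might supply this option (assuming the daemon
        	// is running with the LCOW graphdriver):
        	//
        	//   docker run --storage-opt lcow.sandboxsize=40 ...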
   536  
   537  	// Massive perf optimisation here. If we know that the RW layer is the default size,
   538  	// and that the cached sandbox already exists, and we are running in safe mode, we
    539  	// can just do a simple copy into the layer's sandbox file without needing to start a
   540  	// unique service VM. For a global service VM, it doesn't really matter. Of course,
   541  	// this is only the case where the sandbox is the default size.
   542  	//
   543  	// Make sure we have the sandbox mutex taken while we are examining it.
   544  	if sandboxSize == client.DefaultVhdxSizeGB {
   545  		logrus.Debugf("%s: locking cachedSandboxMutex", title)
   546  		d.cachedSandboxMutex.Lock()
   547  		_, err := os.Stat(d.cachedSandboxFile)
   548  		logrus.Debugf("%s: releasing cachedSandboxMutex", title)
   549  		d.cachedSandboxMutex.Unlock()
   550  		if err == nil {
   551  			logrus.Debugf("%s: using cached sandbox to populate", title)
   552  			if err := client.CopyFile(d.cachedSandboxFile, filepath.Join(d.dir(id), sandboxFilename), true); err != nil {
   553  				return err
   554  			}
   555  			return nil
   556  		}
   557  	}
   558  
   559  	logrus.Debugf("%s: creating SVM to create sandbox", title)
   560  	svm, err := d.startServiceVMIfNotRunning(id, nil, "createreadwrite")
   561  	if err != nil {
   562  		return err
   563  	}
   564  	defer d.terminateServiceVM(id, "createreadwrite", false)
   565  
    566  	// So the sandbox needs creating. If it's the default size, ensure we are the only thread populating the cache.
    567  	// Non-default sizes aren't cached - they're created one-off - so there's no need to lock the cachedSandboxMutex.
   568  	if sandboxSize == client.DefaultVhdxSizeGB {
   569  		logrus.Debugf("%s: locking cachedSandboxMutex for creation", title)
   570  		d.cachedSandboxMutex.Lock()
   571  		defer func() {
   572  			logrus.Debugf("%s: releasing cachedSandboxMutex for creation", title)
   573  			d.cachedSandboxMutex.Unlock()
   574  		}()
   575  	}
   576  
   577  	// Make sure we don't write to our local cached copy if this is for a non-default size request.
   578  	targetCacheFile := d.cachedSandboxFile
   579  	if sandboxSize != client.DefaultVhdxSizeGB {
   580  		targetCacheFile = ""
   581  	}
   582  
   583  	// Create the ext4 vhdx
   584  	logrus.Debugf("%s: creating sandbox ext4 vhdx", title)
   585  	if err := svm.createExt4VHDX(filepath.Join(d.dir(id), sandboxFilename), uint32(sandboxSize), targetCacheFile); err != nil {
   586  		logrus.Debugf("%s: failed to create sandbox vhdx for %s: %s", title, id, err)
   587  		return err
   588  	}
   589  	return nil
   590  }
   591  
   592  // Create creates the folder for the layer with the given id, and
   593  // adds it to the layer chain.
   594  func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
   595  	logrus.Debugf("lcowdriver: create: id %s parent: %s", id, parent)
   596  
   597  	parentChain, err := d.getLayerChain(parent)
   598  	if err != nil {
   599  		return err
   600  	}
   601  
   602  	var layerChain []string
   603  	if parent != "" {
   604  		if !d.Exists(parent) {
   605  			return fmt.Errorf("lcowdriver: cannot create layer folder with missing parent %s", parent)
   606  		}
   607  		layerChain = []string{d.dir(parent)}
   608  	}
   609  	layerChain = append(layerChain, parentChain...)
   610  
   611  	layerPath := d.dir(id)
   612  	logrus.Debugf("lcowdriver: create: id %s: creating %s", id, layerPath)
   613  	// Standard mkdir here, not with SDDL as the dataroot was created with
   614  	// inheritance to just local system and administrators.
   615  	if err := os.MkdirAll(layerPath, 0700); err != nil {
   616  		return err
   617  	}
   618  
   619  	if err := d.setLayerChain(id, layerChain); err != nil {
   620  		if err2 := os.RemoveAll(layerPath); err2 != nil {
   621  			logrus.Warnf("failed to remove layer %s: %s", layerPath, err2)
   622  		}
   623  		return err
   624  	}
   625  	logrus.Debugf("lcowdriver: create: id %s: success", id)
   626  
   627  	return nil
   628  }
   629  
   630  // Remove unmounts and removes the dir information.
   631  func (d *Driver) Remove(id string) error {
   632  	logrus.Debugf("lcowdriver: remove: id %s", id)
   633  	tmpID := fmt.Sprintf("%s-removing", id)
   634  	tmpLayerPath := d.dir(tmpID)
   635  	layerPath := d.dir(id)
   636  
   637  	logrus.Debugf("lcowdriver: remove: id %s: layerPath %s", id, layerPath)
   638  
   639  	// Unmount all the layers
   640  	err := d.Put(id)
   641  	if err != nil {
   642  		logrus.Debugf("lcowdriver: remove id %s: failed to unmount: %s", id, err)
   643  		return err
   644  	}
   645  
    646  	// For the non-global case, just kill the VM
   647  	if !d.globalMode {
   648  		if err := d.terminateServiceVM(id, fmt.Sprintf("Remove %s", id), true); err != nil {
   649  			return err
   650  		}
   651  	}
   652  
   653  	if err := os.Rename(layerPath, tmpLayerPath); err != nil && !os.IsNotExist(err) {
   654  		return err
   655  	}
   656  
   657  	if err := os.RemoveAll(tmpLayerPath); err != nil {
   658  		return err
   659  	}
   660  
   661  	logrus.Debugf("lcowdriver: remove: id %s: layerPath %s succeeded", id, layerPath)
   662  	return nil
   663  }
   664  
   665  // Get returns the rootfs path for the id. It is reference counted and
   666  // effectively can be thought of as a "mount the layer into the utility
   667  // vm if it isn't already". The contract from the caller of this is that
   668  // all Gets and Puts are matched. It -should- be the case that on cleanup,
   669  // nothing is mounted.
   670  //
    671  // As an optimisation, we don't actually mount the filesystem (which in our
    672  // case means [hot-]adding it to a service VM). Instead we track that and defer
   673  // the actual adding to the point we need to access it.
   674  func (d *Driver) Get(id, mountLabel string) (containerfs.ContainerFS, error) {
   675  	title := fmt.Sprintf("lcowdriver: get: %s", id)
   676  	logrus.Debugf(title)
   677  
   678  	// Generate the mounts needed for the deferred operation.
   679  	disks, err := d.getAllMounts(id)
   680  	if err != nil {
   681  		logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
   682  		return nil, fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
   683  	}
   684  
   685  	logrus.Debugf("%s: got layer mounts: %+v", title, disks)
   686  	return &lcowfs{
   687  		root:        unionMountName(disks),
   688  		d:           d,
   689  		mappedDisks: disks,
   690  		vmID:        d.getVMID(id),
   691  	}, nil
   692  }
   693  
   694  // Put does the reverse of get. If there are no more references to
   695  // the layer, it unmounts it from the utility VM.
   696  func (d *Driver) Put(id string) error {
   697  	title := fmt.Sprintf("lcowdriver: put: %s", id)
   698  
   699  	// Get the service VM that we need to remove from
   700  	svm, err := d.serviceVms.get(d.getVMID(id))
   701  	if err == errVMUnknown {
   702  		return nil
   703  	} else if err == errVMisTerminating {
   704  		return svm.getStopError()
   705  	}
   706  
   707  	// Generate the mounts that Get() might have mounted
   708  	disks, err := d.getAllMounts(id)
   709  	if err != nil {
   710  		logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
   711  		return fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
   712  	}
   713  
   714  	// Now, we want to perform the unmounts, hot-remove and stop the service vm.
    715  	// We want to go through all the steps even if we hit an error, to clean up properly
   716  	err = svm.deleteUnionMount(unionMountName(disks), disks...)
   717  	if err != nil {
   718  		logrus.Debugf("%s failed to delete union mount %s: %s", title, id, err)
   719  	}
   720  
   721  	err1 := svm.hotRemoveVHDs(disks...)
   722  	if err1 != nil {
    723  		logrus.Debugf("%s failed to hot remove vhds %s: %s", title, id, err1)
   724  		if err == nil {
   725  			err = err1
   726  		}
   727  	}
   728  
   729  	err1 = d.terminateServiceVM(id, fmt.Sprintf("Put %s", id), false)
   730  	if err1 != nil {
   731  		logrus.Debugf("%s failed to terminate service vm %s: %s", title, id, err1)
   732  		if err == nil {
   733  			err = err1
   734  		}
   735  	}
   736  	logrus.Debugf("Put succeeded on id %s", id)
   737  	return err
   738  }
   739  
   740  // Cleanup ensures the information the driver stores is properly removed.
    741  // We use this opportunity to clean up any -removing folders which may still
    742  // be left if the daemon was killed while it was removing a layer.
   743  func (d *Driver) Cleanup() error {
   744  	title := "lcowdriver: cleanup"
   745  
   746  	items, err := ioutil.ReadDir(d.dataRoot)
   747  	if err != nil {
   748  		if os.IsNotExist(err) {
   749  			return nil
   750  		}
   751  		return err
   752  	}
   753  
   754  	// Note we don't return an error below - it's possible the files
   755  	// are locked. However, next time around after the daemon exits,
    756  	// we likely will be able to clean up successfully. Instead we log
   757  	// warnings if there are errors.
   758  	for _, item := range items {
   759  		if item.IsDir() && strings.HasSuffix(item.Name(), "-removing") {
   760  			if err := os.RemoveAll(filepath.Join(d.dataRoot, item.Name())); err != nil {
   761  				logrus.Warnf("%s failed to cleanup %s: %s", title, item.Name(), err)
   762  			} else {
   763  				logrus.Infof("%s cleaned up %s", title, item.Name())
   764  			}
   765  		}
   766  	}
   767  
   768  	// Cleanup any service VMs we have running, along with their scratch spaces.
   769  	// We don't take the lock for this as it's taken in terminateServiceVm.
   770  	for k, v := range d.serviceVms.svms {
   771  		logrus.Debugf("%s svm entry: %s: %+v", title, k, v)
   772  		d.terminateServiceVM(k, "cleanup", true)
   773  	}
   774  
   775  	return nil
   776  }
   777  
    778  // Diff takes a layer (and its parent layer, which may be null but is
    779  // ignored by this implementation below) and returns a reader for
   780  // a tarstream representing the layers contents. The id could be
   781  // a read-only "layer.vhd" or a read-write "sandbox.vhdx". The semantics
   782  // of this function dictate that the layer is already mounted.
   783  // However, as we do lazy mounting as a performance optimisation,
   784  // this will likely not be the case.
   785  func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
   786  	title := fmt.Sprintf("lcowdriver: diff: %s", id)
   787  
   788  	// Get VHDX info
   789  	ld, err := getLayerDetails(d.dir(id))
   790  	if err != nil {
   791  		logrus.Debugf("%s: failed to get vhdx information of %s: %s", title, d.dir(id), err)
   792  		return nil, err
   793  	}
   794  
   795  	// Start the SVM with a mapped virtual disk. Note that if the SVM is
   796  	// already running and we are in global mode, this will be
   797  	// hot-added.
   798  	mvd := hcsshim.MappedVirtualDisk{
   799  		HostPath:          ld.filename,
   800  		ContainerPath:     hostToGuest(ld.filename),
   801  		CreateInUtilityVM: true,
   802  		ReadOnly:          true,
   803  	}
   804  
   805  	logrus.Debugf("%s: starting service VM", title)
   806  	svm, err := d.startServiceVMIfNotRunning(id, []hcsshim.MappedVirtualDisk{mvd}, fmt.Sprintf("diff %s", id))
   807  	if err != nil {
   808  		return nil, err
   809  	}
   810  
   811  	logrus.Debugf("lcowdriver: diff: waiting for svm to finish booting")
   812  	err = svm.getStartError()
   813  	if err != nil {
   814  		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
   815  		return nil, fmt.Errorf("lcowdriver: diff: svm failed to boot: %s", err)
   816  	}
   817  
   818  	// Obtain the tar stream for it
    819  	// The actual container path will have been remapped to a short name, so use that.
   820  	actualContainerPath := svm.getShortContainerPath(&mvd)
   821  	if actualContainerPath == "" {
   822  		return nil, fmt.Errorf("failed to get short container path for %+v in SVM %s", mvd, svm.config.Name)
   823  	}
   824  	logrus.Debugf("%s: %s %s, size %d, ReadOnly %t", title, ld.filename, actualContainerPath, ld.size, ld.isSandbox)
   825  	tarReadCloser, err := svm.config.VhdToTar(mvd.HostPath, actualContainerPath, ld.isSandbox, ld.size)
   826  	if err != nil {
   827  		svm.hotRemoveVHDs(mvd)
   828  		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
   829  		return nil, fmt.Errorf("%s failed to export layer to tar stream for id: %s, parent: %s : %s", title, id, parent, err)
   830  	}
   831  
   832  	logrus.Debugf("%s id %s parent %s completed successfully", title, id, parent)
   833  
   834  	// In safe/non-global mode, we can't tear down the service VM until things have been read.
   835  	return ioutils.NewReadCloserWrapper(tarReadCloser, func() error {
   836  		tarReadCloser.Close()
   837  		svm.hotRemoveVHDs(mvd)
   838  		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
   839  		return nil
   840  	}), nil
   841  }
   842  
   843  // ApplyDiff extracts the changeset from the given diff into the
   844  // layer with the specified id and parent, returning the size of the
   845  // new layer in bytes. The layer should not be mounted when calling
   846  // this function. Another way of describing this is that ApplyDiff writes
   847  // to a new layer (a VHD in LCOW) the contents of a tarstream it's given.
   848  func (d *Driver) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
   849  	logrus.Debugf("lcowdriver: applydiff: id %s", id)
   850  
    851  	// Log failures here, as they are sometimes undiagnosable due to a possible panic.
   852  	// See https://github.com/moby/moby/issues/37955 for more information.
   853  
   854  	dest := filepath.Join(d.dataRoot, id, layerFilename)
   855  	if !noreexec {
   856  		cmd := reexec.Command([]string{"docker-lcow-tar2ext4", dest}...)
   857  		stdout := bytes.NewBuffer(nil)
   858  		stderr := bytes.NewBuffer(nil)
   859  		cmd.Stdin = diff
   860  		cmd.Stdout = stdout
   861  		cmd.Stderr = stderr
   862  
   863  		if err := cmd.Start(); err != nil {
   864  			logrus.Warnf("lcowdriver: applydiff: id %s failed to start re-exec: %s", id, err)
   865  			return 0, err
   866  		}
   867  
   868  		if err := cmd.Wait(); err != nil {
   869  			logrus.Warnf("lcowdriver: applydiff: id %s failed %s", id, err)
   870  			return 0, fmt.Errorf("re-exec error: %v: stderr: %s", err, stderr)
   871  		}
   872  
   873  		size, err := strconv.ParseInt(stdout.String(), 10, 64)
   874  		if err != nil {
   875  			logrus.Warnf("lcowdriver: applydiff: id %s failed to parse output %s", id, err)
   876  			return 0, fmt.Errorf("re-exec error: %v: stdout: %s", err, stdout)
   877  		}
   878  		return applySID(id, size, dest)
   879  
   880  	}
   881  	// The inline case
   882  	size, err := tar2ext4Actual(dest, diff)
   883  	if err != nil {
   884  		logrus.Warnf("lcowdriver: applydiff: id %s failed %s", id, err)
   885  	}
   886  	return applySID(id, size, dest)
   887  }
   888  
    889  // applySID grants the VM Group SID read-only access to the layer file at dest.
   890  func applySID(id string, size int64, dest string) (int64, error) {
   891  	if err := security.GrantVmGroupAccess(dest); err != nil {
   892  		logrus.Warnf("lcowdriver: applySIDs: id %s failed %s", id, err)
   893  		return 0, err
   894  	}
   895  	return size, nil
   896  }
   897  
   898  // tar2ext4Reexec is the re-exec entry point for writing a layer from a tar file
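        // The protocol with ApplyDiff (the parent process) is: the destination layer
        // path arrives as os.Args[1], the tar stream arrives on stdin, and the
        // resulting size in bytes is written to stdout (errors go to stderr and a
        // non-zero exit code).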
   899  func tar2ext4Reexec() {
   900  	size, err := tar2ext4Actual(os.Args[1], os.Stdin)
   901  	if err != nil {
   902  		fmt.Fprint(os.Stderr, err)
   903  		os.Exit(1)
   904  	}
   905  	fmt.Fprint(os.Stdout, size)
   906  }
   907  
    908  // tar2ext4Actual is the implementation of tar2ext4 to write a layer from a tar file.
   909  // It can be called through re-exec (default), or inline for debugging.
   910  func tar2ext4Actual(dest string, diff io.Reader) (int64, error) {
    911  	// maxDiskSize is not related to the sandbox size - this is the
    912  	// maximum possible size a generated layer VHD can be from an EXT4
   913  	// layout perspective.
   914  	const maxDiskSize = 128 * 1024 * 1024 * 1024 // 128GB
   915  	out, err := os.Create(dest)
   916  	if err != nil {
   917  		return 0, err
   918  	}
   919  	defer out.Close()
   920  	if err := tar2ext4.Convert(
   921  		diff,
   922  		out,
   923  		tar2ext4.AppendVhdFooter,
   924  		tar2ext4.ConvertWhiteout,
   925  		tar2ext4.MaximumDiskSize(maxDiskSize)); err != nil {
   926  		return 0, err
   927  	}
   928  	fi, err := os.Stat(dest)
   929  	if err != nil {
   930  		return 0, err
   931  	}
   932  	return fi.Size(), nil
   933  }
   934  
   935  // Changes produces a list of changes between the specified layer
   936  // and its parent layer. If parent is "", then all changes will be ADD changes.
   937  // The layer should not be mounted when calling this function.
   938  func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
   939  	logrus.Debugf("lcowdriver: changes: id %s parent %s", id, parent)
   940  	// TODO @gupta-ak. Needs implementation with assistance from service VM
   941  	return nil, nil
   942  }
   943  
   944  // DiffSize calculates the changes between the specified layer
   945  // and its parent and returns the size in bytes of the changes
   946  // relative to its base filesystem directory.
   947  func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
   948  	logrus.Debugf("lcowdriver: diffsize: id %s", id)
   949  	// TODO @gupta-ak. Needs implementation with assistance from service VM
   950  	return 0, nil
   951  }
   952  
   953  // GetMetadata returns custom driver information.
   954  func (d *Driver) GetMetadata(id string) (map[string]string, error) {
   955  	logrus.Debugf("lcowdriver: getmetadata: id %s", id)
   956  	m := make(map[string]string)
   957  	m["dir"] = d.dir(id)
   958  	return m, nil
   959  }
   960  
    961  // GetLayerPath gets the layer's folder path on the host (the folder containing its VHD/VHDX).
   962  func (d *Driver) GetLayerPath(id string) (string, error) {
   963  	return d.dir(id), nil
   964  }
   965  
   966  // dir returns the absolute path to the layer.
   967  func (d *Driver) dir(id string) string {
   968  	return filepath.Join(d.dataRoot, filepath.Base(id))
   969  }
   970  
   971  // getLayerChain returns the layer chain information.
   972  func (d *Driver) getLayerChain(id string) ([]string, error) {
   973  	jPath := filepath.Join(d.dir(id), "layerchain.json")
   974  	logrus.Debugf("lcowdriver: getlayerchain: id %s json %s", id, jPath)
   975  	content, err := ioutil.ReadFile(jPath)
   976  	if os.IsNotExist(err) {
   977  		return nil, nil
   978  	} else if err != nil {
   979  		return nil, fmt.Errorf("lcowdriver: getlayerchain: %s unable to read layerchain file %s: %s", id, jPath, err)
   980  	}
   981  
   982  	var layerChain []string
   983  	err = json.Unmarshal(content, &layerChain)
   984  	if err != nil {
    985  		return nil, fmt.Errorf("lcowdriver: getlayerchain: %s failed to unmarshal layerchain file %s: %s", id, jPath, err)
   986  	}
   987  	return layerChain, nil
   988  }
   989  
   990  // setLayerChain stores the layer chain information on disk.
   991  func (d *Driver) setLayerChain(id string, chain []string) error {
   992  	content, err := json.Marshal(&chain)
   993  	if err != nil {
    994  		return fmt.Errorf("lcowdriver: setlayerchain: %s failed to marshal layerchain json: %s", id, err)
   995  	}
   996  
   997  	jPath := filepath.Join(d.dir(id), "layerchain.json")
   998  	logrus.Debugf("lcowdriver: setlayerchain: id %s json %s", id, jPath)
   999  	err = ioutil.WriteFile(jPath, content, 0600)
  1000  	if err != nil {
  1001  		return fmt.Errorf("lcowdriver: setlayerchain: %s failed to write layerchain file: %s", id, err)
  1002  	}
  1003  	return nil
  1004  }
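
        // An illustrative layerchain.json for a layer with two parents (the data-root
        // shown here is an assumption - it is whatever path the daemon passed in):
        //
        //   ["C:\\ProgramData\\docker\\lcow\\<parent1-id>","C:\\ProgramData\\docker\\lcow\\<parent2-id>"]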
  1005  
  1006  // getLayerDetails is a utility for getting a file name, size and indication of
  1007  // sandbox for a VHD(x) in a folder. A read-only layer will be layer.vhd. A
  1008  // read-write layer will be sandbox.vhdx.
  1009  func getLayerDetails(folder string) (*layerDetails, error) {
  1010  	var fileInfo os.FileInfo
  1011  	ld := &layerDetails{
  1012  		isSandbox: false,
  1013  		filename:  filepath.Join(folder, layerFilename),
  1014  	}
  1015  
  1016  	fileInfo, err := os.Stat(ld.filename)
  1017  	if err != nil {
  1018  		ld.filename = filepath.Join(folder, sandboxFilename)
  1019  		if fileInfo, err = os.Stat(ld.filename); err != nil {
  1020  			return nil, fmt.Errorf("failed to locate layer or sandbox in %s", folder)
  1021  		}
  1022  		ld.isSandbox = true
  1023  	}
  1024  	ld.size = fileInfo.Size()
  1025  
  1026  	return ld, nil
  1027  }
  1028  
  1029  func (d *Driver) getAllMounts(id string) ([]hcsshim.MappedVirtualDisk, error) {
  1030  	layerChain, err := d.getLayerChain(id)
  1031  	if err != nil {
  1032  		return nil, err
  1033  	}
  1034  	layerChain = append([]string{d.dir(id)}, layerChain...)
  1035  
   1036  	logrus.Debugf("getting all layers: %v", layerChain)
   1037  	disks := make([]hcsshim.MappedVirtualDisk, len(layerChain))
  1038  	for i := range layerChain {
  1039  		ld, err := getLayerDetails(layerChain[i])
  1040  		if err != nil {
   1041  			logrus.Debugf("Failed to get layer details from %s: %s", layerChain[i], err)
  1042  			return nil, err
  1043  		}
  1044  		disks[i].HostPath = ld.filename
  1045  		disks[i].ContainerPath = hostToGuest(ld.filename)
  1046  		disks[i].CreateInUtilityVM = true
  1047  		disks[i].ReadOnly = !ld.isSandbox
  1048  	}
  1049  	return disks, nil
  1050  }
  1051  
  1052  func hostToGuest(hostpath string) string {
   1053  	// This is the "long" container path. At the point at which we are
   1054  	// calculating this, we don't know which service VM we're going to be
   1055  	// using, so we can't translate this to a short path yet, instead
   1056  	// deferring until the point at which it's added to an SVM. We don't
  1057  	// use long container paths in SVMs for SCSI disks, otherwise it can cause
  1058  	// command line operations that we invoke to fail due to being over ~4200
  1059  	// characters when there are ~47 layers involved. An example of this is
  1060  	// the mount call to create the overlay across multiple SCSI-attached disks.
  1061  	// It doesn't affect VPMem attached layers during container creation as
  1062  	// these get mapped by openGCS to /tmp/N/M where N is a container instance
  1063  	// number, and M is a layer number.
  1064  	return fmt.Sprintf("/tmp/%s", filepath.Base(filepath.Dir(hostpath)))
  1065  }
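
        // For example (illustrative host path):
        //
        //   hostToGuest(`C:\data\lcow\<layer-id>\layer.vhd`) == "/tmp/<layer-id>"
        //
        // i.e. the layer folder name, not the file name, keys the guest path.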
  1066  
  1067  func unionMountName(disks []hcsshim.MappedVirtualDisk) string {
  1068  	return fmt.Sprintf("%s-mount", disks[0].ContainerPath)
  1069  }
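
        // For the disks generated by getAllMounts, disks[0] is always the layer's own
        // folder, so the union mount name ends up as "/tmp/<layer-id>-mount".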
  1070  
  1071  type nopCloser struct {
  1072  	io.Reader
  1073  }
  1074  
  1075  func (nopCloser) Close() error {
  1076  	return nil
  1077  }
  1078  
  1079  type fileGetCloserFromSVM struct {
  1080  	id  string
  1081  	svm *serviceVM
  1082  	mvd *hcsshim.MappedVirtualDisk
  1083  	d   *Driver
  1084  }
  1085  
  1086  func (fgc *fileGetCloserFromSVM) Close() error {
  1087  	if fgc.svm != nil {
  1088  		if fgc.mvd != nil {
  1089  			if err := fgc.svm.hotRemoveVHDs(*fgc.mvd); err != nil {
  1090  				// We just log this as we're going to tear down the SVM imminently unless in global mode
  1091  				logrus.Errorf("failed to remove mvd %s: %s", fgc.mvd.ContainerPath, err)
  1092  			}
  1093  		}
  1094  	}
  1095  	if fgc.d != nil && fgc.svm != nil && fgc.id != "" {
  1096  		if err := fgc.d.terminateServiceVM(fgc.id, fmt.Sprintf("diffgetter %s", fgc.id), false); err != nil {
  1097  			return err
  1098  		}
  1099  	}
  1100  	return nil
  1101  }
  1102  
  1103  func (fgc *fileGetCloserFromSVM) Get(filename string) (io.ReadCloser, error) {
  1104  	errOut := &bytes.Buffer{}
  1105  	outOut := &bytes.Buffer{}
  1106  	// Must map to the actual "short" container path where the SCSI disk was mounted
  1107  	actualContainerPath := fgc.svm.getShortContainerPath(fgc.mvd)
  1108  	if actualContainerPath == "" {
  1109  		return nil, fmt.Errorf("inconsistency detected: couldn't get short container path for %+v in utility VM %s", fgc.mvd, fgc.svm.config.Name)
  1110  	}
  1111  	file := path.Join(actualContainerPath, filename)
  1112  
  1113  	// Ugly fix for MSFT internal bug VSO#19696554
  1114  	// If a file name contains a space, pushing an image fails.
  1115  	// Using solution from https://groups.google.com/forum/#!topic/Golang-Nuts/DpldsmrhPio to escape for shell execution
  1116  	file = "'" + strings.Join(strings.Split(file, "'"), `'"'"'`) + "'"
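        	// e.g. (illustrative) a name containing a single quote such as /tmp/d2/it's.txt
        	// becomes '/tmp/d2/it'"'"'s.txt' so it survives the shell invocation below.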
  1117  	if err := fgc.svm.runProcess(fmt.Sprintf("cat %s", file), nil, outOut, errOut); err != nil {
  1118  		logrus.Debugf("cat %s failed: %s", file, errOut.String())
  1119  		return nil, err
  1120  	}
  1121  	return nopCloser{bytes.NewReader(outOut.Bytes())}, nil
  1122  }
  1123  
  1124  // DiffGetter returns a FileGetCloser that can read files from the directory that
  1125  // contains files for the layer differences. Used for direct access for tar-split.
  1126  func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
  1127  	title := fmt.Sprintf("lcowdriver: diffgetter: %s", id)
  1128  	logrus.Debugf(title)
  1129  
  1130  	ld, err := getLayerDetails(d.dir(id))
  1131  	if err != nil {
  1132  		logrus.Debugf("%s: failed to get vhdx information of %s: %s", title, d.dir(id), err)
  1133  		return nil, err
  1134  	}
  1135  
  1136  	// Start the SVM with a mapped virtual disk. Note that if the SVM is
  1137  	// already running and we are in global mode, this will be hot-added.
  1138  	mvd := hcsshim.MappedVirtualDisk{
  1139  		HostPath:          ld.filename,
  1140  		ContainerPath:     hostToGuest(ld.filename),
  1141  		CreateInUtilityVM: true,
  1142  		ReadOnly:          true,
  1143  	}
  1144  
  1145  	logrus.Debugf("%s: starting service VM", title)
  1146  	svm, err := d.startServiceVMIfNotRunning(id, []hcsshim.MappedVirtualDisk{mvd}, fmt.Sprintf("diffgetter %s", id))
  1147  	if err != nil {
  1148  		return nil, err
  1149  	}
  1150  
  1151  	logrus.Debugf("%s: waiting for svm to finish booting", title)
  1152  	err = svm.getStartError()
  1153  	if err != nil {
   1154  		d.terminateServiceVM(id, fmt.Sprintf("diffgetter %s", id), false)
  1155  		return nil, fmt.Errorf("%s: svm failed to boot: %s", title, err)
  1156  	}
  1157  
  1158  	return &fileGetCloserFromSVM{
  1159  		id:  id,
  1160  		svm: svm,
  1161  		mvd: &mvd,
  1162  		d:   d}, nil
  1163  }