github.com/docker/docker@v299999999.0.0-20200612211812-aaf470eca7b5+incompatible/daemon/graphdriver/lcow/lcow.go

     1  // +build windows
     2  
     3  // Locale:      en-gb
     4  // About:       Graph-driver for Linux Containers On Windows (LCOW)
     5  //
     6  // This graphdriver runs in two modes; it is yet to be determined which one
     7  // will be the shipping mode. In global mode, a single utility VM is used for
     8  // all service VM tool operations. This isn't safe security-wise, as it
     9  // attaches the sandboxes of multiple containers, containing untrusted data,
    10  // to the same VM. This may be fine for client devops scenarios. In safe
    11  // mode, a unique utility VM is instantiated for service VM tool operations.
    12  // The downside of safe mode is that operations are slower, as a new service
    13  // utility VM has to be started and torn down when needed.
    14  //
    15  // Options:
    16  //
    17  // The following options are read by the graphdriver itself:
    18  //
    19  //   * lcow.globalmode - Enables global service VM Mode
    20  //        -- Possible values:     true/false
    21  //        -- Default if omitted:  false
    22  //
    23  //   * lcow.sandboxsize - Specifies a custom sandbox size in GB for starting a container
    24  //        -- Possible values:      >= default sandbox size (opengcs defined, currently 20)
    25  //        -- Default if omitted:  20
    26  //
    27  // The following options are read by opengcs:
    28  //
    29  //   * lcow.kirdpath - Specifies a custom path to a kernel/initrd pair
    30  //        -- Possible values:      Any local path that is not a mapped drive
    31  //        -- Default if omitted:  %ProgramFiles%\Linux Containers
    32  //
    33  //   * lcow.bootparameters - Specifies additional boot parameters for booting in kernel+initrd mode
    34  //        -- Possible values:      Any valid linux kernel boot options
    35  //        -- Default if omitted:  <nil>
    36  //
    37  //   * lcow.timeout - Specifies a timeout for utility VM operations in seconds
    38  //        -- Possible values:      >=0
    39  //        -- Default if omitted:  300
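//
// For illustration only (not a normative reference), these options reach the
// driver as daemon storage options, e.g. on the dockerd command line:
//
//     dockerd --storage-opt lcow.globalmode=false --storage-opt lcow.timeout=600
//
// or, equivalently, via the daemon.json "storage-opts" array:
//
//     { "storage-opts": ["lcow.globalmode=false", "lcow.timeout=600"] }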
    40  
    41  // TODO: Grab logs from SVM at terminate or errors
    42  
    43  package lcow // import "github.com/docker/docker/daemon/graphdriver/lcow"
    44  
    45  import (
    46  	"bytes"
    47  	"encoding/json"
    48  	"fmt"
    49  	"io"
    50  	"io/ioutil"
    51  	"os"
    52  	"path"
    53  	"path/filepath"
    54  	"strconv"
    55  	"strings"
    56  	"sync"
    57  	"syscall"
    58  	"time"
    59  
    60  	"github.com/Microsoft/go-winio/pkg/security"
    61  	"github.com/Microsoft/hcsshim"
    62  	"github.com/Microsoft/hcsshim/ext4/tar2ext4"
    63  	"github.com/Microsoft/opengcs/client"
    64  	"github.com/docker/docker/daemon/graphdriver"
    65  	"github.com/docker/docker/pkg/archive"
    66  	"github.com/docker/docker/pkg/containerfs"
    67  	"github.com/docker/docker/pkg/idtools"
    68  	"github.com/docker/docker/pkg/ioutils"
    69  	"github.com/docker/docker/pkg/reexec"
    70  	"github.com/sirupsen/logrus"
    71  )
    72  
    73  // noreexec controls reexec functionality. Off by default, on for debugging purposes.
    74  var noreexec = false
    75  
    76  // init registers this driver to the register. It gets initialised by the
    77  // function passed in the second parameter, implemented in this file.
    78  func init() {
    79  	graphdriver.Register("lcow", InitDriver)
    80  	// DOCKER_LCOW_NOREEXEC allows for inline processing which makes
    81  	// debugging issues in the re-exec codepath significantly easier.
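		// For example (illustrative): setting DOCKER_LCOW_NOREEXEC=1 in the daemon's
		// environment makes ApplyDiff call tar2ext4Actual in-process rather than
		// spawning the "docker-lcow-tar2ext4" re-exec child registered below.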
    82  	if os.Getenv("DOCKER_LCOW_NOREEXEC") != "" {
    83  		logrus.Warnf("LCOW Graphdriver is set to not re-exec. This is intended for debugging purposes only.")
    84  		noreexec = true
    85  	} else {
    86  		reexec.Register("docker-lcow-tar2ext4", tar2ext4Reexec)
    87  	}
    88  }
    89  
    90  const (
    91  	// sandboxFilename is the name of the file containing a layer's sandbox (read-write layer).
    92  	sandboxFilename = "sandbox.vhdx"
    93  
    94  	// scratchFilename is the name of the scratch-space used by an SVM to avoid running out of memory.
    95  	scratchFilename = "scratch.vhdx"
    96  
    97  	// layerFilename is the name of the file containing a layer's read-only contents.
    98  	// Note this really is VHD format, not VHDX.
    99  	layerFilename = "layer.vhd"
   100  
   101  	// toolsScratchPath is a location in a service utility VM that the tools can use as a
   102  	// scratch space to avoid running out of memory.
   103  	toolsScratchPath = "/tmp/scratch"
   104  
   105  	// svmGlobalID is the ID used in the serviceVMs map for the global service VM when running in "global" mode.
   106  	svmGlobalID = "_lcow_global_svm_"
   107  
   108  	// cacheDirectory is the sub-folder under the driver's data-root used to cache blank sandbox and scratch VHDs.
   109  	cacheDirectory = "cache"
   110  
   111  	// scratchDirectory is the sub-folder under the driver's data-root used for scratch VHDs in service VMs
   112  	scratchDirectory = "scratch"
   113  
   114  	// errOperationPending is the HRESULT returned by the HCS when the VM termination operation is still pending.
   115  	errOperationPending syscall.Errno = 0xc0370103
   116  )
   117  
   118  // Driver represents an LCOW graph driver.
   119  type Driver struct {
   120  	dataRoot           string     // Root path on the host where we are storing everything.
   121  	cachedSandboxFile  string     // Location of the local default-sized cached sandbox.
   122  	cachedSandboxMutex sync.Mutex // Protects race conditions from multiple threads creating the cached sandbox.
   123  	cachedScratchFile  string     // Location of the local cached empty scratch space.
   124  	cachedScratchMutex sync.Mutex // Protects race conditions from multiple threads creating the cached scratch.
   125  	options            []string   // Graphdriver options we are initialised with.
   126  	globalMode         bool       // Indicates if running in an unsafe/global service VM mode.
   127  
   128  	// NOTE: It is OK to use a cache here because Windows does not support
   129  	// restoring containers when the daemon dies.
   130  	serviceVms *serviceVMMap // Map of the configs representing the service VM(s) we are running.
   131  }
   132  
   133  // layerDetails is the structure returned by a helper function `getLayerDetails`
   134  // for getting information about a layer folder
   135  type layerDetails struct {
   136  	filename  string // \path\to\sandbox.vhdx or \path\to\layer.vhd
   137  	size      int64  // size of the above file
   138  	isSandbox bool   // true if sandbox.vhdx
   139  }
   140  
   141  // deletefiles is a helper function for initialisation where we delete any
   142  // left-over scratch files in case we were previously forcibly terminated.
   143  func deletefiles(path string, f os.FileInfo, err error) error {
   144  	if err != nil || !strings.HasSuffix(f.Name(), ".vhdx") {
   145  		return err
   146  	}
   147  	logrus.Warnf("lcowdriver: init: deleting stale scratch file %s", path)
   148  	return os.Remove(path)
   149  }
   150  
   151  // InitDriver returns a new LCOW storage driver.
   152  func InitDriver(dataRoot string, options []string, _, _ []idtools.IDMap) (graphdriver.Driver, error) {
   153  	title := "lcowdriver: init:"
   154  
   155  	cd := filepath.Join(dataRoot, cacheDirectory)
   156  	sd := filepath.Join(dataRoot, scratchDirectory)
   157  
   158  	d := &Driver{
   159  		dataRoot:          dataRoot,
   160  		options:           options,
   161  		cachedSandboxFile: filepath.Join(cd, sandboxFilename),
   162  		cachedScratchFile: filepath.Join(cd, scratchFilename),
   163  		serviceVms: &serviceVMMap{
   164  			svms: make(map[string]*serviceVMMapItem),
   165  		},
   166  		globalMode: false,
   167  	}
   168  
   169  	// Looks for relevant options
   170  	for _, v := range options {
   171  		opt := strings.SplitN(v, "=", 2)
   172  		if len(opt) == 2 {
   173  			switch strings.ToLower(opt[0]) {
   174  			case "lcow.globalmode":
   175  				var err error
   176  				d.globalMode, err = strconv.ParseBool(opt[1])
   177  				if err != nil {
   178  					return nil, fmt.Errorf("%s failed to parse value for 'lcow.globalmode' - must be 'true' or 'false'", title)
   179  				}
   180  				break
   181  			}
   182  		}
   183  	}
   184  
   185  	// Make sure the dataRoot directory is created
   186  	if err := idtools.MkdirAllAndChown(dataRoot, 0700, idtools.Identity{UID: 0, GID: 0}); err != nil {
   187  		return nil, fmt.Errorf("%s failed to create '%s': %v", title, dataRoot, err)
   188  	}
   189  
   190  	// Make sure the cache directory is created under dataRoot
   191  	if err := idtools.MkdirAllAndChown(cd, 0700, idtools.Identity{UID: 0, GID: 0}); err != nil {
   192  		return nil, fmt.Errorf("%s failed to create '%s': %v", title, cd, err)
   193  	}
   194  
   195  	// Make sure the scratch directory is created under dataRoot
   196  	if err := idtools.MkdirAllAndChown(sd, 0700, idtools.Identity{UID: 0, GID: 0}); err != nil {
   197  		return nil, fmt.Errorf("%s failed to create '%s': %v", title, sd, err)
   198  	}
   199  
   200  	// Delete any items in the scratch directory
   201  	filepath.Walk(sd, deletefiles)
   202  
   203  	logrus.Infof("%s dataRoot: %s globalMode: %t", title, dataRoot, d.globalMode)
   204  
   205  	return d, nil
   206  }
   207  
   208  func (d *Driver) getVMID(id string) string {
   209  	if d.globalMode {
   210  		return svmGlobalID
   211  	}
   212  	return id
   213  }
   214  
   215  // remapLongToShortContainerPath does the mapping of a long container path for a
   216  // SCSI attached disk, to a short container path where it's actually mounted.
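// For example (illustrative): a disk whose long container path is
// /tmp/<64-character layer ID>, attached when the counter is 3, is remapped to
// /tmp/d3; the tools scratch path /tmp/scratch is deliberately left untouched.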
   217  func remapLongToShortContainerPath(longContainerPath string, attachCounter uint64, svmName string) string {
   218  	shortContainerPath := longContainerPath
   219  	if shortContainerPath != "" && shortContainerPath != toolsScratchPath {
   220  		shortContainerPath = fmt.Sprintf("/tmp/d%d", attachCounter)
   221  		logrus.Debugf("lcowdriver: UVM %s: remapping %s --> %s", svmName, longContainerPath, shortContainerPath)
   222  	}
   223  	return shortContainerPath
   224  }
   225  
   226  // startServiceVMIfNotRunning starts a service utility VM if it is not currently running.
   227  // It can optionally be started with mapped virtual disks. Returns the serviceVM structure
   228  // representing the VM.
   229  func (d *Driver) startServiceVMIfNotRunning(id string, mvdToAdd []hcsshim.MappedVirtualDisk, context string) (_ *serviceVM, err error) {
   230  	// Use the global ID if in global mode
   231  	id = d.getVMID(id)
   232  
   233  	title := "lcowdriver: startServiceVMIfNotRunning " + id
   234  
   235  	// Attempt to add ID to the service vm map
   236  	logrus.Debugf("%s: adding entry to service vm map", title)
   237  	svm, exists, err := d.serviceVms.add(id)
   238  	if err != nil && err == errVMisTerminating {
   239  		// VM is in the process of terminating. Wait until it's done and then try again
   240  		logrus.Debugf("%s: VM with current ID still in the process of terminating", title)
   241  		if err := svm.getStopError(); err != nil {
   242  			logrus.Debugf("%s: VM did not stop successfully: %s", title, err)
   243  			return nil, err
   244  		}
   245  		return d.startServiceVMIfNotRunning(id, mvdToAdd, context)
   246  	} else if err != nil {
   247  		logrus.Debugf("%s: failed to add service vm to map: %s", title, err)
   248  		return nil, fmt.Errorf("%s: failed to add to service vm map: %s", title, err)
   249  	}
   250  
   251  	if exists {
   252  		// Service VM is already up and running. In this case, just hot add the vhds.
   253  		// Note that hotAddVHDs will remap long to short container paths, so no need
   254  		// for us to do that here.
   255  		logrus.Debugf("%s: service vm already exists. Just hot adding: %+v", title, mvdToAdd)
   256  		if err := svm.hotAddVHDs(mvdToAdd...); err != nil {
   257  			logrus.Debugf("%s: failed to hot add vhds on service vm creation: %s", title, err)
   258  			return nil, fmt.Errorf("%s: failed to hot add vhds on service vm: %s", title, err)
   259  		}
   260  		return svm, nil
   261  	}
   262  
   263  	// We are the first service for this id, so we need to start it
   264  	logrus.Debugf("%s: service vm doesn't exist. Now starting it up", title)
   265  
   266  	defer func() {
   267  		// Signal that start has finished, passing in the error if any.
   268  		svm.signalStartFinished(err)
   269  		if err != nil {
   270  			// We added a ref to the VM, since we failed, we should delete the ref.
   271  			d.terminateServiceVM(id, "error path on startServiceVMIfNotRunning", false)
   272  		}
   273  	}()
   274  
   275  	// Generate a default configuration
   276  	if err := svm.config.GenerateDefault(d.options); err != nil {
   277  		return nil, fmt.Errorf("%s: failed to generate default gogcs configuration for global svm (%s): %s", title, context, err)
   278  	}
   279  
   280  	// For the name, we deliberately add a suffix in safe mode to ensure that it doesn't
   281  	// clash with another utility VM which may be running for the container itself.
   282  	// This also makes it easier to correlate through Get-ComputeProcess.
   283  	if id == svmGlobalID {
   284  		svm.config.Name = svmGlobalID
   285  	} else {
   286  		svm.config.Name = fmt.Sprintf("%s_svm", id)
   287  	}
   288  
   289  	// Ensure we take the cached scratch mutex around the check to ensure the file is complete
   290  	// and not in the process of being created by another thread.
   291  	scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
   292  
   293  	logrus.Debugf("%s: locking cachedScratchMutex", title)
   294  	d.cachedScratchMutex.Lock()
   295  	if _, err := os.Stat(d.cachedScratchFile); err == nil {
   296  		// Make a copy of cached scratch to the scratch directory
   297  		logrus.Debugf("%s: (%s) cloning cached scratch for mvd", title, context)
   298  		if err := client.CopyFile(d.cachedScratchFile, scratchTargetFile, true); err != nil {
   299  			logrus.Debugf("%s: releasing cachedScratchMutex on err: %s", title, err)
   300  			d.cachedScratchMutex.Unlock()
   301  			return nil, err
   302  		}
   303  
   304  		// Add the cached clone as a mapped virtual disk
   305  		logrus.Debugf("%s: (%s) adding cloned scratch as mvd", title, context)
   306  		mvd := hcsshim.MappedVirtualDisk{
   307  			HostPath:          scratchTargetFile,
   308  			ContainerPath:     toolsScratchPath,
   309  			CreateInUtilityVM: true,
   310  		}
   311  		svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvd)
   312  		svm.scratchAttached = true
   313  	}
   314  
   315  	logrus.Debugf("%s: releasing cachedScratchMutex", title)
   316  	d.cachedScratchMutex.Unlock()
   317  
   318  	// Add mapped virtual disks. First those that are already in the configuration. Generally,
   319  	// the only one that will be here is the service VM's scratch. The exception is when invoked
   320  	// via the graphdriver's DiffGetter implementation.
   321  	for i, mvd := range svm.config.MappedVirtualDisks {
   322  		svm.attachCounter++
   323  		svm.attachedVHDs[mvd.HostPath] = &attachedVHD{refCount: 1, attachCounter: svm.attachCounter}
   324  
   325  		// No-op for the service VM's scratch disk. Only applicable in the DiffGetter interface invocation.
   326  		svm.config.MappedVirtualDisks[i].ContainerPath = remapLongToShortContainerPath(mvd.ContainerPath, svm.attachCounter, svm.config.Name)
   327  	}
   328  
   329  	// Then add the remaining ones, appending them to the startup configuration.
   330  	for _, mvd := range mvdToAdd {
   331  		svm.attachCounter++
   332  		svm.attachedVHDs[mvd.HostPath] = &attachedVHD{refCount: 1, attachCounter: svm.attachCounter}
   333  		mvd.ContainerPath = remapLongToShortContainerPath(mvd.ContainerPath, svm.attachCounter, svm.config.Name)
   334  		svm.config.MappedVirtualDisks = append(svm.config.MappedVirtualDisks, mvd)
   335  	}
   336  
   337  	// Start it.
   338  	logrus.Debugf("%s: (%s) starting %s", title, context, svm.config.Name)
   339  	if err := svm.config.StartUtilityVM(); err != nil {
   340  		return nil, fmt.Errorf("failed to start service utility VM (%s): %s", context, err)
   341  	}
   342  
   343  	// defer function to terminate the VM if the next steps fail
   344  	defer func() {
   345  		if err != nil {
   346  			waitTerminate(svm, fmt.Sprintf("%s: (%s)", title, context))
   347  		}
   348  	}()
   349  
   350  	// Now we have a running service VM, we can create the cached scratch file if it doesn't exist.
   351  	logrus.Debugf("%s: locking cachedScratchMutex", title)
   352  	d.cachedScratchMutex.Lock()
   353  	if _, err := os.Stat(d.cachedScratchFile); err != nil {
   354  		logrus.Debugf("%s: (%s) creating an SVM scratch", title, context)
   355  
   356  		// Don't use svm.createExt4VHDX since that only works when the service vm is set up,
   357  		// but we're still in that process right now.
   358  		if err := svm.config.CreateExt4Vhdx(scratchTargetFile, client.DefaultVhdxSizeGB, d.cachedScratchFile); err != nil {
   359  			logrus.Debugf("%s: (%s) releasing cachedScratchMutex on error path", title, context)
   360  			d.cachedScratchMutex.Unlock()
   361  			logrus.Debugf("%s: failed to create vm scratch %s: %s", title, scratchTargetFile, err)
   362  			return nil, fmt.Errorf("failed to create SVM scratch VHDX (%s): %s", context, err)
   363  		}
   364  	}
   365  	logrus.Debugf("%s: (%s) releasing cachedScratchMutex", title, context)
   366  	d.cachedScratchMutex.Unlock()
   367  
   368  	// Hot-add the scratch-space if not already attached
   369  	if !svm.scratchAttached {
   370  		logrus.Debugf("%s: (%s) hot-adding scratch %s", title, context, scratchTargetFile)
   371  		if err := svm.hotAddVHDsAtStart(hcsshim.MappedVirtualDisk{
   372  			HostPath:          scratchTargetFile,
   373  			ContainerPath:     toolsScratchPath,
   374  			CreateInUtilityVM: true,
   375  		}); err != nil {
   376  			logrus.Debugf("%s: failed to hot-add scratch %s: %s", title, scratchTargetFile, err)
   377  			return nil, fmt.Errorf("failed to hot-add %s: %s", scratchTargetFile, err)
   378  		}
   379  		svm.scratchAttached = true
   380  		// Don't need to ref-count here as it will be done via hotAddVHDsAtStart() call above.
   381  	}
   382  
   383  	logrus.Debugf("%s: (%s) success", title, context)
   384  	return svm, nil
   385  }
   386  
   387  // terminateServiceVM terminates a service utility VM if it is running and is
   388  // no longer being used by any goroutine, but does nothing when in global mode as its
   389  // lifetime is limited to that of the daemon. If the force flag is set, then
   390  // the VM will be killed regardless of the ref count or whether it's global.
   391  func (d *Driver) terminateServiceVM(id, context string, force bool) (err error) {
   392  	// We don't do anything in global mode unless the force flag has been passed, which
   393  	// is only the case for cleanup at driver termination.
   394  	if d.globalMode && !force {
   395  		logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - doing nothing as in global mode", id, context)
   396  		return nil
   397  	}
   398  
   399  	id = d.getVMID(id)
   400  
   401  	var svm *serviceVM
   402  	var lastRef bool
   403  	if !force {
   404  		// In the not force case, we ref count
   405  		svm, lastRef, err = d.serviceVms.decrementRefCount(id)
   406  	} else {
   407  		// In the force case, we ignore the ref count and just set it to 0
   408  		svm, err = d.serviceVms.setRefCountZero(id)
   409  		lastRef = true
   410  	}
   411  
   412  	if err == errVMUnknown {
   413  		return nil
   414  	} else if err == errVMisTerminating {
   415  		return svm.getStopError()
   416  	} else if !lastRef {
   417  		return nil
   418  	}
   419  
   420  	// We run the deletion of the scratch as a deferred function to at least attempt
   421  	// clean-up in case of errors.
   422  	defer func() {
   423  		if svm.scratchAttached {
   424  			scratchTargetFile := filepath.Join(d.dataRoot, scratchDirectory, fmt.Sprintf("%s.vhdx", id))
   425  			logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - deleting scratch %s", id, context, scratchTargetFile)
   426  			if errRemove := os.Remove(scratchTargetFile); errRemove != nil {
   427  				logrus.Warnf("failed to remove scratch file %s (%s): %s", scratchTargetFile, context, errRemove)
   428  				err = errRemove
   429  			}
   430  		}
   431  
   432  		// This function shouldn't actually return error unless there is a bug
   433  		if errDelete := d.serviceVms.deleteID(id); errDelete != nil {
   434  			logrus.Warnf("failed to delete service vm from svm map %s (%s): %s", id, context, errDelete)
   435  		}
   436  
   437  		// Signal that this VM has stopped
   438  		svm.signalStopFinished(err)
   439  	}()
   440  
   441  	// Now it's possible that the service VM failed to start and now we are trying to terminate it.
   442  	// In this case, we will relay the error to the goroutines waiting for this vm to stop.
   443  	if err := svm.getStartError(); err != nil {
   444  		logrus.Debugf("lcowdriver: terminateservicevm: %s had failed to start up: %s", id, err)
   445  		return err
   446  	}
   447  
   448  	if err := waitTerminate(svm, fmt.Sprintf("terminateservicevm: %s (%s)", id, context)); err != nil {
   449  		return err
   450  	}
   451  
   452  	logrus.Debugf("lcowdriver: terminateservicevm: %s (%s) - success", id, context)
   453  	return nil
   454  }
   455  
   456  func waitTerminate(svm *serviceVM, context string) error {
   457  	if svm.config == nil {
   458  		return fmt.Errorf("lcowdriver: waitTerminate: Nil utility VM. %s", context)
   459  	}
   460  
   461  	logrus.Debugf("lcowdriver: waitTerminate: Calling terminate: %s", context)
   462  	if err := svm.config.Uvm.Terminate(); err != nil {
   463  		// We might get operation still pending from the HCS. In that case, we shouldn't return
   464  		// an error since we call wait right after.
   465  		underlyingError := err
   466  		if conterr, ok := err.(*hcsshim.ContainerError); ok {
   467  			underlyingError = conterr.Err
   468  		}
   469  
   470  		if syscallErr, ok := underlyingError.(syscall.Errno); ok {
   471  			underlyingError = syscallErr
   472  		}
   473  
   474  		if underlyingError != errOperationPending {
   475  			return fmt.Errorf("failed to terminate utility VM (%s): %s", context, err)
   476  		}
   477  		logrus.Debugf("lcowdriver: waitTerminate: uvm.Terminate() returned operation pending (%s)", context)
   478  	}
   479  
   480  	logrus.Debugf("lcowdriver: waitTerminate: (%s) - waiting for utility VM to terminate", context)
   481  	if err := svm.config.Uvm.WaitTimeout(time.Duration(svm.config.UvmTimeoutSeconds) * time.Second); err != nil {
   482  		return fmt.Errorf("failed waiting for utility VM to terminate (%s): %s", context, err)
   483  	}
   484  	return nil
   485  }
   486  
   487  // String returns the string representation of a driver. This should match
   488  // the name the graph driver has been registered with.
   489  func (d *Driver) String() string {
   490  	return "lcow"
   491  }
   492  
   493  // Status returns the status of the driver.
   494  func (d *Driver) Status() [][2]string {
   495  	return [][2]string{
   496  		{"LCOW", ""},
   497  		// TODO: Add some more info here - mode, home, ....
   498  	}
   499  }
   500  
   501  // Exists returns true if the given id is registered with this driver.
   502  func (d *Driver) Exists(id string) bool {
   503  	_, err := os.Lstat(d.dir(id))
   504  	logrus.Debugf("lcowdriver: exists: id %s %t", id, err == nil)
   505  	return err == nil
   506  }
   507  
   508  // CreateReadWrite creates a layer that is writable for use as a container
   509  // file system. That equates to creating a sandbox.
   510  func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
   511  	title := fmt.Sprintf("lcowdriver: createreadwrite: id %s", id)
   512  	logrus.Debug(title)
   513  
   514  	// First we need to create the folder
   515  	if err := d.Create(id, parent, opts); err != nil {
   516  		return err
   517  	}
   518  
   519  	// Look for an explicit sandbox size option.
   520  	sandboxSize := uint64(client.DefaultVhdxSizeGB)
   521  	for k, v := range opts.StorageOpt {
   522  		switch strings.ToLower(k) {
   523  		case "lcow.sandboxsize":
   524  			var err error
   525  			sandboxSize, err = strconv.ParseUint(v, 10, 32)
   526  			if err != nil {
   527  				return fmt.Errorf("%s failed to parse value '%s' for 'lcow.sandboxsize'", title, v)
   528  			}
   529  			if sandboxSize < client.DefaultVhdxSizeGB {
   530  				return fmt.Errorf("%s 'lcow.sandboxsize' option cannot be less than %d", title, client.DefaultVhdxSizeGB)
   531  			}
   532  			break
   533  		}
   534  	}
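
	// For example (illustrative), a per-container storage option such as
	// lcow.sandboxsize=40 arrives here via opts.StorageOpt, requests a 40GB
	// sandbox, and therefore bypasses the cached-sandbox fast path below.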
   535  
   536  	// Massive perf optimisation here. If we know that the RW layer is the default size,
   537  	// and that the cached sandbox already exists, and we are running in safe mode, we
   538  	// can just do a simple copy into the layer's sandbox file without needing to start a
   539  	// unique service VM. For a global service VM, it doesn't really matter. Of course,
   540  	// this is only the case where the sandbox is the default size.
   541  	//
   542  	// Make sure we have the sandbox mutex taken while we are examining it.
   543  	if sandboxSize == client.DefaultVhdxSizeGB {
   544  		logrus.Debugf("%s: locking cachedSandboxMutex", title)
   545  		d.cachedSandboxMutex.Lock()
   546  		_, err := os.Stat(d.cachedSandboxFile)
   547  		logrus.Debugf("%s: releasing cachedSandboxMutex", title)
   548  		d.cachedSandboxMutex.Unlock()
   549  		if err == nil {
   550  			logrus.Debugf("%s: using cached sandbox to populate", title)
   551  			if err := client.CopyFile(d.cachedSandboxFile, filepath.Join(d.dir(id), sandboxFilename), true); err != nil {
   552  				return err
   553  			}
   554  			return nil
   555  		}
   556  	}
   557  
   558  	logrus.Debugf("%s: creating SVM to create sandbox", title)
   559  	svm, err := d.startServiceVMIfNotRunning(id, nil, "createreadwrite")
   560  	if err != nil {
   561  		return err
   562  	}
   563  	defer d.terminateServiceVM(id, "createreadwrite", false)
   564  
   565  	// So the sandbox needs creating. If it's the default size, ensure we are the only thread populating the cache.
   566  	// Non-default sizes aren't cached; they are created one-off, so there is no need to lock the cachedSandboxMutex.
   567  	if sandboxSize == client.DefaultVhdxSizeGB {
   568  		logrus.Debugf("%s: locking cachedSandboxMutex for creation", title)
   569  		d.cachedSandboxMutex.Lock()
   570  		defer func() {
   571  			logrus.Debugf("%s: releasing cachedSandboxMutex for creation", title)
   572  			d.cachedSandboxMutex.Unlock()
   573  		}()
   574  	}
   575  
   576  	// Make sure we don't write to our local cached copy if this is for a non-default size request.
   577  	targetCacheFile := d.cachedSandboxFile
   578  	if sandboxSize != client.DefaultVhdxSizeGB {
   579  		targetCacheFile = ""
   580  	}
   581  
   582  	// Create the ext4 vhdx
   583  	logrus.Debugf("%s: creating sandbox ext4 vhdx", title)
   584  	if err := svm.createExt4VHDX(filepath.Join(d.dir(id), sandboxFilename), uint32(sandboxSize), targetCacheFile); err != nil {
   585  		logrus.Debugf("%s: failed to create sandbox vhdx for %s: %s", title, id, err)
   586  		return err
   587  	}
   588  	return nil
   589  }
   590  
   591  // Create creates the folder for the layer with the given id, and
   592  // adds it to the layer chain.
   593  func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
   594  	logrus.Debugf("lcowdriver: create: id %s parent: %s", id, parent)
   595  
   596  	parentChain, err := d.getLayerChain(parent)
   597  	if err != nil {
   598  		return err
   599  	}
   600  
   601  	var layerChain []string
   602  	if parent != "" {
   603  		if !d.Exists(parent) {
   604  			return fmt.Errorf("lcowdriver: cannot create layer folder with missing parent %s", parent)
   605  		}
   606  		layerChain = []string{d.dir(parent)}
   607  	}
   608  	layerChain = append(layerChain, parentChain...)
   609  
   610  	layerPath := d.dir(id)
   611  	logrus.Debugf("lcowdriver: create: id %s: creating %s", id, layerPath)
   612  	// Standard mkdir here, not with SDDL as the dataroot was created with
   613  	// inheritance to just local system and administrators.
   614  	if err := os.MkdirAll(layerPath, 0700); err != nil {
   615  		return err
   616  	}
   617  
   618  	if err := d.setLayerChain(id, layerChain); err != nil {
   619  		if err2 := os.RemoveAll(layerPath); err2 != nil {
   620  			logrus.Warnf("failed to remove layer %s: %s", layerPath, err2)
   621  		}
   622  		return err
   623  	}
   624  	logrus.Debugf("lcowdriver: create: id %s: success", id)
   625  
   626  	return nil
   627  }
   628  
   629  // Remove unmounts and removes the dir information.
   630  func (d *Driver) Remove(id string) error {
   631  	logrus.Debugf("lcowdriver: remove: id %s", id)
   632  	tmpID := fmt.Sprintf("%s-removing", id)
   633  	tmpLayerPath := d.dir(tmpID)
   634  	layerPath := d.dir(id)
   635  
   636  	logrus.Debugf("lcowdriver: remove: id %s: layerPath %s", id, layerPath)
   637  
   638  	// Unmount all the layers
   639  	err := d.Put(id)
   640  	if err != nil {
   641  		logrus.Debugf("lcowdriver: remove id %s: failed to unmount: %s", id, err)
   642  		return err
   643  	}
   644  
   645  	// for non-global case just kill the vm
   646  	if !d.globalMode {
   647  		if err := d.terminateServiceVM(id, fmt.Sprintf("Remove %s", id), true); err != nil {
   648  			return err
   649  		}
   650  	}
   651  
   652  	if err := os.Rename(layerPath, tmpLayerPath); err != nil && !os.IsNotExist(err) {
   653  		return err
   654  	}
   655  
   656  	if err := os.RemoveAll(tmpLayerPath); err != nil {
   657  		return err
   658  	}
   659  
   660  	logrus.Debugf("lcowdriver: remove: id %s: layerPath %s succeeded", id, layerPath)
   661  	return nil
   662  }
   663  
   664  // Get returns the rootfs path for the id. It is reference counted and
   665  // effectively can be thought of as a "mount the layer into the utility
   666  // vm if it isn't already". The contract from the caller of this is that
   667  // all Gets and Puts are matched. It -should- be the case that on cleanup,
   668  // nothing is mounted.
   669  //
   670  // For optimisation, we don't actually mount the filesystem (which in our
   671  // case means [hot-]adding it to a service VM). Instead, we track that and defer
   672  // the actual adding to the point where we need to access it.
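//
// As an illustrative example, a layer with two read-only parents yields three
// mapped virtual disks and a returned rootfs root such as /tmp/<layer-id>-mount;
// the deferred union mount is only realised inside the service VM when that
// path is actually accessed.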
   673  func (d *Driver) Get(id, mountLabel string) (containerfs.ContainerFS, error) {
   674  	title := fmt.Sprintf("lcowdriver: get: %s", id)
   675  	logrus.Debug(title)
   676  
   677  	// Generate the mounts needed for the deferred operation.
   678  	disks, err := d.getAllMounts(id)
   679  	if err != nil {
   680  		logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
   681  		return nil, fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
   682  	}
   683  
   684  	logrus.Debugf("%s: got layer mounts: %+v", title, disks)
   685  	return &lcowfs{
   686  		root:        unionMountName(disks),
   687  		d:           d,
   688  		mappedDisks: disks,
   689  		vmID:        d.getVMID(id),
   690  	}, nil
   691  }
   692  
   693  // Put does the reverse of get. If there are no more references to
   694  // the layer, it unmounts it from the utility VM.
   695  func (d *Driver) Put(id string) error {
   696  	title := fmt.Sprintf("lcowdriver: put: %s", id)
   697  
   698  	// Get the service VM that we need to remove from
   699  	svm, err := d.serviceVms.get(d.getVMID(id))
   700  	if err == errVMUnknown {
   701  		return nil
   702  	} else if err == errVMisTerminating {
   703  		return svm.getStopError()
   704  	}
   705  
   706  	// Generate the mounts that Get() might have mounted
   707  	disks, err := d.getAllMounts(id)
   708  	if err != nil {
   709  		logrus.Debugf("%s failed to get all layer details for %s: %s", title, d.dir(id), err)
   710  		return fmt.Errorf("%s failed to get layer details for %s: %s", title, d.dir(id), err)
   711  	}
   712  
   713  	// Now, we want to perform the unmounts, hot-remove and stop the service vm.
   714  	// We want to go through all the steps even if we have an error, to clean up properly.
   715  	err = svm.deleteUnionMount(unionMountName(disks), disks...)
   716  	if err != nil {
   717  		logrus.Debugf("%s failed to delete union mount %s: %s", title, id, err)
   718  	}
   719  
   720  	err1 := svm.hotRemoveVHDs(disks...)
   721  	if err1 != nil {
   722  		logrus.Debugf("%s failed to hot remove vhds %s: %s", title, id, err1)
   723  		if err == nil {
   724  			err = err1
   725  		}
   726  	}
   727  
   728  	err1 = d.terminateServiceVM(id, fmt.Sprintf("Put %s", id), false)
   729  	if err1 != nil {
   730  		logrus.Debugf("%s failed to terminate service vm %s: %s", title, id, err1)
   731  		if err == nil {
   732  			err = err1
   733  		}
   734  	}
   735  	logrus.Debugf("Put succeeded on id %s", id)
   736  	return err
   737  }
   738  
   739  // Cleanup ensures the information the driver stores is properly removed.
   740  // We use this opportunity to clean up any -removing folders which may
   741  // still be left if the daemon was killed while it was removing a layer.
   742  func (d *Driver) Cleanup() error {
   743  	title := "lcowdriver: cleanup"
   744  
   745  	items, err := ioutil.ReadDir(d.dataRoot)
   746  	if err != nil {
   747  		if os.IsNotExist(err) {
   748  			return nil
   749  		}
   750  		return err
   751  	}
   752  
   753  	// Note we don't return an error below - it's possible the files
   754  	// are locked. However, next time around after the daemon exits,
   755  	// we likely will be able to clean up successfully. Instead we log
   756  	// warnings if there are errors.
   757  	for _, item := range items {
   758  		if item.IsDir() && strings.HasSuffix(item.Name(), "-removing") {
   759  			if err := os.RemoveAll(filepath.Join(d.dataRoot, item.Name())); err != nil {
   760  				logrus.Warnf("%s failed to cleanup %s: %s", title, item.Name(), err)
   761  			} else {
   762  				logrus.Infof("%s cleaned up %s", title, item.Name())
   763  			}
   764  		}
   765  	}
   766  
   767  	// Cleanup any service VMs we have running, along with their scratch spaces.
   768  	// We don't take the lock for this as it's taken in terminateServiceVM.
   769  	for k, v := range d.serviceVms.svms {
   770  		logrus.Debugf("%s svm entry: %s: %+v", title, k, v)
   771  		d.terminateServiceVM(k, "cleanup", true)
   772  	}
   773  
   774  	return nil
   775  }
   776  
   777  // Diff takes a layer (and its parent layer, which may be empty but
   778  // is ignored by this implementation below) and returns a reader for
   779  // a tarstream representing the layers contents. The id could be
   780  // a read-only "layer.vhd" or a read-write "sandbox.vhdx". The semantics
   781  // of this function dictate that the layer is already mounted.
   782  // However, as we do lazy mounting as a performance optimisation,
   783  // this will likely not be the case.
   784  func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
   785  	title := fmt.Sprintf("lcowdriver: diff: %s", id)
   786  
   787  	// Get VHDX info
   788  	ld, err := getLayerDetails(d.dir(id))
   789  	if err != nil {
   790  		logrus.Debugf("%s: failed to get vhdx information of %s: %s", title, d.dir(id), err)
   791  		return nil, err
   792  	}
   793  
   794  	// Start the SVM with a mapped virtual disk. Note that if the SVM is
   795  	// already running and we are in global mode, this will be
   796  	// hot-added.
   797  	mvd := hcsshim.MappedVirtualDisk{
   798  		HostPath:          ld.filename,
   799  		ContainerPath:     hostToGuest(ld.filename),
   800  		CreateInUtilityVM: true,
   801  		ReadOnly:          true,
   802  	}
   803  
   804  	logrus.Debugf("%s: starting service VM", title)
   805  	svm, err := d.startServiceVMIfNotRunning(id, []hcsshim.MappedVirtualDisk{mvd}, fmt.Sprintf("diff %s", id))
   806  	if err != nil {
   807  		return nil, err
   808  	}
   809  
   810  	logrus.Debugf("lcowdriver: diff: waiting for svm to finish booting")
   811  	err = svm.getStartError()
   812  	if err != nil {
   813  		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
   814  		return nil, fmt.Errorf("lcowdriver: diff: svm failed to boot: %s", err)
   815  	}
   816  
   817  	// Obtain the tar stream for it
   818  	// The actual container path will have been remapped to a short name, so use that.
   819  	actualContainerPath := svm.getShortContainerPath(&mvd)
   820  	if actualContainerPath == "" {
   821  		return nil, fmt.Errorf("failed to get short container path for %+v in SVM %s", mvd, svm.config.Name)
   822  	}
   823  	logrus.Debugf("%s: %s %s, size %d, isSandbox %t", title, ld.filename, actualContainerPath, ld.size, ld.isSandbox)
   824  	tarReadCloser, err := svm.config.VhdToTar(mvd.HostPath, actualContainerPath, ld.isSandbox, ld.size)
   825  	if err != nil {
   826  		svm.hotRemoveVHDs(mvd)
   827  		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
   828  		return nil, fmt.Errorf("%s failed to export layer to tar stream for id: %s, parent: %s : %s", title, id, parent, err)
   829  	}
   830  
   831  	logrus.Debugf("%s id %s parent %s completed successfully", title, id, parent)
   832  
   833  	// In safe/non-global mode, we can't tear down the service VM until things have been read.
   834  	return ioutils.NewReadCloserWrapper(tarReadCloser, func() error {
   835  		tarReadCloser.Close()
   836  		svm.hotRemoveVHDs(mvd)
   837  		d.terminateServiceVM(id, fmt.Sprintf("diff %s", id), false)
   838  		return nil
   839  	}), nil
   840  }
   841  
   842  // ApplyDiff extracts the changeset from the given diff into the
   843  // layer with the specified id and parent, returning the size of the
   844  // new layer in bytes. The layer should not be mounted when calling
   845  // this function. Another way of describing this is that ApplyDiff writes
   846  // to a new layer (a VHD in LCOW) the contents of a tarstream it's given.
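//
// The conversion itself normally runs in a re-exec'd child process
// ("docker-lcow-tar2ext4", registered in init above): the child receives the
// destination VHD path as its only argument, reads the tar stream on stdin,
// and prints the resulting layer size on stdout for the parent to parse.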
   847  func (d *Driver) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
   848  	logrus.Debugf("lcowdriver: applydiff: id %s", id)
   849  
   850  	// Log failures here as they are sometimes undiagnosable, due to a possible panic.
   851  	// See https://github.com/moby/moby/issues/37955 for more information.
   852  
   853  	dest := filepath.Join(d.dataRoot, id, layerFilename)
   854  	if !noreexec {
   855  		cmd := reexec.Command([]string{"docker-lcow-tar2ext4", dest}...)
   856  		stdout := bytes.NewBuffer(nil)
   857  		stderr := bytes.NewBuffer(nil)
   858  		cmd.Stdin = diff
   859  		cmd.Stdout = stdout
   860  		cmd.Stderr = stderr
   861  
   862  		if err := cmd.Start(); err != nil {
   863  			logrus.Warnf("lcowdriver: applydiff: id %s failed to start re-exec: %s", id, err)
   864  			return 0, err
   865  		}
   866  
   867  		if err := cmd.Wait(); err != nil {
   868  			logrus.Warnf("lcowdriver: applydiff: id %s failed %s", id, err)
   869  			return 0, fmt.Errorf("re-exec error: %v: stderr: %s", err, stderr)
   870  		}
   871  
   872  		size, err := strconv.ParseInt(stdout.String(), 10, 64)
   873  		if err != nil {
   874  			logrus.Warnf("lcowdriver: applydiff: id %s failed to parse output %s", id, err)
   875  			return 0, fmt.Errorf("re-exec error: %v: stdout: %s", err, stdout)
   876  		}
   877  		return applySID(id, size, dest)
   878  
   879  	}
   880  	// The inline case
   881  	size, err := tar2ext4Actual(dest, diff)
   882  	if err != nil {
   883  		logrus.Warnf("lcowdriver: applydiff: id %s failed %s", id, err)
        		return 0, err
   884  	}
   885  	return applySID(id, size, dest)
   886  }
   887  
   888  // applySID grants the VM Group SID read-only access to the layer VHD at dest.
   889  func applySID(id string, size int64, dest string) (int64, error) {
   890  	if err := security.GrantVmGroupAccess(dest); err != nil {
   891  		logrus.Warnf("lcowdriver: applySIDs: id %s failed %s", id, err)
   892  		return 0, err
   893  	}
   894  	return size, nil
   895  }
   896  
   897  // tar2ext4Reexec is the re-exec entry point for writing a layer from a tar file
   898  func tar2ext4Reexec() {
   899  	size, err := tar2ext4Actual(os.Args[1], os.Stdin)
   900  	if err != nil {
   901  		fmt.Fprint(os.Stderr, err)
   902  		os.Exit(1)
   903  	}
   904  	fmt.Fprint(os.Stdout, size)
   905  }
   906  
   907  // tar2ext4Actual is the implementation of tar2ext4 to write a layer from a tar file.
   908  // It can be called through re-exec (default), or inline for debugging.
   909  func tar2ext4Actual(dest string, diff io.Reader) (int64, error) {
   910  	// maxDiskSize is not related to the sandbox size - this is the
   911  	// maximum possible size a generated layer VHD can be from an EXT4
   912  	// layout perspective.
   913  	const maxDiskSize = 128 * 1024 * 1024 * 1024 // 128GB
   914  	out, err := os.Create(dest)
   915  	if err != nil {
   916  		return 0, err
   917  	}
   918  	defer out.Close()
   919  	if err := tar2ext4.Convert(
   920  		diff,
   921  		out,
   922  		tar2ext4.AppendVhdFooter,
   923  		tar2ext4.ConvertWhiteout,
   924  		tar2ext4.MaximumDiskSize(maxDiskSize)); err != nil {
   925  		return 0, err
   926  	}
   927  	fi, err := os.Stat(dest)
   928  	if err != nil {
   929  		return 0, err
   930  	}
   931  	return fi.Size(), nil
   932  }
   933  
   934  // Changes produces a list of changes between the specified layer
   935  // and its parent layer. If parent is "", then all changes will be ADD changes.
   936  // The layer should not be mounted when calling this function.
   937  func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
   938  	logrus.Debugf("lcowdriver: changes: id %s parent %s", id, parent)
   939  	// TODO @gupta-ak. Needs implementation with assistance from service VM
   940  	return nil, nil
   941  }
   942  
   943  // DiffSize calculates the changes between the specified layer
   944  // and its parent and returns the size in bytes of the changes
   945  // relative to its base filesystem directory.
   946  func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
   947  	logrus.Debugf("lcowdriver: diffsize: id %s", id)
   948  	// TODO @gupta-ak. Needs implementation with assistance from service VM
   949  	return 0, nil
   950  }
   951  
   952  // GetMetadata returns custom driver information.
   953  func (d *Driver) GetMetadata(id string) (map[string]string, error) {
   954  	logrus.Debugf("lcowdriver: getmetadata: id %s", id)
   955  	m := make(map[string]string)
   956  	m["dir"] = d.dir(id)
   957  	return m, nil
   958  }
   959  
   960  // GetLayerPath gets the layer path on host (path to VHD/VHDX)
   961  func (d *Driver) GetLayerPath(id string) (string, error) {
   962  	return d.dir(id), nil
   963  }
   964  
   965  // dir returns the absolute path to the layer.
   966  func (d *Driver) dir(id string) string {
   967  	return filepath.Join(d.dataRoot, filepath.Base(id))
   968  }
   969  
   970  // getLayerChain returns the layer chain information.
   971  func (d *Driver) getLayerChain(id string) ([]string, error) {
   972  	jPath := filepath.Join(d.dir(id), "layerchain.json")
   973  	logrus.Debugf("lcowdriver: getlayerchain: id %s json %s", id, jPath)
   974  	content, err := ioutil.ReadFile(jPath)
   975  	if os.IsNotExist(err) {
   976  		return nil, nil
   977  	} else if err != nil {
   978  		return nil, fmt.Errorf("lcowdriver: getlayerchain: %s unable to read layerchain file %s: %s", id, jPath, err)
   979  	}
   980  
   981  	var layerChain []string
   982  	err = json.Unmarshal(content, &layerChain)
   983  	if err != nil {
   984  		return nil, fmt.Errorf("lcowdriver: getlayerchain: %s failed to unmarshal layerchain file %s: %s", id, jPath, err)
   985  	}
   986  	return layerChain, nil
   987  }
   988  
   989  // setLayerChain stores the layer chain information on disk.
   990  func (d *Driver) setLayerChain(id string, chain []string) error {
   991  	content, err := json.Marshal(&chain)
   992  	if err != nil {
   993  		return fmt.Errorf("lcowdriver: setlayerchain: %s failed to marshal layerchain json: %s", id, err)
   994  	}
   995  
   996  	jPath := filepath.Join(d.dir(id), "layerchain.json")
   997  	logrus.Debugf("lcowdriver: setlayerchain: id %s json %s", id, jPath)
   998  	err = ioutil.WriteFile(jPath, content, 0600)
   999  	if err != nil {
  1000  		return fmt.Errorf("lcowdriver: setlayerchain: %s failed to write layerchain file: %s", id, err)
  1001  	}
  1002  	return nil
  1003  }
  1004  
  1005  // getLayerDetails is a utility for getting a file name, size and indication of
  1006  // sandbox for a VHD(x) in a folder. A read-only layer will be layer.vhd. A
  1007  // read-write layer will be sandbox.vhdx.
  1008  func getLayerDetails(folder string) (*layerDetails, error) {
  1009  	var fileInfo os.FileInfo
  1010  	ld := &layerDetails{
  1011  		isSandbox: false,
  1012  		filename:  filepath.Join(folder, layerFilename),
  1013  	}
  1014  
  1015  	fileInfo, err := os.Stat(ld.filename)
  1016  	if err != nil {
  1017  		ld.filename = filepath.Join(folder, sandboxFilename)
  1018  		if fileInfo, err = os.Stat(ld.filename); err != nil {
  1019  			return nil, fmt.Errorf("failed to locate layer or sandbox in %s", folder)
  1020  		}
  1021  		ld.isSandbox = true
  1022  	}
  1023  	ld.size = fileInfo.Size()
  1024  
  1025  	return ld, nil
  1026  }
  1027  
  1028  func (d *Driver) getAllMounts(id string) ([]hcsshim.MappedVirtualDisk, error) {
  1029  	layerChain, err := d.getLayerChain(id)
  1030  	if err != nil {
  1031  		return nil, err
  1032  	}
  1033  	layerChain = append([]string{d.dir(id)}, layerChain...)
  1034  
  1035  	logrus.Debugf("getting all layers: %v", layerChain)
  1036  	disks := make([]hcsshim.MappedVirtualDisk, len(layerChain))
  1037  	for i := range layerChain {
  1038  		ld, err := getLayerDetails(layerChain[i])
  1039  		if err != nil {
  1040  			logrus.Debugf("Failed to get LayerVhdDetails from %s: %s", layerChain[i], err)
  1041  			return nil, err
  1042  		}
  1043  		disks[i].HostPath = ld.filename
  1044  		disks[i].ContainerPath = hostToGuest(ld.filename)
  1045  		disks[i].CreateInUtilityVM = true
  1046  		disks[i].ReadOnly = !ld.isSandbox
  1047  	}
  1048  	return disks, nil
  1049  }
  1050  
  1051  func hostToGuest(hostpath string) string {
  1052  	// This is the "long" container path. At the point at which we are
  1053  	// calculating this, we don't know which service VM we're going to be
  1054  	// using, so we can't translate this to a short path yet, instead
  1055  	// deferring until the point at which it's added to an SVM. We don't
  1056  	// use long container paths in SVMs for SCSI disks, as otherwise they can cause
  1057  	// command line operations that we invoke to fail due to being over ~4200
  1058  	// characters when there are ~47 layers involved. An example of this is
  1059  	// the mount call to create the overlay across multiple SCSI-attached disks.
  1060  	// It doesn't affect VPMem attached layers during container creation as
  1061  	// these get mapped by openGCS to /tmp/N/M where N is a container instance
  1062  	// number, and M is a layer number.
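	//
	// As an illustrative example, a host path such as
	// C:\ProgramData\docker\lcow\<layer-id>\layer.vhd maps to /tmp/<layer-id>.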
  1063  	return fmt.Sprintf("/tmp/%s", filepath.Base(filepath.Dir(hostpath)))
  1064  }
  1065  
  1066  func unionMountName(disks []hcsshim.MappedVirtualDisk) string {
  1067  	return fmt.Sprintf("%s-mount", disks[0].ContainerPath)
  1068  }
  1069  
  1070  type nopCloser struct {
  1071  	io.Reader
  1072  }
  1073  
  1074  func (nopCloser) Close() error {
  1075  	return nil
  1076  }
  1077  
  1078  type fileGetCloserFromSVM struct {
  1079  	id  string
  1080  	svm *serviceVM
  1081  	mvd *hcsshim.MappedVirtualDisk
  1082  	d   *Driver
  1083  }
  1084  
  1085  func (fgc *fileGetCloserFromSVM) Close() error {
  1086  	if fgc.svm != nil {
  1087  		if fgc.mvd != nil {
  1088  			if err := fgc.svm.hotRemoveVHDs(*fgc.mvd); err != nil {
  1089  				// We just log this as we're going to tear down the SVM imminently unless in global mode
  1090  				logrus.Errorf("failed to remove mvd %s: %s", fgc.mvd.ContainerPath, err)
  1091  			}
  1092  		}
  1093  	}
  1094  	if fgc.d != nil && fgc.svm != nil && fgc.id != "" {
  1095  		if err := fgc.d.terminateServiceVM(fgc.id, fmt.Sprintf("diffgetter %s", fgc.id), false); err != nil {
  1096  			return err
  1097  		}
  1098  	}
  1099  	return nil
  1100  }
  1101  
  1102  func (fgc *fileGetCloserFromSVM) Get(filename string) (io.ReadCloser, error) {
  1103  	errOut := &bytes.Buffer{}
  1104  	outOut := &bytes.Buffer{}
  1105  	// Must map to the actual "short" container path where the SCSI disk was mounted
  1106  	actualContainerPath := fgc.svm.getShortContainerPath(fgc.mvd)
  1107  	if actualContainerPath == "" {
  1108  		return nil, fmt.Errorf("inconsistency detected: couldn't get short container path for %+v in utility VM %s", fgc.mvd, fgc.svm.config.Name)
  1109  	}
  1110  	file := path.Join(actualContainerPath, filename)
  1111  
  1112  	// Ugly fix for MSFT internal bug VSO#19696554
  1113  	// If a file name contains a space, pushing an image fails.
  1114  	// Using solution from https://groups.google.com/forum/#!topic/Golang-Nuts/DpldsmrhPio to escape for shell execution
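	// For example (illustrative): /tmp/d0/it's here  becomes  '/tmp/d0/it'"'"'s here'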
  1115  	file = "'" + strings.Join(strings.Split(file, "'"), `'"'"'`) + "'"
  1116  	if err := fgc.svm.runProcess(fmt.Sprintf("cat %s", file), nil, outOut, errOut); err != nil {
  1117  		logrus.Debugf("cat %s failed: %s", file, errOut.String())
  1118  		return nil, err
  1119  	}
  1120  	return nopCloser{bytes.NewReader(outOut.Bytes())}, nil
  1121  }
  1122  
  1123  // DiffGetter returns a FileGetCloser that can read files from the directory that
  1124  // contains files for the layer differences. Used for direct access for tar-split.
  1125  func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
  1126  	title := fmt.Sprintf("lcowdriver: diffgetter: %s", id)
  1127  	logrus.Debug(title)
  1128  
  1129  	ld, err := getLayerDetails(d.dir(id))
  1130  	if err != nil {
  1131  		logrus.Debugf("%s: failed to get vhdx information of %s: %s", title, d.dir(id), err)
  1132  		return nil, err
  1133  	}
  1134  
  1135  	// Start the SVM with a mapped virtual disk. Note that if the SVM is
  1136  	// already running and we are in global mode, this will be hot-added.
  1137  	mvd := hcsshim.MappedVirtualDisk{
  1138  		HostPath:          ld.filename,
  1139  		ContainerPath:     hostToGuest(ld.filename),
  1140  		CreateInUtilityVM: true,
  1141  		ReadOnly:          true,
  1142  	}
  1143  
  1144  	logrus.Debugf("%s: starting service VM", title)
  1145  	svm, err := d.startServiceVMIfNotRunning(id, []hcsshim.MappedVirtualDisk{mvd}, fmt.Sprintf("diffgetter %s", id))
  1146  	if err != nil {
  1147  		return nil, err
  1148  	}
  1149  
  1150  	logrus.Debugf("%s: waiting for svm to finish booting", title)
  1151  	err = svm.getStartError()
  1152  	if err != nil {
  1153  		d.terminateServiceVM(id, fmt.Sprintf("diffgetter %s", id), false)
  1154  		return nil, fmt.Errorf("%s: svm failed to boot: %s", title, err)
  1155  	}
  1156  
  1157  	return &fileGetCloserFromSVM{
  1158  		id:  id,
  1159  		svm: svm,
  1160  		mvd: &mvd,
  1161  		d:   d}, nil
  1162  }