github.com/zhouyu0/docker-note@v0.0.0-20190722021225-b8d3825084db/daemon/graphdriver/overlay2/overlay.go (about)

     1  // +build linux
     2  
     3  package overlay2 // import "github.com/docker/docker/daemon/graphdriver/overlay2"
     4  
     5  import (
     6  	"bufio"
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"os"
    13  	"os/exec"
    14  	"path"
    15  	"path/filepath"
    16  	"strconv"
    17  	"strings"
    18  	"sync"
    19  
    20  	"github.com/docker/docker/daemon/graphdriver"
    21  	"github.com/docker/docker/daemon/graphdriver/overlayutils"
    22  	"github.com/docker/docker/daemon/graphdriver/quota"
    23  	"github.com/docker/docker/pkg/archive"
    24  	"github.com/docker/docker/pkg/chrootarchive"
    25  	"github.com/docker/docker/pkg/containerfs"
    26  	"github.com/docker/docker/pkg/directory"
    27  	"github.com/docker/docker/pkg/fsutils"
    28  	"github.com/docker/docker/pkg/idtools"
    29  	"github.com/docker/docker/pkg/locker"
    30  	"github.com/docker/docker/pkg/mount"
    31  	"github.com/docker/docker/pkg/parsers"
    32  	"github.com/docker/docker/pkg/parsers/kernel"
    33  	"github.com/docker/docker/pkg/system"
    34  	"github.com/docker/go-units"
    35  	rsystem "github.com/opencontainers/runc/libcontainer/system"
    36  	"github.com/opencontainers/selinux/go-selinux/label"
    37  	"github.com/sirupsen/logrus"
    38  	"golang.org/x/sys/unix"
    39  )
    40  
var (
	// untar is the function ApplyDiff calls to unpack a layer tar stream
	// into a layer's "diff" directory.
	untar = chrootarchive.UntarUncompressed
)
    45  
    46  // This backend uses the overlay union filesystem for containers
    47  // with diff directories for each layer.
    48  
    49  // This version of the overlay driver requires at least kernel
    50  // 4.0.0 in order to support mounting multiple diff directories.
    51  
// Each container/image has at least a "diff" directory and a "link" file.
// If there are diff layers below, there is also a "lower" file as well as
// "merged" and "work" directories. The "diff" directory
// has the upper layer of the overlay and is used to capture any
// changes to the layer. The "lower" file contains all the lower layer
// mounts separated by ":" and ordered from uppermost to lowermost
// layers. The overlay itself is mounted in the "merged" directory,
// and the "work" dir is needed for overlay to work.
    60  
    61  // The "link" file for each layer contains a unique string for the layer.
// Under the "l" directory at the root there will be a symbolic link
// with that unique string pointing to the "diff" directory for the layer.
    64  // The symbolic links are used to reference lower layers in the "lower"
    65  // file and on mount. The links are used to shorten the total length
    66  // of a layer reference without requiring changes to the layer identifier
    67  // or root directory. Mounts are always done relative to root and
    68  // referencing the symbolic links in order to ensure the number of
    69  // lower directories can fit in a single page for making the mount
    70  // syscall. A hard upper limit of 128 lower layers is enforced to ensure
    71  // that mounts do not fail due to length.
    72  
const (
	// driverName is the name under which this driver is registered and
	// which String reports.
	driverName = "overlay2"
	// linkDir is the directory (relative to the driver home) that holds
	// the short symbolic links pointing at each layer's "diff" directory.
	linkDir = "l"
	// lowerFile is the name of the per-layer file that stores the
	// ":"-separated list of lower layer links.
	lowerFile = "lower"
	// maxDepth is the maximum number of lower layers allowed for a layer;
	// getLower fails with "max depth exceeded" beyond this.
	maxDepth = 128

	// idLength represents the number of random characters
	// which can be used to create the unique link identifier
	// for every layer. If this value is too long then the
	// page size limit for the mount command may be exceeded.
	// The idLength should be selected such that following equation
	// is true (512 is a buffer for label metadata).
	// ((idLength + len(linkDir) + 1) * maxDepth) <= (pageSize - 512)
	idLength = 26
)
    88  
// overlayOptions holds the driver options parsed from the storage options
// passed to Init (see parseOptions).
type overlayOptions struct {
	// overrideKernelCheck is set from "overlay2.override_kernel_check" and
	// lets Init continue (with a warning) on kernels older than it requires.
	overrideKernelCheck bool
	// quota carries the default size limit set from "overlay2.size".
	quota quota.Quota
}
    93  
// Driver contains information about the home directory and the list of active
// mounts that are created using this driver.
type Driver struct {
	// home is the driver root; every layer lives in home/<id> and the
	// link directory in home/l.
	home string
	// uidMaps and gidMaps are the ID mappings used to derive the root
	// UID/GID for created directories and passed to the archive helpers.
	uidMaps []idtools.IDMap
	gidMaps []idtools.IDMap
	// ctr reference-counts "merged" mount points across Get/Put calls.
	ctr *graphdriver.RefCounter
	// quotaCtl is only set by Init when the backing filesystem is xfs and
	// quota.NewControl succeeded; create uses it to apply size limits.
	quotaCtl *quota.Control
	// options are the parsed driver options.
	options overlayOptions
	// naiveDiff is the fallback diff driver used when native overlay
	// diffing cannot be used for a layer.
	naiveDiff graphdriver.DiffDriver
	// supportsDType records the result of the d_type probe done in Init.
	supportsDType bool
	// locker provides per-layer-id locking for Get, Put and Remove.
	locker *locker.Locker
}
   107  
var (
	// logger tags every log entry of this package with the driver name.
	logger = logrus.WithField("storage-driver", "overlay2")
	// backingFs is the name of the filesystem backing the driver home;
	// Init overwrites it when the filesystem magic is recognised.
	backingFs = "<unknown>"
	// projectQuotaSupported is set by Init when quota control could be
	// initialised on an xfs backing filesystem.
	projectQuotaSupported = false

	// useNaiveDiffLock guards the one-time native diff probe whose result
	// is cached in useNaiveDiffOnly (see useNaiveDiff).
	useNaiveDiffLock sync.Once
	useNaiveDiffOnly bool

	// indexOff is either empty or "index=off,"; it is prepended to the
	// mount options in Get. Init sets it when the overlay module exposes
	// the "index" parameter.
	indexOff string
)
   118  
// init registers the overlay2 driver with the graphdriver registry so that
// it can be selected by name.
func init() {
	graphdriver.Register(driverName, Init)
}
   122  
// Init returns the native diff driver for overlay filesystem.
// If overlay filesystem is not supported on the host, the error
// graphdriver.ErrNotSupported is returned.
// If an overlay filesystem is not supported over an existing filesystem then
// the error graphdriver.ErrIncompatibleFS is returned.
//
// home is the driver root directory, options are the raw "key=value" storage
// options (see parseOptions), and uidMaps/gidMaps are the ID mappings used to
// own the directories the driver creates.
//
// Besides building the Driver, Init updates the package-level variables
// backingFs, projectQuotaSupported and indexOff.
func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
	opts, err := parseOptions(options)
	if err != nil {
		return nil, err
	}

	// Whatever the reason supportsOverlay fails, callers only ever see
	// ErrNotSupported.
	if err := supportsOverlay(); err != nil {
		return nil, graphdriver.ErrNotSupported
	}

	// require kernel 4.0.0 to ensure multiple lower dirs are supported
	v, err := kernel.GetKernelVersion()
	if err != nil {
		return nil, err
	}

	// Perform feature detection on /var/lib/docker/overlay2 if it's an existing directory.
	// This covers situations where /var/lib/docker/overlay2 is a mount, and on a different
	// filesystem than /var/lib/docker.
	// If the path does not exist, fall back to using /var/lib/docker for feature detection.
	testdir := home
	if _, err := os.Stat(testdir); os.IsNotExist(err) {
		testdir = filepath.Dir(testdir)
	}

	fsMagic, err := graphdriver.GetFSMagic(testdir)
	if err != nil {
		return nil, err
	}
	// Keep the "<unknown>" default when the magic number has no known name.
	if fsName, ok := graphdriver.FsNames[fsMagic]; ok {
		backingFs = fsName
	}

	switch fsMagic {
	case graphdriver.FsMagicAufs, graphdriver.FsMagicEcryptfs, graphdriver.FsMagicNfsFs, graphdriver.FsMagicOverlay, graphdriver.FsMagicZfs:
		logger.Errorf("'overlay2' is not supported over %s", backingFs)
		return nil, graphdriver.ErrIncompatibleFS
	case graphdriver.FsMagicBtrfs:
		// Support for OverlayFS on BTRFS was added in kernel 4.7
		// See https://btrfs.wiki.kernel.org/index.php/Changelog
		if kernel.CompareKernelVersion(*v, kernel.VersionInfo{Kernel: 4, Major: 7, Minor: 0}) < 0 {
			if !opts.overrideKernelCheck {
				logger.Errorf("'overlay2' requires kernel 4.7 to use on %s", backingFs)
				return nil, graphdriver.ErrIncompatibleFS
			}
			logger.Warn("Using pre-4.7.0 kernel for overlay2 on btrfs, may require kernel update")
		}
	}

	// On pre-4.0 kernels either trust the override option, or probe whether
	// multiple lower directories actually work before giving up.
	if kernel.CompareKernelVersion(*v, kernel.VersionInfo{Kernel: 4, Major: 0, Minor: 0}) < 0 {
		if opts.overrideKernelCheck {
			logger.Warn("Using pre-4.0.0 kernel for overlay2, mount failures may require kernel update")
		} else {
			if err := supportsMultipleLowerDir(testdir); err != nil {
				logger.Debugf("Multiple lower dirs not supported: %v", err)
				return nil, graphdriver.ErrNotSupported
			}
		}
	}
	supportsDType, err := fsutils.SupportsDType(testdir)
	if err != nil {
		return nil, err
	}
	if !supportsDType {
		if !graphdriver.IsInitialized(home) {
			return nil, overlayutils.ErrDTypeNotSupported("overlay2", backingFs)
		}
		// allow running without d_type only for existing setups (#27443)
		logger.Warn(overlayutils.ErrDTypeNotSupported("overlay2", backingFs))
	}

	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
	if err != nil {
		return nil, err
	}
	// Create the driver home dir
	// (creating home/l also creates home itself).
	if err := idtools.MkdirAllAndChown(path.Join(home, linkDir), 0700, idtools.Identity{UID: rootUID, GID: rootGID}); err != nil {
		return nil, err
	}

	d := &Driver{
		home:          home,
		uidMaps:       uidMaps,
		gidMaps:       gidMaps,
		ctr:           graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)),
		supportsDType: supportsDType,
		locker:        locker.New(),
		options:       *opts,
	}

	d.naiveDiff = graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps)

	if backingFs == "xfs" {
		// Try to enable project quota support over xfs.
		// Failure is only fatal when a default size was requested.
		if d.quotaCtl, err = quota.NewControl(home); err == nil {
			projectQuotaSupported = true
		} else if opts.quota.Size > 0 {
			return nil, fmt.Errorf("Storage option overlay2.size not supported. Filesystem does not support Project Quota: %v", err)
		}
	} else if opts.quota.Size > 0 {
		// if xfs is not the backing fs then error out if the storage-opt overlay2.size is used.
		return nil, fmt.Errorf("Storage Option overlay2.size only supported for backingFS XFS. Found %v", backingFs)
	}

	// figure out whether "index=off" option is recognized by the kernel
	_, err = os.Stat("/sys/module/overlay/parameters/index")
	switch {
	case err == nil:
		indexOff = "index=off,"
	case os.IsNotExist(err):
		// old kernel, no index -- do nothing
	default:
		logger.Warnf("Unable to detect whether overlay kernel module supports index parameter: %s", err)
	}

	logger.Debugf("backingFs=%s, projectQuotaSupported=%v, indexOff=%q", backingFs, projectQuotaSupported, indexOff)

	return d, nil
}
   247  
   248  func parseOptions(options []string) (*overlayOptions, error) {
   249  	o := &overlayOptions{}
   250  	for _, option := range options {
   251  		key, val, err := parsers.ParseKeyValueOpt(option)
   252  		if err != nil {
   253  			return nil, err
   254  		}
   255  		key = strings.ToLower(key)
   256  		switch key {
   257  		case "overlay2.override_kernel_check":
   258  			o.overrideKernelCheck, err = strconv.ParseBool(val)
   259  			if err != nil {
   260  				return nil, err
   261  			}
   262  		case "overlay2.size":
   263  			size, err := units.RAMInBytes(val)
   264  			if err != nil {
   265  				return nil, err
   266  			}
   267  			o.quota.Size = uint64(size)
   268  		default:
   269  			return nil, fmt.Errorf("overlay2: unknown option %s", key)
   270  		}
   271  	}
   272  	return o, nil
   273  }
   274  
   275  func supportsOverlay() error {
   276  	// We can try to modprobe overlay first before looking at
   277  	// proc/filesystems for when overlay is supported
   278  	exec.Command("modprobe", "overlay").Run()
   279  
   280  	f, err := os.Open("/proc/filesystems")
   281  	if err != nil {
   282  		return err
   283  	}
   284  	defer f.Close()
   285  
   286  	s := bufio.NewScanner(f)
   287  	for s.Scan() {
   288  		if s.Text() == "nodev\toverlay" {
   289  			return nil
   290  		}
   291  	}
   292  	logger.Error("'overlay' not found as a supported filesystem on this host. Please ensure kernel is new enough and has overlay support loaded.")
   293  	return graphdriver.ErrNotSupported
   294  }
   295  
   296  func useNaiveDiff(home string) bool {
   297  	useNaiveDiffLock.Do(func() {
   298  		if err := doesSupportNativeDiff(home); err != nil {
   299  			logger.Warnf("Not using native diff for overlay2, this may cause degraded performance for building images: %v", err)
   300  			useNaiveDiffOnly = true
   301  		}
   302  	})
   303  	return useNaiveDiffOnly
   304  }
   305  
// String returns the name of the driver ("overlay2").
func (d *Driver) String() string {
	return driverName
}
   309  
   310  // Status returns current driver information in a two dimensional string array.
   311  // Output contains "Backing Filesystem" used in this implementation.
   312  func (d *Driver) Status() [][2]string {
   313  	return [][2]string{
   314  		{"Backing Filesystem", backingFs},
   315  		{"Supports d_type", strconv.FormatBool(d.supportsDType)},
   316  		{"Native Overlay Diff", strconv.FormatBool(!useNaiveDiff(d.home))},
   317  	}
   318  }
   319  
   320  // GetMetadata returns metadata about the overlay driver such as the LowerDir,
   321  // UpperDir, WorkDir, and MergeDir used to store data.
   322  func (d *Driver) GetMetadata(id string) (map[string]string, error) {
   323  	dir := d.dir(id)
   324  	if _, err := os.Stat(dir); err != nil {
   325  		return nil, err
   326  	}
   327  
   328  	metadata := map[string]string{
   329  		"WorkDir":   path.Join(dir, "work"),
   330  		"MergedDir": path.Join(dir, "merged"),
   331  		"UpperDir":  path.Join(dir, "diff"),
   332  	}
   333  
   334  	lowerDirs, err := d.getLowerDirs(id)
   335  	if err != nil {
   336  		return nil, err
   337  	}
   338  	if len(lowerDirs) > 0 {
   339  		metadata["LowerDir"] = strings.Join(lowerDirs, ":")
   340  	}
   341  
   342  	return metadata, nil
   343  }
   344  
// Cleanup cleans up any state created by overlay which should be cleaned
// when the daemon is being shut down. For now, this recursively unmounts
// everything mounted under the driver home directory.
func (d *Driver) Cleanup() error {
	return mount.RecursiveUnmount(d.home)
}
   351  
   352  // CreateReadWrite creates a layer that is writable for use as a container
   353  // file system.
   354  func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
   355  	if opts != nil && len(opts.StorageOpt) != 0 && !projectQuotaSupported {
   356  		return fmt.Errorf("--storage-opt is supported only for overlay over xfs with 'pquota' mount option")
   357  	}
   358  
   359  	if opts == nil {
   360  		opts = &graphdriver.CreateOpts{
   361  			StorageOpt: map[string]string{},
   362  		}
   363  	}
   364  
   365  	if _, ok := opts.StorageOpt["size"]; !ok {
   366  		if opts.StorageOpt == nil {
   367  			opts.StorageOpt = map[string]string{}
   368  		}
   369  		opts.StorageOpt["size"] = strconv.FormatUint(d.options.quota.Size, 10)
   370  	}
   371  
   372  	return d.create(id, parent, opts)
   373  }
   374  
   375  // Create is used to create the upper, lower, and merge directories required for overlay fs for a given id.
   376  // The parent filesystem is used to configure these directories for the overlay.
   377  func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) (retErr error) {
   378  	if opts != nil && len(opts.StorageOpt) != 0 {
   379  		if _, ok := opts.StorageOpt["size"]; ok {
   380  			return fmt.Errorf("--storage-opt size is only supported for ReadWrite Layers")
   381  		}
   382  	}
   383  	return d.create(id, parent, opts)
   384  }
   385  
   386  func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts) (retErr error) {
   387  	dir := d.dir(id)
   388  
   389  	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
   390  	if err != nil {
   391  		return err
   392  	}
   393  	root := idtools.Identity{UID: rootUID, GID: rootGID}
   394  
   395  	if err := idtools.MkdirAllAndChown(path.Dir(dir), 0700, root); err != nil {
   396  		return err
   397  	}
   398  	if err := idtools.MkdirAndChown(dir, 0700, root); err != nil {
   399  		return err
   400  	}
   401  
   402  	defer func() {
   403  		// Clean up on failure
   404  		if retErr != nil {
   405  			os.RemoveAll(dir)
   406  		}
   407  	}()
   408  
   409  	if opts != nil && len(opts.StorageOpt) > 0 {
   410  		driver := &Driver{}
   411  		if err := d.parseStorageOpt(opts.StorageOpt, driver); err != nil {
   412  			return err
   413  		}
   414  
   415  		if driver.options.quota.Size > 0 {
   416  			// Set container disk quota limit
   417  			if err := d.quotaCtl.SetQuota(dir, driver.options.quota); err != nil {
   418  				return err
   419  			}
   420  		}
   421  	}
   422  
   423  	if err := idtools.MkdirAndChown(path.Join(dir, "diff"), 0755, root); err != nil {
   424  		return err
   425  	}
   426  
   427  	lid := generateID(idLength)
   428  	if err := os.Symlink(path.Join("..", id, "diff"), path.Join(d.home, linkDir, lid)); err != nil {
   429  		return err
   430  	}
   431  
   432  	// Write link id to link file
   433  	if err := ioutil.WriteFile(path.Join(dir, "link"), []byte(lid), 0644); err != nil {
   434  		return err
   435  	}
   436  
   437  	// if no parent directory, done
   438  	if parent == "" {
   439  		return nil
   440  	}
   441  
   442  	if err := idtools.MkdirAndChown(path.Join(dir, "work"), 0700, root); err != nil {
   443  		return err
   444  	}
   445  
   446  	lower, err := d.getLower(parent)
   447  	if err != nil {
   448  		return err
   449  	}
   450  	if lower != "" {
   451  		if err := ioutil.WriteFile(path.Join(dir, lowerFile), []byte(lower), 0666); err != nil {
   452  			return err
   453  		}
   454  	}
   455  
   456  	return nil
   457  }
   458  
   459  // Parse overlay storage options
   460  func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error {
   461  	// Read size to set the disk project quota per container
   462  	for key, val := range storageOpt {
   463  		key := strings.ToLower(key)
   464  		switch key {
   465  		case "size":
   466  			size, err := units.RAMInBytes(val)
   467  			if err != nil {
   468  				return err
   469  			}
   470  			driver.options.quota.Size = uint64(size)
   471  		default:
   472  			return fmt.Errorf("Unknown option %s", key)
   473  		}
   474  	}
   475  
   476  	return nil
   477  }
   478  
   479  func (d *Driver) getLower(parent string) (string, error) {
   480  	parentDir := d.dir(parent)
   481  
   482  	// Ensure parent exists
   483  	if _, err := os.Lstat(parentDir); err != nil {
   484  		return "", err
   485  	}
   486  
   487  	// Read Parent link fileA
   488  	parentLink, err := ioutil.ReadFile(path.Join(parentDir, "link"))
   489  	if err != nil {
   490  		return "", err
   491  	}
   492  	lowers := []string{path.Join(linkDir, string(parentLink))}
   493  
   494  	parentLower, err := ioutil.ReadFile(path.Join(parentDir, lowerFile))
   495  	if err == nil {
   496  		parentLowers := strings.Split(string(parentLower), ":")
   497  		lowers = append(lowers, parentLowers...)
   498  	}
   499  	if len(lowers) > maxDepth {
   500  		return "", errors.New("max depth exceeded")
   501  	}
   502  	return strings.Join(lowers, ":"), nil
   503  }
   504  
// dir returns the path of the directory that holds layer id, i.e. the
// driver home joined with id.
func (d *Driver) dir(id string) string {
	return path.Join(d.home, id)
}
   508  
   509  func (d *Driver) getLowerDirs(id string) ([]string, error) {
   510  	var lowersArray []string
   511  	lowers, err := ioutil.ReadFile(path.Join(d.dir(id), lowerFile))
   512  	if err == nil {
   513  		for _, s := range strings.Split(string(lowers), ":") {
   514  			lp, err := os.Readlink(path.Join(d.home, s))
   515  			if err != nil {
   516  				return nil, err
   517  			}
   518  			lowersArray = append(lowersArray, path.Clean(path.Join(d.home, linkDir, lp)))
   519  		}
   520  	} else if !os.IsNotExist(err) {
   521  		return nil, err
   522  	}
   523  	return lowersArray, nil
   524  }
   525  
   526  // Remove cleans the directories that are created for this id.
   527  func (d *Driver) Remove(id string) error {
   528  	if id == "" {
   529  		return fmt.Errorf("refusing to remove the directories: id is empty")
   530  	}
   531  	d.locker.Lock(id)
   532  	defer d.locker.Unlock(id)
   533  	dir := d.dir(id)
   534  	lid, err := ioutil.ReadFile(path.Join(dir, "link"))
   535  	if err == nil {
   536  		if len(lid) == 0 {
   537  			logger.Errorf("refusing to remove empty link for layer %v", id)
   538  		} else if err := os.RemoveAll(path.Join(d.home, linkDir, string(lid))); err != nil {
   539  			logger.Debugf("Failed to remove link: %v", err)
   540  		}
   541  	}
   542  
   543  	if err := system.EnsureRemoveAll(dir); err != nil && !os.IsNotExist(err) {
   544  		return err
   545  	}
   546  	return nil
   547  }
   548  
// Get creates and mounts the required file system for the given id and returns the mount path.
//
// A layer without a "lower" file is not mounted at all; its "diff" directory
// is returned directly. Otherwise the overlay is mounted on the layer's
// "merged" directory. Mounts are reference counted: only the first Get for a
// layer performs the mount, later calls just return the "merged" path.
// mountLabel is folded into the mount data via label.FormatMountLabel.
func (d *Driver) Get(id, mountLabel string) (_ containerfs.ContainerFS, retErr error) {
	d.locker.Lock(id)
	defer d.locker.Unlock(id)
	dir := d.dir(id)
	if _, err := os.Stat(dir); err != nil {
		return nil, err
	}

	diffDir := path.Join(dir, "diff")
	lowers, err := ioutil.ReadFile(path.Join(dir, lowerFile))
	if err != nil {
		// If no lower, just return diff directory
		if os.IsNotExist(err) {
			return containerfs.NewLocalContainerFS(diffDir), nil
		}
		return nil, err
	}

	mergedDir := path.Join(dir, "merged")
	// Already mounted by an earlier Get: only the reference count changes.
	if count := d.ctr.Increment(mergedDir); count > 1 {
		return containerfs.NewLocalContainerFS(mergedDir), nil
	}
	// From here on a failure must undo the reference taken above and, if
	// that was the last one, tear down whatever was set up.
	defer func() {
		if retErr != nil {
			if c := d.ctr.Decrement(mergedDir); c <= 0 {
				if mntErr := unix.Unmount(mergedDir, 0); mntErr != nil {
					logger.Errorf("error unmounting %v: %v", mergedDir, mntErr)
				}
				// Cleanup the created merged directory; see the comment in Put's rmdir
				if rmErr := unix.Rmdir(mergedDir); rmErr != nil && !os.IsNotExist(rmErr) {
					logger.Debugf("Failed to remove %s: %v: %v", id, rmErr, err)
				}
			}
		}
	}()

	workDir := path.Join(dir, "work")
	// The "lower" file stores links relative to the driver home; make them
	// absolute for the regular mount call.
	splitLowers := strings.Split(string(lowers), ":")
	absLowers := make([]string, len(splitLowers))
	for i, s := range splitLowers {
		absLowers[i] = path.Join(d.home, s)
	}
	opts := indexOff + "lowerdir=" + strings.Join(absLowers, ":") + ",upperdir=" + path.Join(dir, "diff") + ",workdir=" + path.Join(dir, "work")
	mountData := label.FormatMountLabel(opts, mountLabel)
	mount := unix.Mount
	mountTarget := mergedDir

	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
	if err != nil {
		return nil, err
	}
	if err := idtools.MkdirAndChown(mergedDir, 0700, idtools.Identity{UID: rootUID, GID: rootGID}); err != nil {
		return nil, err
	}

	pageSize := unix.Getpagesize()

	// Go can return a larger page size than supported by the system
	// as of go 1.7. This will be fixed in 1.8 and this block can be
	// removed when building with 1.8.
	// See https://github.com/golang/go/commit/1b9499b06989d2831e5b156161d6c07642926ee1
	// See https://github.com/docker/docker/issues/27384
	if pageSize > 4096 {
		pageSize = 4096
	}

	// Use relative paths and mountFrom when the mount data has exceeded
	// the page size. The mount syscall fails if the mount data cannot
	// fit within a page and relative links make the mount data much
	// smaller at the expense of requiring a fork exec to chroot.
	if len(mountData) > pageSize {
		opts = indexOff + "lowerdir=" + string(lowers) + ",upperdir=" + path.Join(id, "diff") + ",workdir=" + path.Join(id, "work")
		mountData = label.FormatMountLabel(opts, mountLabel)
		if len(mountData) > pageSize {
			return nil, fmt.Errorf("cannot mount layer, mount label too large %d", len(mountData))
		}

		// All paths in the mount data are now relative to the driver
		// home, so the mount is performed from there.
		mount = func(source string, target string, mType string, flags uintptr, label string) error {
			return mountFrom(d.home, source, target, mType, flags, label)
		}
		mountTarget = path.Join(id, "merged")
	}

	if err := mount("overlay", mountTarget, "overlay", 0, mountData); err != nil {
		return nil, fmt.Errorf("error creating overlay mount to %s: %v", mergedDir, err)
	}

	// chown "workdir/work" to the remapped root UID/GID. Overlay fs inside a
	// user namespace requires this to move a directory from lower to upper.
	if err := os.Chown(path.Join(workDir, "work"), rootUID, rootGID); err != nil {
		return nil, err
	}

	return containerfs.NewLocalContainerFS(mergedDir), nil
}
   645  
   646  // Put unmounts the mount path created for the give id.
   647  // It also removes the 'merged' directory to force the kernel to unmount the
   648  // overlay mount in other namespaces.
   649  func (d *Driver) Put(id string) error {
   650  	d.locker.Lock(id)
   651  	defer d.locker.Unlock(id)
   652  	dir := d.dir(id)
   653  	_, err := ioutil.ReadFile(path.Join(dir, lowerFile))
   654  	if err != nil {
   655  		// If no lower, no mount happened and just return directly
   656  		if os.IsNotExist(err) {
   657  			return nil
   658  		}
   659  		return err
   660  	}
   661  
   662  	mountpoint := path.Join(dir, "merged")
   663  	if count := d.ctr.Decrement(mountpoint); count > 0 {
   664  		return nil
   665  	}
   666  	if err := unix.Unmount(mountpoint, unix.MNT_DETACH); err != nil {
   667  		logger.Debugf("Failed to unmount %s overlay: %s - %v", id, mountpoint, err)
   668  	}
   669  	// Remove the mountpoint here. Removing the mountpoint (in newer kernels)
   670  	// will cause all other instances of this mount in other mount namespaces
   671  	// to be unmounted. This is necessary to avoid cases where an overlay mount
   672  	// that is present in another namespace will cause subsequent mounts
   673  	// operations to fail with ebusy.  We ignore any errors here because this may
   674  	// fail on older kernels which don't have
   675  	// torvalds/linux@8ed936b5671bfb33d89bc60bdcc7cf0470ba52fe applied.
   676  	if err := unix.Rmdir(mountpoint); err != nil && !os.IsNotExist(err) {
   677  		logger.Debugf("Failed to remove %s overlay: %v", id, err)
   678  	}
   679  	return nil
   680  }
   681  
   682  // Exists checks to see if the id is already mounted.
   683  func (d *Driver) Exists(id string) bool {
   684  	_, err := os.Stat(d.dir(id))
   685  	return err == nil
   686  }
   687  
   688  // isParent determines whether the given parent is the direct parent of the
   689  // given layer id
   690  func (d *Driver) isParent(id, parent string) bool {
   691  	lowers, err := d.getLowerDirs(id)
   692  	if err != nil {
   693  		return false
   694  	}
   695  	if parent == "" && len(lowers) > 0 {
   696  		return false
   697  	}
   698  
   699  	parentDir := d.dir(parent)
   700  	var ld string
   701  	if len(lowers) > 0 {
   702  		ld = filepath.Dir(lowers[0])
   703  	}
   704  	if ld == "" && parent == "" {
   705  		return true
   706  	}
   707  	return ld == parentDir
   708  }
   709  
   710  // ApplyDiff applies the new layer into a root
   711  func (d *Driver) ApplyDiff(id string, parent string, diff io.Reader) (size int64, err error) {
   712  	if !d.isParent(id, parent) {
   713  		return d.naiveDiff.ApplyDiff(id, parent, diff)
   714  	}
   715  
   716  	applyDir := d.getDiffPath(id)
   717  
   718  	logger.Debugf("Applying tar in %s", applyDir)
   719  	// Overlay doesn't need the parent id to apply the diff
   720  	if err := untar(diff, applyDir, &archive.TarOptions{
   721  		UIDMaps:        d.uidMaps,
   722  		GIDMaps:        d.gidMaps,
   723  		WhiteoutFormat: archive.OverlayWhiteoutFormat,
   724  		InUserNS:       rsystem.RunningInUserNS(),
   725  	}); err != nil {
   726  		return 0, err
   727  	}
   728  
   729  	return directory.Size(context.TODO(), applyDir)
   730  }
   731  
   732  func (d *Driver) getDiffPath(id string) string {
   733  	dir := d.dir(id)
   734  
   735  	return path.Join(dir, "diff")
   736  }
   737  
   738  // DiffSize calculates the changes between the specified id
   739  // and its parent and returns the size in bytes of the changes
   740  // relative to its base filesystem directory.
   741  func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
   742  	if useNaiveDiff(d.home) || !d.isParent(id, parent) {
   743  		return d.naiveDiff.DiffSize(id, parent)
   744  	}
   745  	return directory.Size(context.TODO(), d.getDiffPath(id))
   746  }
   747  
   748  // Diff produces an archive of the changes between the specified
   749  // layer and its parent layer which may be "".
   750  func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
   751  	if useNaiveDiff(d.home) || !d.isParent(id, parent) {
   752  		return d.naiveDiff.Diff(id, parent)
   753  	}
   754  
   755  	diffPath := d.getDiffPath(id)
   756  	logger.Debugf("Tar with options on %s", diffPath)
   757  	return archive.TarWithOptions(diffPath, &archive.TarOptions{
   758  		Compression:    archive.Uncompressed,
   759  		UIDMaps:        d.uidMaps,
   760  		GIDMaps:        d.gidMaps,
   761  		WhiteoutFormat: archive.OverlayWhiteoutFormat,
   762  	})
   763  }
   764  
// Changes produces a list of changes between the specified layer and its
// parent layer. If parent is "", then all changes will be ADD changes.
// The work is always delegated to the naive diff driver.
func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
	return d.naiveDiff.Changes(id, parent)
}
   769  }