github.com/endophage/docker@v1.4.2-0.20161027011718-242853499895/daemon/graphdriver/overlay2/overlay.go (about)

     1  // +build linux
     2  
     3  package overlay2
     4  
     5  import (
     6  	"bufio"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"io/ioutil"
    11  	"os"
    12  	"os/exec"
    13  	"path"
    14  	"strconv"
    15  	"strings"
    16  	"syscall"
    17  
    18  	"github.com/Sirupsen/logrus"
    19  
    20  	"github.com/docker/docker/daemon/graphdriver"
    21  	"github.com/docker/docker/daemon/graphdriver/quota"
    22  	"github.com/docker/docker/pkg/archive"
    23  	"github.com/docker/docker/pkg/chrootarchive"
    24  	"github.com/docker/docker/pkg/directory"
    25  	"github.com/docker/docker/pkg/idtools"
    26  	"github.com/docker/docker/pkg/mount"
    27  	"github.com/docker/docker/pkg/parsers"
    28  	"github.com/docker/docker/pkg/parsers/kernel"
    29  	"github.com/docker/go-units"
    30  
    31  	"github.com/opencontainers/runc/libcontainer/label"
    32  )
    33  
var (
	// untar is the function ApplyDiff calls to extract a layer archive
	// into the layer's "diff" directory.
	untar = chrootarchive.UntarUncompressed
)
    38  
    39  // This backend uses the overlay union filesystem for containers
    40  // with diff directories for each layer.
    41  
    42  // This version of the overlay driver requires at least kernel
    43  // 4.0.0 in order to support mounting multiple diff directories.
    44  
// Each container/image has at least a "diff" directory and a "link" file.
// When there are diff layers below, there is also a "lower" file as well
// as "merged" and "work" directories. The "diff" directory
// has the upper layer of the overlay and is used to capture any
// changes to the layer. The "lower" file contains all the lower layer
// mounts separated by ":" and ordered from uppermost to lowermost
// layers. The overlay itself is mounted in the "merged" directory,
// and the "work" dir is needed for overlay to work.
    53  
// The "link" file for each layer contains a unique string for the layer.
// Under the "l" directory at the root there will be a symbolic link
// with that unique string pointing to the "diff" directory for the layer.
    57  // The symbolic links are used to reference lower layers in the "lower"
    58  // file and on mount. The links are used to shorten the total length
    59  // of a layer reference without requiring changes to the layer identifier
    60  // or root directory. Mounts are always done relative to root and
    61  // referencing the symbolic links in order to ensure the number of
    62  // lower directories can fit in a single page for making the mount
    63  // syscall. A hard upper limit of 128 lower layers is enforced to ensure
    64  // that mounts do not fail due to length.
    65  
const (
	// driverName is the name this driver is registered under.
	driverName = "overlay2"
	// linkDir is the directory under the driver home that holds the
	// short symbolic links to each layer's "diff" directory.
	linkDir    = "l"
	// lowerFile is the per-layer file listing the lower layers,
	// separated by ":".
	lowerFile  = "lower"
	// maxDepth is the hard upper limit on the number of lower layers.
	maxDepth   = 128

	// idLength represents the number of random characters
	// which can be used to create the unique link identifier
	// for every layer. If this value is too long then the
	// page size limit for the mount command may be exceeded.
	// The idLength should be selected such that following equation
	// is true (512 is a buffer for label metadata).
	// ((idLength + len(linkDir) + 1) * maxDepth) <= (pageSize - 512)
	idLength = 26
)
    81  
// overlayOptions holds the settings parsed from the driver options and
// from per-layer storage options.
type overlayOptions struct {
	// overrideKernelCheck lets Init continue (with a warning) on kernels
	// older than 4.0.0 instead of returning ErrNotSupported.
	overrideKernelCheck bool
	// quota carries the disk quota parsed from storage options; Create
	// applies it when its Size is greater than zero.
	quota               quota.Quota
}
    86  
// Driver contains information about the home directory and the list of active mounts that are created using this driver.
type Driver struct {
	// home is the driver root directory; every layer lives in home/<id>
	// and the short links live in home/l.
	home     string
	// uidMaps and gidMaps are the ID mappings used to derive the remapped
	// root UID/GID and are passed to the archive helpers.
	uidMaps  []idtools.IDMap
	gidMaps  []idtools.IDMap
	// ctr reference-counts mounts, keyed by each layer's "merged" path.
	ctr      *graphdriver.RefCounter
	// quotaCtl is assigned by Init only when the backing filesystem is
	// xfs; Create uses it to set a layer's disk quota.
	quotaCtl *quota.Control
	// options holds overlay settings; Create uses the quota field of a
	// scratch Driver to receive parsed storage options.
	options  overlayOptions
}
    96  
var (
	// backingFs is the name of the filesystem the driver home resides on.
	// Init overwrites it when the filesystem magic is a known one, and
	// Status reports it.
	backingFs             = "<unknown>"
	// projectQuotaSupported is set to true by Init when the backing
	// filesystem is xfs and quota control could be initialized. Create
	// rejects storage options while it is false.
	projectQuotaSupported = false
)
   101  
// init registers the overlay2 driver with the graphdriver registry under
// driverName, using Init as its initialization function.
func init() {
	graphdriver.Register(driverName, Init)
}
   105  
// Init returns a native diff driver for the overlay filesystem.
// If overlay filesystem is not supported on the host, graphdriver.ErrNotSupported is returned as error.
// If an overlay filesystem is not supported over an existing filesystem then error graphdriver.ErrIncompatibleFS is returned.
//
// home is the driver root directory, options are "key=value" driver
// options (see parseOptions), and uidMaps/gidMaps are the ID mappings used
// to pick the owner of the directories the driver creates.
//
// As side effects Init creates home/l, makes home a private mount, and
// updates the package-level backingFs and projectQuotaSupported variables.
func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
	opts, err := parseOptions(options)
	if err != nil {
		return nil, err
	}

	// Any failure to detect overlay support is reported as ErrNotSupported;
	// the underlying error is intentionally not propagated.
	if err := supportsOverlay(); err != nil {
		return nil, graphdriver.ErrNotSupported
	}

	// require kernel 4.0.0 to ensure multiple lower dirs are supported
	v, err := kernel.GetKernelVersion()
	if err != nil {
		return nil, err
	}
	if kernel.CompareKernelVersion(*v, kernel.VersionInfo{Kernel: 4, Major: 0, Minor: 0}) < 0 {
		// Older kernels are only accepted when the user explicitly opted
		// in via overlay2.override_kernel_check.
		if !opts.overrideKernelCheck {
			return nil, graphdriver.ErrNotSupported
		}
		logrus.Warnf("Using pre-4.0.0 kernel for overlay2, mount failures may require kernel update")
	}

	fsMagic, err := graphdriver.GetFSMagic(home)
	if err != nil {
		return nil, err
	}
	// Record a human-readable name when the magic number is known;
	// otherwise backingFs keeps its "<unknown>" default.
	if fsName, ok := graphdriver.FsNames[fsMagic]; ok {
		backingFs = fsName
	}

	// check if they are running over btrfs, aufs, zfs, overlay, or ecryptfs
	switch fsMagic {
	case graphdriver.FsMagicBtrfs, graphdriver.FsMagicAufs, graphdriver.FsMagicZfs, graphdriver.FsMagicOverlay, graphdriver.FsMagicEcryptfs:
		logrus.Errorf("'overlay2' is not supported over %s", backingFs)
		return nil, graphdriver.ErrIncompatibleFS
	}

	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
	if err != nil {
		return nil, err
	}
	// Create the driver home dir
	// (creating home/l also creates home; an already-existing directory
	// is not treated as an error).
	if err := idtools.MkdirAllAs(path.Join(home, linkDir), 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
		return nil, err
	}

	if err := mount.MakePrivate(home); err != nil {
		return nil, err
	}

	d := &Driver{
		home:    home,
		uidMaps: uidMaps,
		gidMaps: gidMaps,
		ctr:     graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)),
	}

	if backingFs == "xfs" {
		// Try to enable project quota support over xfs.
		// A failure here is not fatal: the driver is still returned and
		// projectQuotaSupported simply stays false.
		if d.quotaCtl, err = quota.NewControl(home); err == nil {
			projectQuotaSupported = true
		}
	}

	logrus.Debugf("backingFs=%s,  projectQuotaSupported=%v", backingFs, projectQuotaSupported)

	return d, nil
}
   177  
   178  func parseOptions(options []string) (*overlayOptions, error) {
   179  	o := &overlayOptions{}
   180  	for _, option := range options {
   181  		key, val, err := parsers.ParseKeyValueOpt(option)
   182  		if err != nil {
   183  			return nil, err
   184  		}
   185  		key = strings.ToLower(key)
   186  		switch key {
   187  		case "overlay2.override_kernel_check":
   188  			o.overrideKernelCheck, err = strconv.ParseBool(val)
   189  			if err != nil {
   190  				return nil, err
   191  			}
   192  
   193  		default:
   194  			return nil, fmt.Errorf("overlay2: Unknown option %s\n", key)
   195  		}
   196  	}
   197  	return o, nil
   198  }
   199  
   200  func supportsOverlay() error {
   201  	// We can try to modprobe overlay first before looking at
   202  	// proc/filesystems for when overlay is supported
   203  	exec.Command("modprobe", "overlay").Run()
   204  
   205  	f, err := os.Open("/proc/filesystems")
   206  	if err != nil {
   207  		return err
   208  	}
   209  	defer f.Close()
   210  
   211  	s := bufio.NewScanner(f)
   212  	for s.Scan() {
   213  		if s.Text() == "nodev\toverlay" {
   214  			return nil
   215  		}
   216  	}
   217  	logrus.Error("'overlay' not found as a supported filesystem on this host. Please ensure kernel is new enough and has overlay support loaded.")
   218  	return graphdriver.ErrNotSupported
   219  }
   220  
// String returns the name of the driver ("overlay2").
func (d *Driver) String() string {
	return driverName
}
   224  
   225  // Status returns current driver information in a two dimensional string array.
   226  // Output contains "Backing Filesystem" used in this implementation.
   227  func (d *Driver) Status() [][2]string {
   228  	return [][2]string{
   229  		{"Backing Filesystem", backingFs},
   230  	}
   231  }
   232  
   233  // GetMetadata returns meta data about the overlay driver such as
   234  // LowerDir, UpperDir, WorkDir and MergeDir used to store data.
   235  func (d *Driver) GetMetadata(id string) (map[string]string, error) {
   236  	dir := d.dir(id)
   237  	if _, err := os.Stat(dir); err != nil {
   238  		return nil, err
   239  	}
   240  
   241  	metadata := map[string]string{
   242  		"WorkDir":   path.Join(dir, "work"),
   243  		"MergedDir": path.Join(dir, "merged"),
   244  		"UpperDir":  path.Join(dir, "diff"),
   245  	}
   246  
   247  	lowerDirs, err := d.getLowerDirs(id)
   248  	if err != nil {
   249  		return nil, err
   250  	}
   251  	if len(lowerDirs) > 0 {
   252  		metadata["LowerDir"] = strings.Join(lowerDirs, ":")
   253  	}
   254  
   255  	return metadata, nil
   256  }
   257  
// Cleanup releases any state created by the driver which should be undone
// when the daemon is being shut down. For now this only unmounts the
// driver home directory, which Init turned into a private mount.
func (d *Driver) Cleanup() error {
	return mount.Unmount(d.home)
}
   264  
// CreateReadWrite creates a layer that is writable for use as a container
// file system. It delegates to Create with the same arguments.
func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
	return d.Create(id, parent, mountLabel, storageOpt)
}
   270  
   271  // Create is used to create the upper, lower, and merge directories required for overlay fs for a given id.
   272  // The parent filesystem is used to configure these directories for the overlay.
   273  func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) (retErr error) {
   274  
   275  	if len(storageOpt) != 0 && !projectQuotaSupported {
   276  		return fmt.Errorf("--storage-opt is supported only for overlay over xfs with 'pquota' mount option")
   277  	}
   278  
   279  	dir := d.dir(id)
   280  
   281  	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
   282  	if err != nil {
   283  		return err
   284  	}
   285  	if err := idtools.MkdirAllAs(path.Dir(dir), 0700, rootUID, rootGID); err != nil {
   286  		return err
   287  	}
   288  	if err := idtools.MkdirAs(dir, 0700, rootUID, rootGID); err != nil {
   289  		return err
   290  	}
   291  
   292  	defer func() {
   293  		// Clean up on failure
   294  		if retErr != nil {
   295  			os.RemoveAll(dir)
   296  		}
   297  	}()
   298  
   299  	if len(storageOpt) > 0 {
   300  		driver := &Driver{}
   301  		if err := d.parseStorageOpt(storageOpt, driver); err != nil {
   302  			return err
   303  		}
   304  
   305  		if driver.options.quota.Size > 0 {
   306  			// Set container disk quota limit
   307  			if err := d.quotaCtl.SetQuota(dir, driver.options.quota); err != nil {
   308  				return err
   309  			}
   310  		}
   311  	}
   312  
   313  	if err := idtools.MkdirAs(path.Join(dir, "diff"), 0755, rootUID, rootGID); err != nil {
   314  		return err
   315  	}
   316  
   317  	lid := generateID(idLength)
   318  	if err := os.Symlink(path.Join("..", id, "diff"), path.Join(d.home, linkDir, lid)); err != nil {
   319  		return err
   320  	}
   321  
   322  	// Write link id to link file
   323  	if err := ioutil.WriteFile(path.Join(dir, "link"), []byte(lid), 0644); err != nil {
   324  		return err
   325  	}
   326  
   327  	// if no parent directory, done
   328  	if parent == "" {
   329  		return nil
   330  	}
   331  
   332  	if err := idtools.MkdirAs(path.Join(dir, "work"), 0700, rootUID, rootGID); err != nil {
   333  		return err
   334  	}
   335  	if err := idtools.MkdirAs(path.Join(dir, "merged"), 0700, rootUID, rootGID); err != nil {
   336  		return err
   337  	}
   338  
   339  	lower, err := d.getLower(parent)
   340  	if err != nil {
   341  		return err
   342  	}
   343  	if lower != "" {
   344  		if err := ioutil.WriteFile(path.Join(dir, lowerFile), []byte(lower), 0666); err != nil {
   345  			return err
   346  		}
   347  	}
   348  
   349  	return nil
   350  }
   351  
   352  // Parse overlay storage options
   353  func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error {
   354  	// Read size to set the disk project quota per container
   355  	for key, val := range storageOpt {
   356  		key := strings.ToLower(key)
   357  		switch key {
   358  		case "size":
   359  			size, err := units.RAMInBytes(val)
   360  			if err != nil {
   361  				return err
   362  			}
   363  			driver.options.quota.Size = uint64(size)
   364  		default:
   365  			return fmt.Errorf("Unknown option %s", key)
   366  		}
   367  	}
   368  
   369  	return nil
   370  }
   371  
   372  func (d *Driver) getLower(parent string) (string, error) {
   373  	parentDir := d.dir(parent)
   374  
   375  	// Ensure parent exists
   376  	if _, err := os.Lstat(parentDir); err != nil {
   377  		return "", err
   378  	}
   379  
   380  	// Read Parent link fileA
   381  	parentLink, err := ioutil.ReadFile(path.Join(parentDir, "link"))
   382  	if err != nil {
   383  		return "", err
   384  	}
   385  	lowers := []string{path.Join(linkDir, string(parentLink))}
   386  
   387  	parentLower, err := ioutil.ReadFile(path.Join(parentDir, lowerFile))
   388  	if err == nil {
   389  		parentLowers := strings.Split(string(parentLower), ":")
   390  		lowers = append(lowers, parentLowers...)
   391  	}
   392  	if len(lowers) > maxDepth {
   393  		return "", errors.New("max depth exceeded")
   394  	}
   395  	return strings.Join(lowers, ":"), nil
   396  }
   397  
// dir returns the path of the directory holding the layer with the given
// id, i.e. the id joined onto the driver home.
func (d *Driver) dir(id string) string {
	return path.Join(d.home, id)
}
   401  
   402  func (d *Driver) getLowerDirs(id string) ([]string, error) {
   403  	var lowersArray []string
   404  	lowers, err := ioutil.ReadFile(path.Join(d.dir(id), lowerFile))
   405  	if err == nil {
   406  		for _, s := range strings.Split(string(lowers), ":") {
   407  			lp, err := os.Readlink(path.Join(d.home, s))
   408  			if err != nil {
   409  				return nil, err
   410  			}
   411  			lowersArray = append(lowersArray, path.Clean(path.Join(d.home, "link", lp)))
   412  		}
   413  	} else if !os.IsNotExist(err) {
   414  		return nil, err
   415  	}
   416  	return lowersArray, nil
   417  }
   418  
   419  // Remove cleans the directories that are created for this id.
   420  func (d *Driver) Remove(id string) error {
   421  	dir := d.dir(id)
   422  	lid, err := ioutil.ReadFile(path.Join(dir, "link"))
   423  	if err == nil {
   424  		if err := os.RemoveAll(path.Join(d.home, linkDir, string(lid))); err != nil {
   425  			logrus.Debugf("Failed to remove link: %v", err)
   426  		}
   427  	}
   428  
   429  	if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
   430  		return err
   431  	}
   432  	return nil
   433  }
   434  
// Get creates and mounts the required file system for the given id and returns the mount path.
//
// A layer without a "lower" file is not mounted at all: its "diff"
// directory is returned directly. Otherwise the overlay is mounted on the
// layer's "merged" directory. Mounts are reference counted by merged
// path, so only the first Get for a layer performs the mount; later calls
// just return the path. mountLabel is added to the mount data via
// label.FormatMountLabel.
//
// If mounting or the follow-up chown fails, the reference taken by this
// call is dropped again and, when no references remain, the merged
// directory is unmounted.
func (d *Driver) Get(id string, mountLabel string) (s string, err error) {
	dir := d.dir(id)
	if _, err := os.Stat(dir); err != nil {
		return "", err
	}

	diffDir := path.Join(dir, "diff")
	lowers, err := ioutil.ReadFile(path.Join(dir, lowerFile))
	if err != nil {
		// If no lower, just return diff directory
		if os.IsNotExist(err) {
			return diffDir, nil
		}
		return "", err
	}

	mergedDir := path.Join(dir, "merged")
	// Already mounted by an earlier Get: reuse the existing mount.
	if count := d.ctr.Increment(mergedDir); count > 1 {
		return mergedDir, nil
	}
	// Roll back on failure. This relies on the named result err, which
	// every failing return below sets.
	defer func() {
		if err != nil {
			if c := d.ctr.Decrement(mergedDir); c <= 0 {
				syscall.Unmount(mergedDir, 0)
			}
		}
	}()

	workDir := path.Join(dir, "work")
	// The "lower" file stores links relative to the driver home; turn
	// them into absolute paths for the regular mount call.
	splitLowers := strings.Split(string(lowers), ":")
	absLowers := make([]string, len(splitLowers))
	for i, s := range splitLowers {
		absLowers[i] = path.Join(d.home, s)
	}
	opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(absLowers, ":"), path.Join(dir, "diff"), path.Join(dir, "work"))
	mountData := label.FormatMountLabel(opts, mountLabel)
	mount := syscall.Mount
	mountTarget := mergedDir

	pageSize := syscall.Getpagesize()

	// Go can return a larger page size than supported by the system
	// as of go 1.7. This will be fixed in 1.8 and this block can be
	// removed when building with 1.8.
	// See https://github.com/golang/go/commit/1b9499b06989d2831e5b156161d6c07642926ee1
	// See https://github.com/docker/docker/issues/27384
	if pageSize > 4096 {
		pageSize = 4096
	}

	// Use relative paths and mountFrom when the mount data has exceeded
	// the page size. The mount syscall fails if the mount data cannot
	// fit within a page and relative links make the mount data much
	// smaller at the expense of requiring a fork exec to chroot.
	if len(mountData) > pageSize {
		// Rebuild the options with paths relative to the driver home.
		opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", string(lowers), path.Join(id, "diff"), path.Join(id, "work"))
		mountData = label.FormatMountLabel(opts, mountLabel)
		if len(mountData) > pageSize {
			return "", fmt.Errorf("cannot mount layer, mount label too large %d", len(mountData))
		}

		mount = func(source string, target string, mType string, flags uintptr, label string) error {
			return mountFrom(d.home, source, target, mType, flags, label)
		}
		mountTarget = path.Join(id, "merged")
	}

	if err := mount("overlay", mountTarget, "overlay", 0, mountData); err != nil {
		return "", fmt.Errorf("error creating overlay mount to %s: %v", mergedDir, err)
	}

	// chown "workdir/work" to the remapped root UID/GID. Overlay fs inside a
	// user namespace requires this to move a directory from lower to upper.
	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
	if err != nil {
		return "", err
	}

	if err := os.Chown(path.Join(workDir, "work"), rootUID, rootGID); err != nil {
		return "", err
	}

	return mergedDir, nil
}
   520  
   521  // Put unmounts the mount path created for the give id.
   522  func (d *Driver) Put(id string) error {
   523  	mountpoint := path.Join(d.dir(id), "merged")
   524  	if count := d.ctr.Decrement(mountpoint); count > 0 {
   525  		return nil
   526  	}
   527  	if err := syscall.Unmount(mountpoint, 0); err != nil {
   528  		logrus.Debugf("Failed to unmount %s overlay: %v", id, err)
   529  	}
   530  	return nil
   531  }
   532  
// Exists reports whether a layer directory for the given id exists, i.e.
// whether the directory can be stat'ed. It does not check whether the
// layer is currently mounted.
func (d *Driver) Exists(id string) bool {
	_, err := os.Stat(d.dir(id))
	return err == nil
}
   538  
   539  // ApplyDiff applies the new layer into a root
   540  func (d *Driver) ApplyDiff(id string, parent string, diff io.Reader) (size int64, err error) {
   541  	applyDir := d.getDiffPath(id)
   542  
   543  	logrus.Debugf("Applying tar in %s", applyDir)
   544  	// Overlay doesn't need the parent id to apply the diff
   545  	if err := untar(diff, applyDir, &archive.TarOptions{
   546  		UIDMaps:        d.uidMaps,
   547  		GIDMaps:        d.gidMaps,
   548  		WhiteoutFormat: archive.OverlayWhiteoutFormat,
   549  	}); err != nil {
   550  		return 0, err
   551  	}
   552  
   553  	return d.DiffSize(id, parent)
   554  }
   555  
   556  func (d *Driver) getDiffPath(id string) string {
   557  	dir := d.dir(id)
   558  
   559  	return path.Join(dir, "diff")
   560  }
   561  
// DiffSize calculates the changes between the specified id
// and its parent and returns the size in bytes of the changes
// relative to its base filesystem directory.
// The size is that of the layer's "diff" directory as reported by
// directory.Size; the parent argument is not used.
func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
	return directory.Size(d.getDiffPath(id))
}
   568  
   569  // Diff produces an archive of the changes between the specified
   570  // layer and its parent layer which may be "".
   571  func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
   572  	diffPath := d.getDiffPath(id)
   573  	logrus.Debugf("Tar with options on %s", diffPath)
   574  	return archive.TarWithOptions(diffPath, &archive.TarOptions{
   575  		Compression:    archive.Uncompressed,
   576  		UIDMaps:        d.uidMaps,
   577  		GIDMaps:        d.gidMaps,
   578  		WhiteoutFormat: archive.OverlayWhiteoutFormat,
   579  	})
   580  }
   581  
   582  // Changes produces a list of changes between the specified layer
   583  // and its parent layer. If parent is "", then all changes will be ADD changes.
   584  func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
   585  	// Overlay doesn't have snapshots, so we need to get changes from all parent
   586  	// layers.
   587  	diffPath := d.getDiffPath(id)
   588  	layers, err := d.getLowerDirs(id)
   589  	if err != nil {
   590  		return nil, err
   591  	}
   592  
   593  	return archive.OverlayChanges(layers, diffPath)
   594  }