
     1  /*
     2     Copyright The containerd Authors.
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    17  package mount
    19  import (
    20  	"fmt"
    21  	"os"
    22  	"os/exec"
    23  	"path"
    24  	"strings"
    25  	"time"
    27  	""
    28  	""
    29  	""
    30  )
var (
	// pagesize is the memory page size in bytes. The 4096 default is
	// overwritten in init with the value reported by os.Getpagesize.
	pagesize              = 4096
	// allowedHelperBinaries lists the mount helper binaries that Mount may
	// execute; a helper "mount.X" is selected for mount types prefixed "X.".
	allowedHelperBinaries = []string{"mount.fuse", "mount.fuse3"}
)

// init replaces the default pagesize with the page size reported by the OS.
func init() {
	pagesize = os.Getpagesize()
}
// Mount to the provided target path.
//
// If m.Type starts with "fuse." or "fuse3.", "mount.fuse" or "mount.fuse3"
// helper binary is called.
//
// Otherwise the mount is performed directly in up to three steps: the initial
// mount (or a remount with changed data) using all non-propagation flags, an
// optional call that changes the propagation type, and an optional remount
// that applies read-only to a bind mount. The first failing step's error is
// returned; nil is returned when every required step succeeds.
func (m *Mount) Mount(target string) error {
	for _, helperBinary := range allowedHelperBinaries {
		// helperBinary = "mount.fuse", typePrefix = "fuse."
		typePrefix := strings.TrimPrefix(helperBinary, "mount.") + "."
		if strings.HasPrefix(m.Type, typePrefix) {
			return m.mountWithHelper(helperBinary, typePrefix, target)
		}
	}
	var (
		// chdir stays empty unless the lowerdir option gets compacted below;
		// it is handed to mountAt together with the (possibly rewritten) options.
		chdir   string
		options = m.Options
	)

	// avoid hitting one page limit of mount argument buffer
	//
	// NOTE: 512 is a buffer during pagesize check.
	if m.Type == "overlay" && optionsSize(options) >= pagesize-512 {
		chdir, options = compactLowerdirOption(options)
	}

	// Split the options into mount flag bits and the comma-joined data string.
	flags, data := parseMountOptions(options)
	if len(data) > pagesize {
		return errors.Errorf("mount options is too long")
	}

	// propagation types.
	const ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE

	// Ensure propagation type change flags aren't included in other calls.
	oflags := flags &^ ptypes

	// In the case of remounting with changed data (data != ""), need to call mount (moby/moby#34077).
	// A remount that carries no data skips this call entirely.
	if flags&unix.MS_REMOUNT == 0 || data != "" {
		// Initial call applying all non-propagation flags for mount
		// or remount with changed data
		if err := mountAt(chdir, m.Source, target, m.Type, uintptr(oflags), data); err != nil {
			return err
		}
	}

	if flags&ptypes != 0 {
		// Change the propagation type.
		// Only the propagation bits plus MS_REC and MS_SILENT are passed on.
		const pflags = ptypes | unix.MS_REC | unix.MS_SILENT
		if err := unix.Mount("", target, "", uintptr(flags&pflags), ""); err != nil {
			return err
		}
	}

	// A bind mount that was requested read-only needs an additional remount.
	const broflags = unix.MS_BIND | unix.MS_RDONLY
	if oflags&broflags == broflags {
		// Remount the bind to apply read only.
		return unix.Mount("", target, "", uintptr(oflags|unix.MS_REMOUNT), "")
	}
	return nil
}
   101  // Unmount the provided mount path with the flags
   102  func Unmount(target string, flags int) error {
   103  	if err := unmount(target, flags); err != nil && err != unix.EINVAL {
   104  		return err
   105  	}
   106  	return nil
   107  }
   109  func isFUSE(dir string) (bool, error) {
   110  	// fuseSuperMagic is defined in statfs(2)
   111  	const fuseSuperMagic = 0x65735546
   112  	var st unix.Statfs_t
   113  	if err := unix.Statfs(dir, &st); err != nil {
   114  		return false, err
   115  	}
   116  	return st.Type == fuseSuperMagic, nil
   117  }
   119  func unmount(target string, flags int) error {
   120  	// For FUSE mounts, attempting to execute fusermount helper binary is preferred
   121  	//
   122  	if ok, err := isFUSE(target); err == nil && ok {
   123  		for _, helperBinary := range []string{"fusermount3", "fusermount"} {
   124  			cmd := exec.Command(helperBinary, "-u", target)
   125  			if err := cmd.Run(); err == nil {
   126  				return nil
   127  			}
   128  			// ignore error and try unix.Unmount
   129  		}
   130  	}
   131  	for i := 0; i < 50; i++ {
   132  		if err := unix.Unmount(target, flags); err != nil {
   133  			switch err {
   134  			case unix.EBUSY:
   135  				time.Sleep(50 * time.Millisecond)
   136  				continue
   137  			default:
   138  				return err
   139  			}
   140  		}
   141  		return nil
   142  	}
   143  	return errors.Wrapf(unix.EBUSY, "failed to unmount target %s", target)
   144  }
   146  // UnmountAll repeatedly unmounts the given mount point until there
   147  // are no mounts remaining (EINVAL is returned by mount), which is
   148  // useful for undoing a stack of mounts on the same mount point.
   149  // UnmountAll all is noop when the first argument is an empty string.
   150  // This is done when the containerd client did not specify any rootfs
   151  // mounts (e.g. because the rootfs is managed outside containerd)
   152  // UnmountAll is noop when the mount path does not exist.
   153  func UnmountAll(mount string, flags int) error {
   154  	if mount == "" {
   155  		return nil
   156  	}
   157  	if _, err := os.Stat(mount); os.IsNotExist(err) {
   158  		return nil
   159  	}
   161  	for {
   162  		if err := unmount(mount, flags); err != nil {
   163  			// EINVAL is returned if the target is not a
   164  			// mount point, indicating that we are
   165  			// done. It can also indicate a few other
   166  			// things (such as invalid flags) which we
   167  			// unfortunately end up squelching here too.
   168  			if err == unix.EINVAL {
   169  				return nil
   170  			}
   171  			return err
   172  		}
   173  	}
   174  }
   176  // parseMountOptions takes fstab style mount options and parses them for
   177  // use with a standard mount() syscall
   178  func parseMountOptions(options []string) (int, string) {
   179  	var (
   180  		flag int
   181  		data []string
   182  	)
   183  	flags := map[string]struct {
   184  		clear bool
   185  		flag  int
   186  	}{
   187  		"async":         {true, unix.MS_SYNCHRONOUS},
   188  		"atime":         {true, unix.MS_NOATIME},
   189  		"bind":          {false, unix.MS_BIND},
   190  		"defaults":      {false, 0},
   191  		"dev":           {true, unix.MS_NODEV},
   192  		"diratime":      {true, unix.MS_NODIRATIME},
   193  		"dirsync":       {false, unix.MS_DIRSYNC},
   194  		"exec":          {true, unix.MS_NOEXEC},
   195  		"mand":          {false, unix.MS_MANDLOCK},
   196  		"noatime":       {false, unix.MS_NOATIME},
   197  		"nodev":         {false, unix.MS_NODEV},
   198  		"nodiratime":    {false, unix.MS_NODIRATIME},
   199  		"noexec":        {false, unix.MS_NOEXEC},
   200  		"nomand":        {true, unix.MS_MANDLOCK},
   201  		"norelatime":    {true, unix.MS_RELATIME},
   202  		"nostrictatime": {true, unix.MS_STRICTATIME},
   203  		"nosuid":        {false, unix.MS_NOSUID},
   204  		"rbind":         {false, unix.MS_BIND | unix.MS_REC},
   205  		"relatime":      {false, unix.MS_RELATIME},
   206  		"remount":       {false, unix.MS_REMOUNT},
   207  		"ro":            {false, unix.MS_RDONLY},
   208  		"rw":            {true, unix.MS_RDONLY},
   209  		"strictatime":   {false, unix.MS_STRICTATIME},
   210  		"suid":          {true, unix.MS_NOSUID},
   211  		"sync":          {false, unix.MS_SYNCHRONOUS},
   212  	}
   213  	for _, o := range options {
   214  		// If the option does not exist in the flags table or the flag
   215  		// is not supported on the platform,
   216  		// then it is a data value for a specific fs type
   217  		if f, exists := flags[o]; exists && f.flag != 0 {
   218  			if f.clear {
   219  				flag &^= f.flag
   220  			} else {
   221  				flag |= f.flag
   222  			}
   223  		} else {
   224  			data = append(data, o)
   225  		}
   226  	}
   227  	return flag, strings.Join(data, ",")
   228  }
   230  // compactLowerdirOption updates overlay lowdir option and returns the common
   231  // dir among all the lowdirs.
   232  func compactLowerdirOption(opts []string) (string, []string) {
   233  	idx, dirs := findOverlayLowerdirs(opts)
   234  	if idx == -1 || len(dirs) == 1 {
   235  		// no need to compact if there is only one lowerdir
   236  		return "", opts
   237  	}
   239  	// find out common dir
   240  	commondir := longestCommonPrefix(dirs)
   241  	if commondir == "" {
   242  		return "", opts
   243  	}
   245  	// NOTE: the snapshot id is based on digits.
   246  	// in order to avoid to get snapshots/x, should be back to parent dir.
   247  	// however, there is assumption that the common dir is ${root}/io.containerd.v1.overlayfs/snapshots.
   248  	commondir = path.Dir(commondir)
   249  	if commondir == "/" {
   250  		return "", opts
   251  	}
   252  	commondir = commondir + "/"
   254  	newdirs := make([]string, 0, len(dirs))
   255  	for _, dir := range dirs {
   256  		newdirs = append(newdirs, dir[len(commondir):])
   257  	}
   259  	newopts := copyOptions(opts)
   260  	newopts = append(newopts[:idx], newopts[idx+1:]...)
   261  	newopts = append(newopts, fmt.Sprintf("lowerdir=%s", strings.Join(newdirs, ":")))
   262  	return commondir, newopts
   263  }
   265  // findOverlayLowerdirs returns the index of lowerdir in mount's options and
   266  // all the lowerdir target.
   267  func findOverlayLowerdirs(opts []string) (int, []string) {
   268  	var (
   269  		idx    = -1
   270  		prefix = "lowerdir="
   271  	)
   273  	for i, opt := range opts {
   274  		if strings.HasPrefix(opt, prefix) {
   275  			idx = i
   276  			break
   277  		}
   278  	}
   280  	if idx == -1 {
   281  		return -1, nil
   282  	}
   283  	return idx, strings.Split(opts[idx][len(prefix):], ":")
   284  }
   286  // longestCommonPrefix finds the longest common prefix in the string slice.
   287  func longestCommonPrefix(strs []string) string {
   288  	if len(strs) == 0 {
   289  		return ""
   290  	} else if len(strs) == 1 {
   291  		return strs[0]
   292  	}
   294  	// find out the min/max value by alphabetical order
   295  	min, max := strs[0], strs[0]
   296  	for _, str := range strs[1:] {
   297  		if min > str {
   298  			min = str
   299  		}
   300  		if max < str {
   301  			max = str
   302  		}
   303  	}
   305  	// find out the common part between min and max
   306  	for i := 0; i < len(min) && i < len(max); i++ {
   307  		if min[i] != max[i] {
   308  			return min[:i]
   309  		}
   310  	}
   311  	return min
   312  }
   314  // copyOptions copies the options.
   315  func copyOptions(opts []string) []string {
   316  	if len(opts) == 0 {
   317  		return nil
   318  	}
   320  	acopy := make([]string, len(opts))
   321  	copy(acopy, opts)
   322  	return acopy
   323  }
   325  // optionsSize returns the byte size of options of mount.
   326  func optionsSize(opts []string) int {
   327  	size := 0
   328  	for _, opt := range opts {
   329  		size += len(opt)
   330  	}
   331  	return size
   332  }
   334  func mountAt(chdir string, source, target, fstype string, flags uintptr, data string) error {
   335  	if chdir == "" {
   336  		return unix.Mount(source, target, fstype, flags, data)
   337  	}
   339  	f, err := os.Open(chdir)
   340  	if err != nil {
   341  		return errors.Wrap(err, "failed to mountat")
   342  	}
   343  	defer f.Close()
   345  	fs, err := f.Stat()
   346  	if err != nil {
   347  		return errors.Wrap(err, "failed to mountat")
   348  	}
   350  	if !fs.IsDir() {
   351  		return errors.Wrap(errors.Errorf("%s is not dir", chdir), "failed to mountat")
   352  	}
   353  	return errors.Wrap(sys.FMountat(f.Fd(), source, target, fstype, flags, data), "failed to mountat")
   354  }
   356  func (m *Mount) mountWithHelper(helperBinary, typePrefix, target string) error {
   357  	// helperBinary: "mount.fuse3"
   358  	// target: "/foo/merged"
   359  	// m.Type: "fuse3.fuse-overlayfs"
   360  	// command: "mount.fuse3 overlay /foo/merged -o lowerdir=/foo/lower2:/foo/lower1,upperdir=/foo/upper,workdir=/foo/work -t fuse-overlayfs"
   361  	args := []string{m.Source, target}
   362  	for _, o := range m.Options {
   363  		args = append(args, "-o", o)
   364  	}
   365  	args = append(args, "-t", strings.TrimPrefix(m.Type, typePrefix))
   367  	infoBeforeMount, err := Lookup(target)
   368  	if err != nil {
   369  		return err
   370  	}
   372  	// cmd.CombinedOutput() may intermittently return ECHILD because of our signal handling in shim.
   373  	// See #4387 and wait(2).
   374  	const retriesOnECHILD = 10
   375  	for i := 0; i < retriesOnECHILD; i++ {
   376  		cmd := exec.Command(helperBinary, args...)
   377  		out, err := cmd.CombinedOutput()
   378  		if err == nil {
   379  			return nil
   380  		}
   381  		if !errors.Is(err, unix.ECHILD) {
   382  			return errors.Wrapf(err, "mount helper [%s %v] failed: %q", helperBinary, args, string(out))
   383  		}
   384  		// We got ECHILD, we are not sure whether the mount was successful.
   385  		// If the mount ID has changed, we are sure we got some new mount, but still not sure it is fully completed.
   386  		// So we attempt to unmount the new mount before retrying.
   387  		infoAfterMount, err := Lookup(target)
   388  		if err != nil {
   389  			return err
   390  		}
   391  		if infoAfterMount.ID != infoBeforeMount.ID {
   392  			_ = unmount(target, 0)
   393  		}
   394  	}
   395  	return errors.Errorf("mount helper [%s %v] failed with ECHILD (retired %d times)", helperBinary, args, retriesOnECHILD)
   396  }