github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/syscalls/linux/sys_mount.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"github.com/metacubex/gvisor/pkg/abi/linux"
    19  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    20  	"github.com/metacubex/gvisor/pkg/hostarch"
    21  	"github.com/metacubex/gvisor/pkg/sentry/arch"
    22  	"github.com/metacubex/gvisor/pkg/sentry/kernel"
    23  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    24  )
    25  
    26  // Mount implements Linux syscall mount(2).
    27  func Mount(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    28  	sourceAddr := args[0].Pointer()
    29  	targetAddr := args[1].Pointer()
    30  	typeAddr := args[2].Pointer()
    31  	flags := args[3].Uint64()
    32  	dataAddr := args[4].Pointer()
    33  
    34  	// Must have CAP_SYS_ADMIN in the current mount namespace's associated user
    35  	// namespace.
    36  	creds := t.Credentials()
    37  	if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().Owner) {
    38  		return 0, nil, linuxerr.EPERM
    39  	}
    40  
    41  	// Ignore magic value that was required before Linux 2.4.
    42  	if flags&linux.MS_MGC_MSK == linux.MS_MGC_VAL {
    43  		flags = flags &^ linux.MS_MGC_MSK
    44  	}
    45  
    46  	// Silently allow MS_NOSUID, since we don't implement set-id bits anyway.
    47  	const unsupported = linux.MS_UNBINDABLE | linux.MS_MOVE | linux.MS_NODIRATIME
    48  
    49  	// Linux just allows passing any flags to mount(2) - it won't fail when
    50  	// unknown or unsupported flags are passed. Since we don't implement
    51  	// everything, we fail explicitly on flags that are unimplemented.
    52  	if flags&(unsupported) != 0 {
    53  		return 0, nil, linuxerr.EINVAL
    54  	}
    55  
    56  	// For null-terminated strings related to mount(2), Linux copies in at most
    57  	// a page worth of data. See fs/namespace.c:copy_mount_string().
    58  	targetPath, err := copyInPath(t, targetAddr)
    59  	if err != nil {
    60  		return 0, nil, err
    61  	}
    62  	target, err := getTaskPathOperation(t, linux.AT_FDCWD, targetPath, disallowEmptyPath, followFinalSymlink)
    63  	if err != nil {
    64  		return 0, nil, err
    65  	}
    66  	defer target.Release(t)
    67  	var opts vfs.MountOptions
    68  	if flags&(linux.MS_NOATIME|linux.MS_STRICTATIME) == linux.MS_NOATIME {
    69  		opts.Flags.NoATime = true
    70  	}
    71  	if flags&linux.MS_NOEXEC == linux.MS_NOEXEC {
    72  		opts.Flags.NoExec = true
    73  	}
    74  	if flags&linux.MS_NODEV == linux.MS_NODEV {
    75  		opts.Flags.NoDev = true
    76  	}
    77  	if flags&linux.MS_NOSUID == linux.MS_NOSUID {
    78  		opts.Flags.NoSUID = true
    79  	}
    80  	if flags&linux.MS_RDONLY == linux.MS_RDONLY {
    81  		opts.ReadOnly = true
    82  	}
    83  	data := ""
    84  	if dataAddr != 0 {
    85  		// In Linux, a full page is always copied in regardless of null
    86  		// character placement, and the address is passed to each file system.
    87  		// Most file systems always treat this data as a string, though, and so
    88  		// do all of the ones we implement.
    89  		data, err = t.CopyInString(dataAddr, hostarch.PageSize)
    90  		if err != nil {
    91  			return 0, nil, err
    92  		}
    93  	}
    94  	opts.GetFilesystemOptions.Data = data
    95  	switch {
    96  	case flags&linux.MS_REMOUNT != 0:
    97  		// When MS_REMOUNT is specified, the flags and data should match the values used in the original mount() call,
    98  		// except for those parameters that are being changed.
    99  		//
   100  		// The src and filesystem type are ignored for MS_REMOUNT.
   101  		return 0, nil, t.Kernel().VFS().RemountAt(t, creds, &target.pop, &opts)
   102  	case flags&linux.MS_BIND != 0:
   103  		sourcePath, err := copyInPath(t, sourceAddr)
   104  		if err != nil {
   105  			return 0, nil, err
   106  		}
   107  		var sourceTpop taskPathOperation
   108  		sourceTpop, err = getTaskPathOperation(t, linux.AT_FDCWD, sourcePath, disallowEmptyPath, followFinalSymlink)
   109  		if err != nil {
   110  			return 0, nil, err
   111  		}
   112  		defer sourceTpop.Release(t)
   113  		return 0, nil, t.Kernel().VFS().BindAt(t, creds, &sourceTpop.pop, &target.pop, flags&linux.MS_REC != 0)
   114  	case flags&(linux.MS_SHARED|linux.MS_PRIVATE|linux.MS_SLAVE|linux.MS_UNBINDABLE) != 0:
   115  		return 0, nil, t.Kernel().VFS().SetMountPropagationAt(t, creds, &target.pop, uint32(flags))
   116  	}
   117  
   118  	// Only copy in source, fstype, and data if we are doing a normal mount.
   119  	source, err := t.CopyInString(sourceAddr, hostarch.PageSize)
   120  	if err != nil {
   121  		return 0, nil, err
   122  	}
   123  	fsType, err := t.CopyInString(typeAddr, hostarch.PageSize)
   124  	if err != nil {
   125  		return 0, nil, err
   126  	}
   127  	_, err = t.Kernel().VFS().MountAt(t, creds, source, &target.pop, fsType, &opts)
   128  	return 0, nil, err
   129  }
   130  
   131  // Umount2 implements Linux syscall umount2(2).
   132  func Umount2(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   133  	addr := args[0].Pointer()
   134  	flags := args[1].Int()
   135  
   136  	// Must have CAP_SYS_ADMIN in the mount namespace's associated user
   137  	// namespace.
   138  	//
   139  	// Currently, this is always the init task's user namespace.
   140  	creds := t.Credentials()
   141  	if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().Owner) {
   142  		return 0, nil, linuxerr.EPERM
   143  	}
   144  
   145  	const unsupported = linux.MNT_FORCE | linux.MNT_EXPIRE
   146  	if flags&unsupported != 0 {
   147  		return 0, nil, linuxerr.EINVAL
   148  	}
   149  
   150  	path, err := copyInPath(t, addr)
   151  	if err != nil {
   152  		return 0, nil, err
   153  	}
   154  	tpop, err := getTaskPathOperation(t, linux.AT_FDCWD, path, disallowEmptyPath, shouldFollowFinalSymlink(flags&linux.UMOUNT_NOFOLLOW == 0))
   155  	if err != nil {
   156  		return 0, nil, err
   157  	}
   158  	defer tpop.Release(t)
   159  
   160  	opts := vfs.UmountOptions{
   161  		Flags: uint32(flags &^ linux.UMOUNT_NOFOLLOW),
   162  	}
   163  
   164  	return 0, nil, t.Kernel().VFS().UmountAt(t, creds, &tpop.pop, &opts)
   165  }