github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/syscalls/linux/sys_mount.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    19  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    20  	"github.com/MerlinKodo/gvisor/pkg/fspath"
    21  	"github.com/MerlinKodo/gvisor/pkg/hostarch"
    22  	"github.com/MerlinKodo/gvisor/pkg/sentry/arch"
    23  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel"
    24  	"github.com/MerlinKodo/gvisor/pkg/sentry/vfs"
    25  )
    26  
    27  // Mount implements Linux syscall mount(2).
    28  func Mount(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    29  	sourceAddr := args[0].Pointer()
    30  	targetAddr := args[1].Pointer()
    31  	typeAddr := args[2].Pointer()
    32  	flags := args[3].Uint64()
    33  	dataAddr := args[4].Pointer()
    34  
    35  	// Must have CAP_SYS_ADMIN in the current mount namespace's associated user
    36  	// namespace.
    37  	creds := t.Credentials()
    38  	if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().Owner) {
    39  		return 0, nil, linuxerr.EPERM
    40  	}
    41  
    42  	// Ignore magic value that was required before Linux 2.4.
    43  	if flags&linux.MS_MGC_MSK == linux.MS_MGC_VAL {
    44  		flags = flags &^ linux.MS_MGC_MSK
    45  	}
    46  
    47  	// Silently allow MS_NOSUID, since we don't implement set-id bits anyway.
    48  	const unsupported = linux.MS_REMOUNT | linux.MS_SLAVE |
    49  		linux.MS_UNBINDABLE | linux.MS_MOVE | linux.MS_REC | linux.MS_NODIRATIME
    50  
    51  	// Linux just allows passing any flags to mount(2) - it won't fail when
    52  	// unknown or unsupported flags are passed. Since we don't implement
    53  	// everything, we fail explicitly on flags that are unimplemented.
    54  	if flags&(unsupported) != 0 {
    55  		return 0, nil, linuxerr.EINVAL
    56  	}
    57  
    58  	// For null-terminated strings related to mount(2), Linux copies in at most
    59  	// a page worth of data. See fs/namespace.c:copy_mount_string().
    60  	targetPath, err := copyInPath(t, targetAddr)
    61  	if err != nil {
    62  		return 0, nil, err
    63  	}
    64  	target, err := getTaskPathOperation(t, linux.AT_FDCWD, targetPath, disallowEmptyPath, followFinalSymlink)
    65  	if err != nil {
    66  		return 0, nil, err
    67  	}
    68  	defer target.Release(t)
    69  
    70  	if flags&linux.MS_BIND == linux.MS_BIND {
    71  		var sourcePath fspath.Path
    72  		sourcePath, err = copyInPath(t, sourceAddr)
    73  		if err != nil {
    74  			return 0, nil, err
    75  		}
    76  		var sourceTpop taskPathOperation
    77  		sourceTpop, err = getTaskPathOperation(t, linux.AT_FDCWD, sourcePath, disallowEmptyPath, followFinalSymlink)
    78  		if err != nil {
    79  			return 0, nil, err
    80  		}
    81  		defer sourceTpop.Release(t)
    82  		_, err = t.Kernel().VFS().BindAt(t, creds, &sourceTpop.pop, &target.pop)
    83  		return 0, nil, err
    84  	}
    85  	const propagationFlags = linux.MS_SHARED | linux.MS_PRIVATE | linux.MS_SLAVE | linux.MS_UNBINDABLE
    86  	if propFlag := flags & propagationFlags; propFlag != 0 {
    87  		return 0, nil, t.Kernel().VFS().SetMountPropagationAt(t, creds, &target.pop, uint32(propFlag))
    88  	}
    89  
    90  	// Only copy in source, fstype, and data if we are doing a normal mount.
    91  	source, err := t.CopyInString(sourceAddr, hostarch.PageSize)
    92  	if err != nil {
    93  		return 0, nil, err
    94  	}
    95  	fsType, err := t.CopyInString(typeAddr, hostarch.PageSize)
    96  	if err != nil {
    97  		return 0, nil, err
    98  	}
    99  	data := ""
   100  	if dataAddr != 0 {
   101  		// In Linux, a full page is always copied in regardless of null
   102  		// character placement, and the address is passed to each file system.
   103  		// Most file systems always treat this data as a string, though, and so
   104  		// do all of the ones we implement.
   105  		data, err = t.CopyInString(dataAddr, hostarch.PageSize)
   106  		if err != nil {
   107  			return 0, nil, err
   108  		}
   109  	}
   110  	var opts vfs.MountOptions
   111  	if flags&(linux.MS_NOATIME|linux.MS_STRICTATIME) == linux.MS_NOATIME {
   112  		opts.Flags.NoATime = true
   113  	}
   114  	if flags&linux.MS_NOEXEC == linux.MS_NOEXEC {
   115  		opts.Flags.NoExec = true
   116  	}
   117  	if flags&linux.MS_NODEV == linux.MS_NODEV {
   118  		opts.Flags.NoDev = true
   119  	}
   120  	if flags&linux.MS_NOSUID == linux.MS_NOSUID {
   121  		opts.Flags.NoSUID = true
   122  	}
   123  	if flags&linux.MS_RDONLY == linux.MS_RDONLY {
   124  		opts.ReadOnly = true
   125  	}
   126  	opts.GetFilesystemOptions.Data = data
   127  	_, err = t.Kernel().VFS().MountAt(t, creds, source, &target.pop, fsType, &opts)
   128  	return 0, nil, err
   129  }
   130  
   131  // Umount2 implements Linux syscall umount2(2).
   132  func Umount2(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   133  	addr := args[0].Pointer()
   134  	flags := args[1].Int()
   135  
   136  	// Must have CAP_SYS_ADMIN in the mount namespace's associated user
   137  	// namespace.
   138  	//
   139  	// Currently, this is always the init task's user namespace.
   140  	creds := t.Credentials()
   141  	if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().Owner) {
   142  		return 0, nil, linuxerr.EPERM
   143  	}
   144  
   145  	const unsupported = linux.MNT_FORCE | linux.MNT_EXPIRE
   146  	if flags&unsupported != 0 {
   147  		return 0, nil, linuxerr.EINVAL
   148  	}
   149  
   150  	path, err := copyInPath(t, addr)
   151  	if err != nil {
   152  		return 0, nil, err
   153  	}
   154  	tpop, err := getTaskPathOperation(t, linux.AT_FDCWD, path, disallowEmptyPath, shouldFollowFinalSymlink(flags&linux.UMOUNT_NOFOLLOW == 0))
   155  	if err != nil {
   156  		return 0, nil, err
   157  	}
   158  	defer tpop.Release(t)
   159  
   160  	opts := vfs.UmountOptions{
   161  		Flags: uint32(flags &^ linux.UMOUNT_NOFOLLOW),
   162  	}
   163  
   164  	return 0, nil, t.Kernel().VFS().UmountAt(t, creds, &tpop.pop, &opts)
   165  }