github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/syscalls/linux/sys_mount.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    19  	"github.com/nicocha30/gvisor-ligolo/pkg/bits"
    20  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    21  	"github.com/nicocha30/gvisor-ligolo/pkg/fspath"
    22  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/arch"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
    26  )
    27  
    28  // Mount implements Linux syscall mount(2).
    29  func Mount(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    30  	sourceAddr := args[0].Pointer()
    31  	targetAddr := args[1].Pointer()
    32  	typeAddr := args[2].Pointer()
    33  	flags := args[3].Uint64()
    34  	dataAddr := args[4].Pointer()
    35  
    36  	// Must have CAP_SYS_ADMIN in the current mount namespace's associated user
    37  	// namespace.
    38  	creds := t.Credentials()
    39  	if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().Owner) {
    40  		return 0, nil, linuxerr.EPERM
    41  	}
    42  
    43  	// Ignore magic value that was required before Linux 2.4.
    44  	if flags&linux.MS_MGC_MSK == linux.MS_MGC_VAL {
    45  		flags = flags &^ linux.MS_MGC_MSK
    46  	}
    47  
    48  	// Silently allow MS_NOSUID, since we don't implement set-id bits anyway.
    49  	const unsupported = linux.MS_REMOUNT | linux.MS_SLAVE |
    50  		linux.MS_UNBINDABLE | linux.MS_MOVE | linux.MS_REC | linux.MS_NODIRATIME |
    51  		linux.MS_STRICTATIME
    52  
    53  	// Linux just allows passing any flags to mount(2) - it won't fail when
    54  	// unknown or unsupported flags are passed. Since we don't implement
    55  	// everything, we fail explicitly on flags that are unimplemented.
    56  	if flags&(unsupported) != 0 {
    57  		return 0, nil, linuxerr.EINVAL
    58  	}
    59  
    60  	// For null-terminated strings related to mount(2), Linux copies in at most
    61  	// a page worth of data. See fs/namespace.c:copy_mount_string().
    62  	targetPath, err := copyInPath(t, targetAddr)
    63  	if err != nil {
    64  		return 0, nil, err
    65  	}
    66  	target, err := getTaskPathOperation(t, linux.AT_FDCWD, targetPath, disallowEmptyPath, nofollowFinalSymlink)
    67  	if err != nil {
    68  		return 0, nil, err
    69  	}
    70  	defer target.Release(t)
    71  
    72  	if flags&linux.MS_BIND == linux.MS_BIND {
    73  		var sourcePath fspath.Path
    74  		sourcePath, err = copyInPath(t, sourceAddr)
    75  		if err != nil {
    76  			return 0, nil, err
    77  		}
    78  		var sourceTpop taskPathOperation
    79  		sourceTpop, err = getTaskPathOperation(t, linux.AT_FDCWD, sourcePath, disallowEmptyPath, followFinalSymlink)
    80  		if err != nil {
    81  			return 0, nil, err
    82  		}
    83  		defer sourceTpop.Release(t)
    84  		_, err = t.Kernel().VFS().BindAt(t, creds, &sourceTpop.pop, &target.pop)
    85  		return 0, nil, err
    86  	}
    87  	const propagationFlags = linux.MS_SHARED | linux.MS_PRIVATE | linux.MS_SLAVE | linux.MS_UNBINDABLE
    88  	if propFlag := flags & propagationFlags; propFlag != 0 {
    89  		// Check if flags is a power of 2. If not then more than one flag is set.
    90  		if !bits.IsPowerOfTwo64(propFlag) {
    91  			return 0, nil, linuxerr.EINVAL
    92  		}
    93  		propType := vfs.PropagationTypeFromLinux(propFlag)
    94  		return 0, nil, t.Kernel().VFS().SetMountPropagationAt(t, creds, &target.pop, propType)
    95  	}
    96  
    97  	// Only copy in source, fstype, and data if we are doing a normal mount.
    98  	source, err := t.CopyInString(sourceAddr, hostarch.PageSize)
    99  	if err != nil {
   100  		return 0, nil, err
   101  	}
   102  	fsType, err := t.CopyInString(typeAddr, hostarch.PageSize)
   103  	if err != nil {
   104  		return 0, nil, err
   105  	}
   106  	data := ""
   107  	if dataAddr != 0 {
   108  		// In Linux, a full page is always copied in regardless of null
   109  		// character placement, and the address is passed to each file system.
   110  		// Most file systems always treat this data as a string, though, and so
   111  		// do all of the ones we implement.
   112  		data, err = t.CopyInString(dataAddr, hostarch.PageSize)
   113  		if err != nil {
   114  			return 0, nil, err
   115  		}
   116  	}
   117  	var opts vfs.MountOptions
   118  	if flags&linux.MS_NOATIME == linux.MS_NOATIME {
   119  		opts.Flags.NoATime = true
   120  	}
   121  	if flags&linux.MS_NOEXEC == linux.MS_NOEXEC {
   122  		opts.Flags.NoExec = true
   123  	}
   124  	if flags&linux.MS_NODEV == linux.MS_NODEV {
   125  		opts.Flags.NoDev = true
   126  	}
   127  	if flags&linux.MS_NOSUID == linux.MS_NOSUID {
   128  		opts.Flags.NoSUID = true
   129  	}
   130  	if flags&linux.MS_RDONLY == linux.MS_RDONLY {
   131  		opts.ReadOnly = true
   132  	}
   133  	opts.GetFilesystemOptions.Data = data
   134  	_, err = t.Kernel().VFS().MountAt(t, creds, source, &target.pop, fsType, &opts)
   135  	return 0, nil, err
   136  }
   137  
   138  // Umount2 implements Linux syscall umount2(2).
   139  func Umount2(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   140  	addr := args[0].Pointer()
   141  	flags := args[1].Int()
   142  
   143  	// Must have CAP_SYS_ADMIN in the mount namespace's associated user
   144  	// namespace.
   145  	//
   146  	// Currently, this is always the init task's user namespace.
   147  	creds := t.Credentials()
   148  	if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().Owner) {
   149  		return 0, nil, linuxerr.EPERM
   150  	}
   151  
   152  	const unsupported = linux.MNT_FORCE | linux.MNT_EXPIRE
   153  	if flags&unsupported != 0 {
   154  		return 0, nil, linuxerr.EINVAL
   155  	}
   156  
   157  	path, err := copyInPath(t, addr)
   158  	if err != nil {
   159  		return 0, nil, err
   160  	}
   161  	tpop, err := getTaskPathOperation(t, linux.AT_FDCWD, path, disallowEmptyPath, shouldFollowFinalSymlink(flags&linux.UMOUNT_NOFOLLOW == 0))
   162  	if err != nil {
   163  		return 0, nil, err
   164  	}
   165  	defer tpop.Release(t)
   166  
   167  	opts := vfs.UmountOptions{
   168  		Flags: uint32(flags &^ linux.UMOUNT_NOFOLLOW),
   169  	}
   170  
   171  	return 0, nil, t.Kernel().VFS().UmountAt(t, creds, &tpop.pop, &opts)
   172  }