github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/syscalls/linux/sys_mount.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package linux 16 17 import ( 18 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 19 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 20 "github.com/MerlinKodo/gvisor/pkg/fspath" 21 "github.com/MerlinKodo/gvisor/pkg/hostarch" 22 "github.com/MerlinKodo/gvisor/pkg/sentry/arch" 23 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel" 24 "github.com/MerlinKodo/gvisor/pkg/sentry/vfs" 25 ) 26 27 // Mount implements Linux syscall mount(2). 28 func Mount(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 29 sourceAddr := args[0].Pointer() 30 targetAddr := args[1].Pointer() 31 typeAddr := args[2].Pointer() 32 flags := args[3].Uint64() 33 dataAddr := args[4].Pointer() 34 35 // Must have CAP_SYS_ADMIN in the current mount namespace's associated user 36 // namespace. 37 creds := t.Credentials() 38 if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().Owner) { 39 return 0, nil, linuxerr.EPERM 40 } 41 42 // Ignore magic value that was required before Linux 2.4. 43 if flags&linux.MS_MGC_MSK == linux.MS_MGC_VAL { 44 flags = flags &^ linux.MS_MGC_MSK 45 } 46 47 // Silently allow MS_NOSUID, since we don't implement set-id bits anyway. 48 const unsupported = linux.MS_REMOUNT | linux.MS_SLAVE | 49 linux.MS_UNBINDABLE | linux.MS_MOVE | linux.MS_REC | linux.MS_NODIRATIME 50 51 // Linux just allows passing any flags to mount(2) - it won't fail when 52 // unknown or unsupported flags are passed. Since we don't implement 53 // everything, we fail explicitly on flags that are unimplemented. 54 if flags&(unsupported) != 0 { 55 return 0, nil, linuxerr.EINVAL 56 } 57 58 // For null-terminated strings related to mount(2), Linux copies in at most 59 // a page worth of data. See fs/namespace.c:copy_mount_string(). 60 targetPath, err := copyInPath(t, targetAddr) 61 if err != nil { 62 return 0, nil, err 63 } 64 target, err := getTaskPathOperation(t, linux.AT_FDCWD, targetPath, disallowEmptyPath, followFinalSymlink) 65 if err != nil { 66 return 0, nil, err 67 } 68 defer target.Release(t) 69 70 if flags&linux.MS_BIND == linux.MS_BIND { 71 var sourcePath fspath.Path 72 sourcePath, err = copyInPath(t, sourceAddr) 73 if err != nil { 74 return 0, nil, err 75 } 76 var sourceTpop taskPathOperation 77 sourceTpop, err = getTaskPathOperation(t, linux.AT_FDCWD, sourcePath, disallowEmptyPath, followFinalSymlink) 78 if err != nil { 79 return 0, nil, err 80 } 81 defer sourceTpop.Release(t) 82 _, err = t.Kernel().VFS().BindAt(t, creds, &sourceTpop.pop, &target.pop) 83 return 0, nil, err 84 } 85 const propagationFlags = linux.MS_SHARED | linux.MS_PRIVATE | linux.MS_SLAVE | linux.MS_UNBINDABLE 86 if propFlag := flags & propagationFlags; propFlag != 0 { 87 return 0, nil, t.Kernel().VFS().SetMountPropagationAt(t, creds, &target.pop, uint32(propFlag)) 88 } 89 90 // Only copy in source, fstype, and data if we are doing a normal mount. 91 source, err := t.CopyInString(sourceAddr, hostarch.PageSize) 92 if err != nil { 93 return 0, nil, err 94 } 95 fsType, err := t.CopyInString(typeAddr, hostarch.PageSize) 96 if err != nil { 97 return 0, nil, err 98 } 99 data := "" 100 if dataAddr != 0 { 101 // In Linux, a full page is always copied in regardless of null 102 // character placement, and the address is passed to each file system. 103 // Most file systems always treat this data as a string, though, and so 104 // do all of the ones we implement. 105 data, err = t.CopyInString(dataAddr, hostarch.PageSize) 106 if err != nil { 107 return 0, nil, err 108 } 109 } 110 var opts vfs.MountOptions 111 if flags&(linux.MS_NOATIME|linux.MS_STRICTATIME) == linux.MS_NOATIME { 112 opts.Flags.NoATime = true 113 } 114 if flags&linux.MS_NOEXEC == linux.MS_NOEXEC { 115 opts.Flags.NoExec = true 116 } 117 if flags&linux.MS_NODEV == linux.MS_NODEV { 118 opts.Flags.NoDev = true 119 } 120 if flags&linux.MS_NOSUID == linux.MS_NOSUID { 121 opts.Flags.NoSUID = true 122 } 123 if flags&linux.MS_RDONLY == linux.MS_RDONLY { 124 opts.ReadOnly = true 125 } 126 opts.GetFilesystemOptions.Data = data 127 _, err = t.Kernel().VFS().MountAt(t, creds, source, &target.pop, fsType, &opts) 128 return 0, nil, err 129 } 130 131 // Umount2 implements Linux syscall umount2(2). 132 func Umount2(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 133 addr := args[0].Pointer() 134 flags := args[1].Int() 135 136 // Must have CAP_SYS_ADMIN in the mount namespace's associated user 137 // namespace. 138 // 139 // Currently, this is always the init task's user namespace. 140 creds := t.Credentials() 141 if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().Owner) { 142 return 0, nil, linuxerr.EPERM 143 } 144 145 const unsupported = linux.MNT_FORCE | linux.MNT_EXPIRE 146 if flags&unsupported != 0 { 147 return 0, nil, linuxerr.EINVAL 148 } 149 150 path, err := copyInPath(t, addr) 151 if err != nil { 152 return 0, nil, err 153 } 154 tpop, err := getTaskPathOperation(t, linux.AT_FDCWD, path, disallowEmptyPath, shouldFollowFinalSymlink(flags&linux.UMOUNT_NOFOLLOW == 0)) 155 if err != nil { 156 return 0, nil, err 157 } 158 defer tpop.Release(t) 159 160 opts := vfs.UmountOptions{ 161 Flags: uint32(flags &^ linux.UMOUNT_NOFOLLOW), 162 } 163 164 return 0, nil, t.Kernel().VFS().UmountAt(t, creds, &tpop.pop, &opts) 165 }