github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/vfs/propagation.go (about) 1 // Copyright 2022 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vfs 16 17 import ( 18 "fmt" 19 "strings" 20 21 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 22 "github.com/MerlinKodo/gvisor/pkg/bits" 23 "github.com/MerlinKodo/gvisor/pkg/context" 24 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 25 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth" 26 ) 27 28 func propTypeToString(pflag uint32) string { 29 if pflag == 0 { 30 return "0" 31 } 32 var ( 33 b strings.Builder 34 sep string 35 ) 36 handleFlag := func(flag uint32, str string) { 37 if pflag&flag != 0 { 38 fmt.Fprintf(&b, "%s%s", sep, str) 39 sep = "|" 40 pflag &^= flag 41 } 42 } 43 handleFlag(linux.MS_SHARED, "shared") 44 handleFlag(linux.MS_PRIVATE, "private") 45 handleFlag(linux.MS_SLAVE, "slave") 46 handleFlag(linux.MS_UNBINDABLE, "unbindable") 47 if pflag != 0 { 48 fmt.Fprintf(&b, "%s%#x", sep, pflag) 49 } 50 return b.String() 51 } 52 53 // setPropagation sets the propagation on mnt for a propagation type. 54 // 55 // +checklocks:vfs.mountMu 56 func (vfs *VirtualFilesystem) setPropagation(mnt *Mount, pflag uint32) error { 57 switch pflag { 58 case linux.MS_SHARED: 59 if !mnt.isShared { 60 id, err := vfs.allocateGroupID() 61 if err != nil { 62 return err 63 } 64 mnt.groupID = id 65 mnt.sharedEntry.Init(mnt) 66 mnt.isShared = true 67 } 68 case linux.MS_PRIVATE: 69 if mnt.isShared { 70 if mnt.sharedEntry.Empty() { 71 vfs.freeGroupID(mnt.groupID) 72 } 73 mnt.sharedEntry.Remove() 74 mnt.groupID = 0 75 mnt.isShared = false 76 } 77 default: 78 panic(fmt.Sprintf("unsupported propagation type: %s", propTypeToString(pflag))) 79 } 80 return nil 81 } 82 83 // addPeer adds oth to mnt's peer group. Both will have the same groupID 84 // and sharedList. vfs.mountMu must be locked. 85 // 86 // +checklocks:vfs.mountMu 87 func (vfs *VirtualFilesystem) addPeer(mnt *Mount, new *Mount) { 88 mnt.sharedEntry.Add(&new.sharedEntry) 89 new.isShared = true 90 new.groupID = mnt.groupID 91 } 92 93 // preparePropagationTree returns a mapping of propagated mounts to their future 94 // mountpoints. The new mounts are clones of mnt and are added to mnt's peer 95 // group if vd.mount and mnt are shared. All the cloned mounts and new 96 // mountpoints in the tree have an extra reference taken. 97 // 98 // +checklocks:vfs.mountMu 99 // +checklocksalias:mnt.vfs.mountMu=vfs.mountMu 100 func (vfs *VirtualFilesystem) preparePropagationTree(mnt *Mount, vd VirtualDentry) map[*Mount]VirtualDentry { 101 tree := map[*Mount]VirtualDentry{} 102 if !vd.mount.isShared { 103 return tree 104 } 105 if !mnt.isShared { 106 vfs.setPropagation(mnt, linux.MS_SHARED) 107 } 108 for peer := vd.mount.sharedEntry.Next(); peer != vd.mount; peer = peer.sharedEntry.Next() { 109 // Skip newly added (disconnected) mounts. 110 if peer.ns == nil { 111 continue 112 } 113 peerVd := VirtualDentry{ 114 mount: peer, 115 dentry: vd.dentry, 116 } 117 peerVd.IncRef() 118 clone := vfs.cloneMount(mnt, mnt.root, nil) 119 tree[clone] = peerVd 120 } 121 return tree 122 } 123 124 // commitPropagationTree attaches to mounts in tree to the mountpoints they 125 // are mapped to. If there is an error attaching a mount, the method panics. 126 // 127 // +checklocks:vfs.mountMu 128 func (vfs *VirtualFilesystem) commitPropagationTree(ctx context.Context, tree map[*Mount]VirtualDentry) { 129 // The peer mounts should have no way of being dead if we've reached this 130 // point so its safe to connect without checks. 131 for mnt, vd := range tree { 132 // If there is already a mount at this (parent, point), disconnect it and 133 // reconnect it to the new mount once it is connected. 134 vd.dentry.mu.Lock() 135 child := vfs.mounts.Lookup(vd.mount, vd.dentry) 136 vfs.mounts.seq.BeginWrite() 137 if child != nil { 138 vfs.delayDecRef(vfs.disconnectLocked(child)) 139 } 140 vfs.connectLocked(mnt, vd, vd.mount.ns) 141 vfs.delayDecRef(mnt) 142 143 if child != nil { 144 newmp := VirtualDentry{mnt, mnt.root} 145 newmp.IncRef() 146 vfs.connectLocked(child, newmp, newmp.mount.ns) 147 vfs.delayDecRef(child) 148 } 149 vfs.mounts.seq.EndWrite() 150 vd.dentry.mu.Unlock() 151 } 152 } 153 154 // abortPropagationTree releases any references held by the mounts and 155 // mountpoints in the tree and removes the mounts from their peer groups. 156 // 157 // +checklocks:vfs.mountMu 158 func (vfs *VirtualFilesystem) abortPropagationTree(ctx context.Context, tree map[*Mount]VirtualDentry) { 159 for mnt, vd := range tree { 160 vfs.delayDecRef(vd) 161 vfs.delayDecRef(mnt) 162 vfs.setPropagation(mnt, linux.MS_PRIVATE) 163 } 164 } 165 166 // +checklocks:vfs.mountMu 167 func (vfs *VirtualFilesystem) commitPendingTree(ctx context.Context, mnt *Mount) { 168 for _, c := range mnt.pendingChildren { 169 vfs.commitTree(ctx, c) 170 } 171 mnt.pendingChildren = nil 172 } 173 174 // +checklocks:vfs.mountMu 175 func (vfs *VirtualFilesystem) commitTree(ctx context.Context, mnt *Mount) { 176 mp := mnt.getKey() 177 178 // If there is already a mount at this (parent, point), disconnect it from its 179 // parent and reconnect it to mnt once mnt has been connected. 180 child := vfs.mounts.Lookup(mp.mount, mp.dentry) 181 vfs.mounts.seq.BeginWrite() 182 if child != nil { 183 vfs.delayDecRef(vfs.disconnectLocked(child)) 184 } 185 vfs.connectLocked(mnt, mp, mp.mount.ns) 186 vfs.delayDecRef(mnt) 187 188 if child != nil { 189 newmp := VirtualDentry{mnt, mnt.root} 190 newmp.IncRef() 191 vfs.connectLocked(child, newmp, newmp.mount.ns) 192 vfs.delayDecRef(child) 193 } 194 vfs.mounts.seq.EndWrite() 195 vfs.commitPendingTree(ctx, mnt) 196 } 197 198 // abortTree releases references on a pending mount and all its pending 199 // descendants. 200 // 201 // +checklocks:vfs.mountMu 202 func (vfs *VirtualFilesystem) abortTree(ctx context.Context, mnt *Mount) { 203 vfs.delayDecRef(mnt) 204 vfs.delayDecRef(mnt.getKey()) 205 mnt.setKey(VirtualDentry{}) 206 vfs.setPropagation(mnt, linux.MS_PRIVATE) 207 for _, c := range mnt.pendingChildren { 208 vfs.abortTree(ctx, c) 209 } 210 mnt.pendingChildren = nil 211 } 212 213 // SetMountPropagationAt changes the propagation type of the mount pointed to by 214 // pop. 215 func (vfs *VirtualFilesystem) SetMountPropagationAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, propFlags uint32) error { 216 // Check if flags is a power of 2. If not then more than one flag is set. 217 if !bits.IsPowerOfTwo32(propFlags) { 218 return linuxerr.EINVAL 219 } 220 vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{}) 221 if err != nil { 222 return err 223 } 224 // See the similar defer in UmountAt for why this is in a closure. 225 defer func() { 226 vd.DecRef(ctx) 227 }() 228 if vd.dentry.isMounted() { 229 if realmnt := vfs.getMountAt(ctx, vd.mount, vd.dentry); realmnt != nil { 230 vd.mount.DecRef(ctx) 231 vd.mount = realmnt 232 } 233 } else if vd.dentry != vd.mount.root { 234 return linuxerr.EINVAL 235 } 236 vfs.SetMountPropagation(vd.mount, propFlags) 237 return nil 238 } 239 240 // SetMountPropagation changes the propagation type of the mount. 241 func (vfs *VirtualFilesystem) SetMountPropagation(mnt *Mount, propFlags uint32) { 242 vfs.lockMounts() 243 defer vfs.unlockMounts(context.Background()) 244 if propFlags&(linux.MS_SHARED|linux.MS_PRIVATE) != 0 { 245 vfs.setPropagation(mnt, propFlags) 246 } else { 247 panic(fmt.Sprintf("unsupported propagation type: %s", propTypeToString(propFlags))) 248 } 249 }