github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/vfs/propagation.go (about)

     1  // Copyright 2022 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vfs
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  
    21  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    22  	"github.com/MerlinKodo/gvisor/pkg/bits"
    23  	"github.com/MerlinKodo/gvisor/pkg/context"
    24  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    25  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth"
    26  )
    27  
    28  func propTypeToString(pflag uint32) string {
    29  	if pflag == 0 {
    30  		return "0"
    31  	}
    32  	var (
    33  		b   strings.Builder
    34  		sep string
    35  	)
    36  	handleFlag := func(flag uint32, str string) {
    37  		if pflag&flag != 0 {
    38  			fmt.Fprintf(&b, "%s%s", sep, str)
    39  			sep = "|"
    40  			pflag &^= flag
    41  		}
    42  	}
    43  	handleFlag(linux.MS_SHARED, "shared")
    44  	handleFlag(linux.MS_PRIVATE, "private")
    45  	handleFlag(linux.MS_SLAVE, "slave")
    46  	handleFlag(linux.MS_UNBINDABLE, "unbindable")
    47  	if pflag != 0 {
    48  		fmt.Fprintf(&b, "%s%#x", sep, pflag)
    49  	}
    50  	return b.String()
    51  }
    52  
    53  // setPropagation sets the propagation on mnt for a propagation type.
    54  //
    55  // +checklocks:vfs.mountMu
    56  func (vfs *VirtualFilesystem) setPropagation(mnt *Mount, pflag uint32) error {
    57  	switch pflag {
    58  	case linux.MS_SHARED:
    59  		if !mnt.isShared {
    60  			id, err := vfs.allocateGroupID()
    61  			if err != nil {
    62  				return err
    63  			}
    64  			mnt.groupID = id
    65  			mnt.sharedEntry.Init(mnt)
    66  			mnt.isShared = true
    67  		}
    68  	case linux.MS_PRIVATE:
    69  		if mnt.isShared {
    70  			if mnt.sharedEntry.Empty() {
    71  				vfs.freeGroupID(mnt.groupID)
    72  			}
    73  			mnt.sharedEntry.Remove()
    74  			mnt.groupID = 0
    75  			mnt.isShared = false
    76  		}
    77  	default:
    78  		panic(fmt.Sprintf("unsupported propagation type: %s", propTypeToString(pflag)))
    79  	}
    80  	return nil
    81  }
    82  
    83  // addPeer adds oth to mnt's peer group. Both will have the same groupID
    84  // and sharedList. vfs.mountMu must be locked.
    85  //
    86  // +checklocks:vfs.mountMu
    87  func (vfs *VirtualFilesystem) addPeer(mnt *Mount, new *Mount) {
    88  	mnt.sharedEntry.Add(&new.sharedEntry)
    89  	new.isShared = true
    90  	new.groupID = mnt.groupID
    91  }
    92  
    93  // preparePropagationTree returns a mapping of propagated mounts to their future
    94  // mountpoints. The new mounts are clones of mnt and are added to mnt's peer
    95  // group if vd.mount and mnt are shared. All the cloned mounts and new
    96  // mountpoints in the tree have an extra reference taken.
    97  //
    98  // +checklocks:vfs.mountMu
    99  // +checklocksalias:mnt.vfs.mountMu=vfs.mountMu
   100  func (vfs *VirtualFilesystem) preparePropagationTree(mnt *Mount, vd VirtualDentry) map[*Mount]VirtualDentry {
   101  	tree := map[*Mount]VirtualDentry{}
   102  	if !vd.mount.isShared {
   103  		return tree
   104  	}
   105  	if !mnt.isShared {
   106  		vfs.setPropagation(mnt, linux.MS_SHARED)
   107  	}
   108  	for peer := vd.mount.sharedEntry.Next(); peer != vd.mount; peer = peer.sharedEntry.Next() {
   109  		// Skip newly added (disconnected) mounts.
   110  		if peer.ns == nil {
   111  			continue
   112  		}
   113  		peerVd := VirtualDentry{
   114  			mount:  peer,
   115  			dentry: vd.dentry,
   116  		}
   117  		peerVd.IncRef()
   118  		clone := vfs.cloneMount(mnt, mnt.root, nil)
   119  		tree[clone] = peerVd
   120  	}
   121  	return tree
   122  }
   123  
// commitPropagationTree attaches the mounts in tree to the mountpoints they
// are mapped to. If there is an error attaching a mount, the method panics.
//
// For every (mount, mountpoint) pair, the mountpoint dentry's mu is held and
// vfs.mounts.seq is in a write section while the mount table is modified. A
// mount that already sits at the mountpoint is disconnected, and is then
// reconnected on top of the root of the newly attached mount.
//
// ctx is not used by the body.
//
// +checklocks:vfs.mountMu
func (vfs *VirtualFilesystem) commitPropagationTree(ctx context.Context, tree map[*Mount]VirtualDentry) {
	// The peer mounts should have no way of being dead if we've reached this
	// point so it's safe to connect without checks.
	for mnt, vd := range tree {
		// If there is already a mount at this (parent, point), disconnect it and
		// reconnect it to the new mount once it is connected.
		vd.dentry.mu.Lock()
		child := vfs.mounts.Lookup(vd.mount, vd.dentry)
		vfs.mounts.seq.BeginWrite()
		if child != nil {
			// Whatever disconnectLocked returns is queued for a deferred
			// DecRef.
			vfs.delayDecRef(vfs.disconnectLocked(child))
		}
		vfs.connectLocked(mnt, vd, vd.mount.ns)
		// Presumably drops the extra reference taken on mnt when the tree was
		// prepared -- TODO confirm against preparePropagationTree/cloneMount.
		vfs.delayDecRef(mnt)

		if child != nil {
			// Stack the previously mounted child on the root of the mount that
			// was just connected.
			newmp := VirtualDentry{mnt, mnt.root}
			newmp.IncRef()
			vfs.connectLocked(child, newmp, newmp.mount.ns)
			vfs.delayDecRef(child)
		}
		vfs.mounts.seq.EndWrite()
		vd.dentry.mu.Unlock()
	}
}
   153  
   154  // abortPropagationTree releases any references held by the mounts and
   155  // mountpoints in the tree and removes the mounts from their peer groups.
   156  //
   157  // +checklocks:vfs.mountMu
   158  func (vfs *VirtualFilesystem) abortPropagationTree(ctx context.Context, tree map[*Mount]VirtualDentry) {
   159  	for mnt, vd := range tree {
   160  		vfs.delayDecRef(vd)
   161  		vfs.delayDecRef(mnt)
   162  		vfs.setPropagation(mnt, linux.MS_PRIVATE)
   163  	}
   164  }
   165  
   166  // +checklocks:vfs.mountMu
   167  func (vfs *VirtualFilesystem) commitPendingTree(ctx context.Context, mnt *Mount) {
   168  	for _, c := range mnt.pendingChildren {
   169  		vfs.commitTree(ctx, c)
   170  	}
   171  	mnt.pendingChildren = nil
   172  }
   173  
// commitTree connects the pending mount mnt at the mountpoint stored in its
// key (mnt.getKey()) and then commits mnt's pending children via
// commitPendingTree. A mount that already sits at that mountpoint is
// disconnected and reconnected on top of mnt's root.
//
// NOTE(review): unlike commitPropagationTree, this does not lock the
// mountpoint dentry's mu around the connect -- confirm whether callers hold it
// or whether it is not required here.
//
// +checklocks:vfs.mountMu
func (vfs *VirtualFilesystem) commitTree(ctx context.Context, mnt *Mount) {
	mp := mnt.getKey()

	// If there is already a mount at this (parent, point), disconnect it from its
	// parent and reconnect it to mnt once mnt has been connected.
	child := vfs.mounts.Lookup(mp.mount, mp.dentry)
	// All mount table changes below happen inside a vfs.mounts.seq write
	// section.
	vfs.mounts.seq.BeginWrite()
	if child != nil {
		// Whatever disconnectLocked returns is queued for a deferred DecRef.
		vfs.delayDecRef(vfs.disconnectLocked(child))
	}
	vfs.connectLocked(mnt, mp, mp.mount.ns)
	// Presumably drops the extra reference held on the pending mount -- TODO
	// confirm against the code that creates pending mounts.
	vfs.delayDecRef(mnt)

	if child != nil {
		// Stack the previously mounted child on the root of mnt.
		newmp := VirtualDentry{mnt, mnt.root}
		newmp.IncRef()
		vfs.connectLocked(child, newmp, newmp.mount.ns)
		vfs.delayDecRef(child)
	}
	vfs.mounts.seq.EndWrite()
	// Recurse into mnt's own pending children only after mnt is connected.
	vfs.commitPendingTree(ctx, mnt)
}
   197  
   198  // abortTree releases references on a pending mount and all its pending
   199  // descendants.
   200  //
   201  // +checklocks:vfs.mountMu
   202  func (vfs *VirtualFilesystem) abortTree(ctx context.Context, mnt *Mount) {
   203  	vfs.delayDecRef(mnt)
   204  	vfs.delayDecRef(mnt.getKey())
   205  	mnt.setKey(VirtualDentry{})
   206  	vfs.setPropagation(mnt, linux.MS_PRIVATE)
   207  	for _, c := range mnt.pendingChildren {
   208  		vfs.abortTree(ctx, c)
   209  	}
   210  	mnt.pendingChildren = nil
   211  }
   212  
   213  // SetMountPropagationAt changes the propagation type of the mount pointed to by
   214  // pop.
   215  func (vfs *VirtualFilesystem) SetMountPropagationAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, propFlags uint32) error {
   216  	// Check if flags is a power of 2. If not then more than one flag is set.
   217  	if !bits.IsPowerOfTwo32(propFlags) {
   218  		return linuxerr.EINVAL
   219  	}
   220  	vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{})
   221  	if err != nil {
   222  		return err
   223  	}
   224  	// See the similar defer in UmountAt for why this is in a closure.
   225  	defer func() {
   226  		vd.DecRef(ctx)
   227  	}()
   228  	if vd.dentry.isMounted() {
   229  		if realmnt := vfs.getMountAt(ctx, vd.mount, vd.dentry); realmnt != nil {
   230  			vd.mount.DecRef(ctx)
   231  			vd.mount = realmnt
   232  		}
   233  	} else if vd.dentry != vd.mount.root {
   234  		return linuxerr.EINVAL
   235  	}
   236  	vfs.SetMountPropagation(vd.mount, propFlags)
   237  	return nil
   238  }
   239  
   240  // SetMountPropagation changes the propagation type of the mount.
   241  func (vfs *VirtualFilesystem) SetMountPropagation(mnt *Mount, propFlags uint32) {
   242  	vfs.lockMounts()
   243  	defer vfs.unlockMounts(context.Background())
   244  	if propFlags&(linux.MS_SHARED|linux.MS_PRIVATE) != 0 {
   245  		vfs.setPropagation(mnt, propFlags)
   246  	} else {
   247  		panic(fmt.Sprintf("unsupported propagation type: %s", propTypeToString(propFlags)))
   248  	}
   249  }