github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/overlay/copy_up.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package overlay 16 17 import ( 18 "fmt" 19 20 "github.com/metacubex/gvisor/pkg/abi/linux" 21 "github.com/metacubex/gvisor/pkg/context" 22 "github.com/metacubex/gvisor/pkg/errors/linuxerr" 23 "github.com/metacubex/gvisor/pkg/fspath" 24 "github.com/metacubex/gvisor/pkg/hostarch" 25 "github.com/metacubex/gvisor/pkg/sentry/kernel/auth" 26 "github.com/metacubex/gvisor/pkg/sentry/memmap" 27 "github.com/metacubex/gvisor/pkg/sentry/vfs" 28 ) 29 30 func (d *dentry) isCopiedUp() bool { 31 return d.copiedUp.Load() != 0 32 } 33 34 func (d *dentry) canBeCopiedUp() bool { 35 ftype := d.mode.Load() & linux.S_IFMT 36 switch ftype { 37 case linux.S_IFREG, linux.S_IFDIR, linux.S_IFLNK, linux.S_IFBLK, linux.S_IFCHR: 38 // Can be copied-up. 39 return true 40 default: 41 // Can't be copied-up. 42 return false 43 } 44 } 45 46 // copyUpLocked ensures that d exists on the upper layer, i.e. d.upperVD.Ok(). 47 // 48 // Preconditions: filesystem.renameMu must be locked. 49 func (d *dentry) copyUpLocked(ctx context.Context) error { 50 return d.copyUpMaybeSyntheticMountpointLocked(ctx, false /* forSyntheticMountpoint */) 51 } 52 53 func (d *dentry) copyUpMaybeSyntheticMountpointLocked(ctx context.Context, forSyntheticMountpoint bool) error { 54 // Fast path. 55 if d.isCopiedUp() { 56 return nil 57 } 58 59 // Attach our credentials to the context, as some VFS operations use 60 // credentials from context rather an take an explicit creds parameter. 61 ctx = auth.ContextWithCredentials(ctx, d.fs.creds) 62 63 if !d.canBeCopiedUp() { 64 return linuxerr.EPERM 65 } 66 67 // Ensure that our parent directory is copied-up. 68 parent := d.parent.Load() 69 if parent == nil { 70 // d is a filesystem root with no upper layer. 71 return linuxerr.EROFS 72 } 73 if err := parent.copyUpMaybeSyntheticMountpointLocked(ctx, forSyntheticMountpoint); err != nil { 74 return err 75 } 76 77 d.copyMu.Lock() 78 defer d.copyMu.Unlock() 79 if d.upperVD.Ok() { 80 // Raced with another call to d.copyUpLocked(). 81 return nil 82 } 83 if d.vfsd.IsDead() { 84 // Raced with deletion of d. 85 return linuxerr.ENOENT 86 } 87 88 // Obtain settable timestamps from the lower layer. 89 vfsObj := d.fs.vfsfs.VirtualFilesystem() 90 oldpop := vfs.PathOperation{ 91 Root: d.lowerVDs[0], 92 Start: d.lowerVDs[0], 93 } 94 const timestampsMask = linux.STATX_ATIME | linux.STATX_MTIME 95 oldStat, err := vfsObj.StatAt(ctx, d.fs.creds, &oldpop, &vfs.StatOptions{ 96 Mask: timestampsMask, 97 }) 98 if err != nil { 99 return err 100 } 101 102 // Perform copy-up. 103 ftype := d.mode.Load() & linux.S_IFMT 104 newpop := vfs.PathOperation{ 105 Root: parent.upperVD, 106 Start: parent.upperVD, 107 Path: fspath.Parse(d.name), 108 } 109 // Used during copy-up of memory-mapped regular files. 110 var mmapOpts *memmap.MMapOpts 111 cleanupUndoCopyUp := func() { 112 var err error 113 if ftype == linux.S_IFDIR { 114 err = vfsObj.RmdirAt(ctx, d.fs.creds, &newpop) 115 } else { 116 err = vfsObj.UnlinkAt(ctx, d.fs.creds, &newpop) 117 } 118 if err != nil { 119 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after copy-up error: %v", err)) 120 } 121 if d.upperVD.Ok() { 122 d.upperVD.DecRef(ctx) 123 d.upperVD = vfs.VirtualDentry{} 124 } 125 } 126 switch ftype { 127 case linux.S_IFREG: 128 oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &oldpop, &vfs.OpenOptions{ 129 Flags: linux.O_RDONLY, 130 }) 131 if err != nil { 132 return err 133 } 134 defer oldFD.DecRef(ctx) 135 newFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &newpop, &vfs.OpenOptions{ 136 Flags: linux.O_WRONLY | linux.O_CREAT | linux.O_EXCL, 137 // d.mode can be read because d.copyMu is locked. 138 Mode: linux.FileMode(d.mode.RacyLoad() &^ linux.S_IFMT), 139 }) 140 if err != nil { 141 return err 142 } 143 defer newFD.DecRef(ctx) 144 if _, err := vfs.CopyRegularFileData(ctx, newFD, oldFD); err != nil { 145 cleanupUndoCopyUp() 146 return err 147 } 148 if d.wrappedMappable != nil { 149 // We may have memory mappings of the file on the lower layer. 150 // Switch to mapping the file on the upper layer instead. 151 mmapOpts = &memmap.MMapOpts{ 152 Perms: hostarch.ReadWrite, 153 MaxPerms: hostarch.ReadWrite, 154 } 155 if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil { 156 cleanupUndoCopyUp() 157 return err 158 } 159 if mmapOpts.MappingIdentity != nil { 160 mmapOpts.MappingIdentity.DecRef(ctx) 161 } 162 // Don't actually switch Mappables until the end of copy-up; see 163 // below for why. 164 } 165 if err := newFD.SetStat(ctx, vfs.SetStatOptions{ 166 Stat: linux.Statx{ 167 Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, 168 // d.uid and d.gid can be read because d.copyMu is locked. 169 UID: d.uid.RacyLoad(), 170 GID: d.gid.RacyLoad(), 171 Atime: oldStat.Atime, 172 Mtime: oldStat.Mtime, 173 }, 174 }); err != nil { 175 cleanupUndoCopyUp() 176 return err 177 } 178 d.upperVD = newFD.VirtualDentry() 179 d.upperVD.IncRef() 180 181 case linux.S_IFDIR: 182 if err := vfsObj.MkdirAt(ctx, d.fs.creds, &newpop, &vfs.MkdirOptions{ 183 // d.mode can be read because d.copyMu is locked. 184 Mode: linux.FileMode(d.mode.RacyLoad() &^ linux.S_IFMT), 185 ForSyntheticMountpoint: forSyntheticMountpoint, 186 }); err != nil { 187 return err 188 } 189 if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{ 190 Stat: linux.Statx{ 191 Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, 192 // d.uid and d.gid can be read because d.copyMu is locked. 193 UID: d.uid.RacyLoad(), 194 GID: d.gid.RacyLoad(), 195 Atime: oldStat.Atime, 196 Mtime: oldStat.Mtime, 197 }, 198 }); err != nil { 199 cleanupUndoCopyUp() 200 return err 201 } 202 upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{}) 203 if err != nil { 204 cleanupUndoCopyUp() 205 return err 206 } 207 d.upperVD = upperVD 208 209 case linux.S_IFLNK: 210 target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &oldpop) 211 if err != nil { 212 return err 213 } 214 if err := vfsObj.SymlinkAt(ctx, d.fs.creds, &newpop, target); err != nil { 215 return err 216 } 217 if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{ 218 Stat: linux.Statx{ 219 Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, 220 // d.{uid,gid,mode} can be read because d.copyMu is locked. 221 Mode: uint16(d.mode.RacyLoad()), 222 UID: d.uid.RacyLoad(), 223 GID: d.gid.RacyLoad(), 224 Atime: oldStat.Atime, 225 Mtime: oldStat.Mtime, 226 }, 227 }); err != nil { 228 cleanupUndoCopyUp() 229 return err 230 } 231 upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{}) 232 if err != nil { 233 cleanupUndoCopyUp() 234 return err 235 } 236 d.upperVD = upperVD 237 238 case linux.S_IFBLK, linux.S_IFCHR: 239 if err := vfsObj.MknodAt(ctx, d.fs.creds, &newpop, &vfs.MknodOptions{ 240 // d.mode can be read because d.copyMu is locked. 241 Mode: linux.FileMode(d.mode.RacyLoad()), 242 DevMajor: oldStat.RdevMajor, 243 DevMinor: oldStat.RdevMinor, 244 }); err != nil { 245 return err 246 } 247 if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{ 248 Stat: linux.Statx{ 249 Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, 250 // d.uid and d.gid can be read because d.copyMu is locked. 251 UID: d.uid.RacyLoad(), 252 GID: d.gid.RacyLoad(), 253 Atime: oldStat.Atime, 254 Mtime: oldStat.Mtime, 255 }, 256 }); err != nil { 257 cleanupUndoCopyUp() 258 return err 259 } 260 upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{}) 261 if err != nil { 262 cleanupUndoCopyUp() 263 return err 264 } 265 d.upperVD = upperVD 266 267 default: 268 // Should have rejected this at the beginning of this function? 269 panic(fmt.Sprintf("unexpected file type %o", ftype)) 270 } 271 272 if err := d.copyXattrsLocked(ctx); err != nil { 273 cleanupUndoCopyUp() 274 return err 275 } 276 277 // Update the dentry's device and inode numbers (except for directories, 278 // for which these remain overlay-assigned). 279 if ftype != linux.S_IFDIR { 280 upperStat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{ 281 Root: d.upperVD, 282 Start: d.upperVD, 283 }, &vfs.StatOptions{ 284 Mask: linux.STATX_INO, 285 }) 286 if err != nil { 287 cleanupUndoCopyUp() 288 return err 289 } 290 if upperStat.Mask&linux.STATX_INO == 0 { 291 cleanupUndoCopyUp() 292 return linuxerr.EREMOTE 293 } 294 d.devMajor.Store(upperStat.DevMajor) 295 d.devMinor.Store(upperStat.DevMinor) 296 d.ino.Store(upperStat.Ino) 297 298 // Lower level dentries for non-directories are no longer accessible from 299 // the overlayfs anymore after copyup. Ask filesystems to release their 300 // resources whenever possible. 301 for _, lowerDentry := range d.lowerVDs { 302 lowerDentry.Dentry().MarkEvictable() 303 } 304 } 305 306 if mmapOpts != nil && mmapOpts.Mappable != nil { 307 d.mapsMu.Lock() 308 defer d.mapsMu.Unlock() 309 310 // Propagate mappings of d to the new Mappable. Remember which mappings 311 // we added so we can remove them on failure. 312 upperMappable := mmapOpts.Mappable 313 allAdded := make(map[memmap.MappableRange]memmap.MappingsOfRange) 314 for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { 315 added := make(memmap.MappingsOfRange) 316 for m := range seg.Value() { 317 if err := upperMappable.AddMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable); err != nil { 318 for m := range added { 319 upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable) 320 } 321 for mr, mappings := range allAdded { 322 for m := range mappings { 323 upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, mr.Start, m.Writable) 324 } 325 } 326 return err 327 } 328 added[m] = struct{}{} 329 } 330 allAdded[seg.Range()] = added 331 } 332 333 // Switch to the new Mappable. We do this at the end of copy-up 334 // because: 335 // 336 // - We need to switch Mappables (by changing d.wrappedMappable) before 337 // invalidating Translations from the old Mappable (to pick up 338 // Translations from the new one). 339 // 340 // - We need to lock d.dataMu while changing d.wrappedMappable, but 341 // must invalidate Translations with d.dataMu unlocked (due to lock 342 // ordering). 343 // 344 // - Consequently, once we unlock d.dataMu, other threads may 345 // immediately observe the new (copied-up) Mappable, which we want to 346 // delay until copy-up is guaranteed to succeed. 347 d.dataMu.Lock() 348 lowerMappable := d.wrappedMappable 349 d.wrappedMappable = upperMappable 350 d.dataMu.Unlock() 351 d.lowerMappings.InvalidateAll(memmap.InvalidateOpts{}) 352 353 // Remove mappings from the old Mappable. 354 for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { 355 for m := range seg.Value() { 356 lowerMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable) 357 } 358 } 359 d.lowerMappings.RemoveAll() 360 } 361 362 d.copiedUp.Store(1) 363 return nil 364 } 365 366 // copyXattrsLocked copies a subset of lower's extended attributes to upper. 367 // Attributes that configure an overlay in the lower are not copied up. 368 // 369 // Preconditions: d.copyMu must be locked for writing. 370 func (d *dentry) copyXattrsLocked(ctx context.Context) error { 371 vfsObj := d.fs.vfsfs.VirtualFilesystem() 372 lowerPop := &vfs.PathOperation{Root: d.lowerVDs[0], Start: d.lowerVDs[0]} 373 upperPop := &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD} 374 375 lowerXattrs, err := vfsObj.ListXattrAt(ctx, d.fs.creds, lowerPop, 0) 376 if err != nil { 377 if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { 378 // There are no guarantees as to the contents of lowerXattrs. 379 return nil 380 } 381 ctx.Infof("failed to copy up xattrs because ListXattrAt failed: %v", err) 382 return err 383 } 384 385 for _, name := range lowerXattrs { 386 // Do not copy up overlay attributes. 387 if isOverlayXattr(name) { 388 continue 389 } 390 391 value, err := vfsObj.GetXattrAt(ctx, d.fs.creds, lowerPop, &vfs.GetXattrOptions{Name: name, Size: 0}) 392 if err != nil { 393 ctx.Infof("failed to copy up xattrs because GetXattrAt failed: %v", err) 394 return err 395 } 396 397 if err := vfsObj.SetXattrAt(ctx, d.fs.creds, upperPop, &vfs.SetXattrOptions{Name: name, Value: value}); err != nil { 398 ctx.Infof("failed to copy up xattrs because SetXattrAt failed: %v", err) 399 return err 400 } 401 } 402 return nil 403 } 404 405 // copyUpDescendantsLocked ensures that all descendants of d are copied up. 406 // 407 // Preconditions: 408 // - filesystem.renameMu must be locked. 409 // - d.dirMu must be locked. 410 // - d.isDir(). 411 func (d *dentry) copyUpDescendantsLocked(ctx context.Context, ds **[]*dentry) error { 412 dirents, err := d.getDirentsLocked(ctx) 413 if err != nil { 414 return err 415 } 416 for _, dirent := range dirents { 417 if dirent.Name == "." || dirent.Name == ".." { 418 continue 419 } 420 child, _, err := d.fs.getChildLocked(ctx, d, dirent.Name, ds) 421 if err != nil { 422 return err 423 } 424 if err := child.copyUpLocked(ctx); err != nil { 425 return err 426 } 427 if child.isDir() { 428 child.dirMu.Lock() 429 err := child.copyUpDescendantsLocked(ctx, ds) 430 child.dirMu.Unlock() 431 if err != nil { 432 return err 433 } 434 } 435 } 436 return nil 437 }