github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/overlay/copy_up.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package overlay 16 17 import ( 18 "fmt" 19 20 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 21 "github.com/nicocha30/gvisor-ligolo/pkg/context" 22 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 23 "github.com/nicocha30/gvisor-ligolo/pkg/fspath" 24 "github.com/nicocha30/gvisor-ligolo/pkg/hostarch" 25 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth" 26 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/memmap" 27 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs" 28 ) 29 30 func (d *dentry) isCopiedUp() bool { 31 return d.copiedUp.Load() != 0 32 } 33 34 func (d *dentry) canBeCopiedUp() bool { 35 ftype := d.mode.Load() & linux.S_IFMT 36 switch ftype { 37 case linux.S_IFREG, linux.S_IFDIR, linux.S_IFLNK, linux.S_IFBLK, linux.S_IFCHR: 38 // Can be copied-up. 39 return true 40 default: 41 // Can't be copied-up. 42 return false 43 } 44 } 45 46 // copyUpLocked ensures that d exists on the upper layer, i.e. d.upperVD.Ok(). 47 // 48 // Preconditions: filesystem.renameMu must be locked. 49 func (d *dentry) copyUpLocked(ctx context.Context) error { 50 return d.copyUpMaybeSyntheticMountpointLocked(ctx, false /* forSyntheticMountpoint */) 51 } 52 53 func (d *dentry) copyUpMaybeSyntheticMountpointLocked(ctx context.Context, forSyntheticMountpoint bool) error { 54 // Fast path. 55 if d.isCopiedUp() { 56 return nil 57 } 58 59 // Attach our credentials to the context, as some VFS operations use 60 // credentials from context rather an take an explicit creds parameter. 61 ctx = auth.ContextWithCredentials(ctx, d.fs.creds) 62 63 if !d.canBeCopiedUp() { 64 return linuxerr.EPERM 65 } 66 67 // Ensure that our parent directory is copied-up. 68 if d.parent == nil { 69 // d is a filesystem root with no upper layer. 70 return linuxerr.EROFS 71 } 72 if err := d.parent.copyUpMaybeSyntheticMountpointLocked(ctx, forSyntheticMountpoint); err != nil { 73 return err 74 } 75 76 d.copyMu.Lock() 77 defer d.copyMu.Unlock() 78 if d.upperVD.Ok() { 79 // Raced with another call to d.copyUpLocked(). 80 return nil 81 } 82 if d.vfsd.IsDead() { 83 // Raced with deletion of d. 84 return linuxerr.ENOENT 85 } 86 87 // Obtain settable timestamps from the lower layer. 88 vfsObj := d.fs.vfsfs.VirtualFilesystem() 89 oldpop := vfs.PathOperation{ 90 Root: d.lowerVDs[0], 91 Start: d.lowerVDs[0], 92 } 93 const timestampsMask = linux.STATX_ATIME | linux.STATX_MTIME 94 oldStat, err := vfsObj.StatAt(ctx, d.fs.creds, &oldpop, &vfs.StatOptions{ 95 Mask: timestampsMask, 96 }) 97 if err != nil { 98 return err 99 } 100 101 // Perform copy-up. 102 ftype := d.mode.Load() & linux.S_IFMT 103 newpop := vfs.PathOperation{ 104 Root: d.parent.upperVD, 105 Start: d.parent.upperVD, 106 Path: fspath.Parse(d.name), 107 } 108 // Used during copy-up of memory-mapped regular files. 109 var mmapOpts *memmap.MMapOpts 110 cleanupUndoCopyUp := func() { 111 var err error 112 if ftype == linux.S_IFDIR { 113 err = vfsObj.RmdirAt(ctx, d.fs.creds, &newpop) 114 } else { 115 err = vfsObj.UnlinkAt(ctx, d.fs.creds, &newpop) 116 } 117 if err != nil { 118 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after copy-up error: %v", err)) 119 } 120 if d.upperVD.Ok() { 121 d.upperVD.DecRef(ctx) 122 d.upperVD = vfs.VirtualDentry{} 123 } 124 } 125 switch ftype { 126 case linux.S_IFREG: 127 oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &oldpop, &vfs.OpenOptions{ 128 Flags: linux.O_RDONLY, 129 }) 130 if err != nil { 131 return err 132 } 133 defer oldFD.DecRef(ctx) 134 newFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &newpop, &vfs.OpenOptions{ 135 Flags: linux.O_WRONLY | linux.O_CREAT | linux.O_EXCL, 136 // d.mode can be read because d.copyMu is locked. 137 Mode: linux.FileMode(d.mode.RacyLoad() &^ linux.S_IFMT), 138 }) 139 if err != nil { 140 return err 141 } 142 defer newFD.DecRef(ctx) 143 if _, err := vfs.CopyRegularFileData(ctx, newFD, oldFD); err != nil { 144 cleanupUndoCopyUp() 145 return err 146 } 147 if d.wrappedMappable != nil { 148 // We may have memory mappings of the file on the lower layer. 149 // Switch to mapping the file on the upper layer instead. 150 mmapOpts = &memmap.MMapOpts{ 151 Perms: hostarch.ReadWrite, 152 MaxPerms: hostarch.ReadWrite, 153 } 154 if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil { 155 cleanupUndoCopyUp() 156 return err 157 } 158 if mmapOpts.MappingIdentity != nil { 159 mmapOpts.MappingIdentity.DecRef(ctx) 160 } 161 // Don't actually switch Mappables until the end of copy-up; see 162 // below for why. 163 } 164 if err := newFD.SetStat(ctx, vfs.SetStatOptions{ 165 Stat: linux.Statx{ 166 Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, 167 // d.uid and d.gid can be read because d.copyMu is locked. 168 UID: d.uid.RacyLoad(), 169 GID: d.gid.RacyLoad(), 170 Atime: oldStat.Atime, 171 Mtime: oldStat.Mtime, 172 }, 173 }); err != nil { 174 cleanupUndoCopyUp() 175 return err 176 } 177 d.upperVD = newFD.VirtualDentry() 178 d.upperVD.IncRef() 179 180 case linux.S_IFDIR: 181 if err := vfsObj.MkdirAt(ctx, d.fs.creds, &newpop, &vfs.MkdirOptions{ 182 // d.mode can be read because d.copyMu is locked. 183 Mode: linux.FileMode(d.mode.RacyLoad() &^ linux.S_IFMT), 184 ForSyntheticMountpoint: forSyntheticMountpoint, 185 }); err != nil { 186 return err 187 } 188 if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{ 189 Stat: linux.Statx{ 190 Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, 191 // d.uid and d.gid can be read because d.copyMu is locked. 192 UID: d.uid.RacyLoad(), 193 GID: d.gid.RacyLoad(), 194 Atime: oldStat.Atime, 195 Mtime: oldStat.Mtime, 196 }, 197 }); err != nil { 198 cleanupUndoCopyUp() 199 return err 200 } 201 upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{}) 202 if err != nil { 203 cleanupUndoCopyUp() 204 return err 205 } 206 d.upperVD = upperVD 207 208 case linux.S_IFLNK: 209 target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &oldpop) 210 if err != nil { 211 return err 212 } 213 if err := vfsObj.SymlinkAt(ctx, d.fs.creds, &newpop, target); err != nil { 214 return err 215 } 216 if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{ 217 Stat: linux.Statx{ 218 Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, 219 // d.{uid,gid,mode} can be read because d.copyMu is locked. 220 Mode: uint16(d.mode.RacyLoad()), 221 UID: d.uid.RacyLoad(), 222 GID: d.gid.RacyLoad(), 223 Atime: oldStat.Atime, 224 Mtime: oldStat.Mtime, 225 }, 226 }); err != nil { 227 cleanupUndoCopyUp() 228 return err 229 } 230 upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{}) 231 if err != nil { 232 cleanupUndoCopyUp() 233 return err 234 } 235 d.upperVD = upperVD 236 237 case linux.S_IFBLK, linux.S_IFCHR: 238 if err := vfsObj.MknodAt(ctx, d.fs.creds, &newpop, &vfs.MknodOptions{ 239 // d.mode can be read because d.copyMu is locked. 240 Mode: linux.FileMode(d.mode.RacyLoad()), 241 DevMajor: oldStat.RdevMajor, 242 DevMinor: oldStat.RdevMinor, 243 }); err != nil { 244 return err 245 } 246 if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{ 247 Stat: linux.Statx{ 248 Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, 249 // d.uid and d.gid can be read because d.copyMu is locked. 250 UID: d.uid.RacyLoad(), 251 GID: d.gid.RacyLoad(), 252 Atime: oldStat.Atime, 253 Mtime: oldStat.Mtime, 254 }, 255 }); err != nil { 256 cleanupUndoCopyUp() 257 return err 258 } 259 upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{}) 260 if err != nil { 261 cleanupUndoCopyUp() 262 return err 263 } 264 d.upperVD = upperVD 265 266 default: 267 // Should have rejected this at the beginning of this function? 268 panic(fmt.Sprintf("unexpected file type %o", ftype)) 269 } 270 271 if err := d.copyXattrsLocked(ctx); err != nil { 272 cleanupUndoCopyUp() 273 return err 274 } 275 276 // Update the dentry's device and inode numbers (except for directories, 277 // for which these remain overlay-assigned). 278 if ftype != linux.S_IFDIR { 279 upperStat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{ 280 Root: d.upperVD, 281 Start: d.upperVD, 282 }, &vfs.StatOptions{ 283 Mask: linux.STATX_INO, 284 }) 285 if err != nil { 286 cleanupUndoCopyUp() 287 return err 288 } 289 if upperStat.Mask&linux.STATX_INO == 0 { 290 cleanupUndoCopyUp() 291 return linuxerr.EREMOTE 292 } 293 d.devMajor.Store(upperStat.DevMajor) 294 d.devMinor.Store(upperStat.DevMinor) 295 d.ino.Store(upperStat.Ino) 296 297 // Lower level dentries for non-directories are no longer accessible from 298 // the overlayfs anymore after copyup. Ask filesystems to release their 299 // resources whenever possible. 300 for _, lowerDentry := range d.lowerVDs { 301 lowerDentry.Dentry().MarkEvictable() 302 } 303 } 304 305 if mmapOpts != nil && mmapOpts.Mappable != nil { 306 d.mapsMu.Lock() 307 defer d.mapsMu.Unlock() 308 309 // Propagate mappings of d to the new Mappable. Remember which mappings 310 // we added so we can remove them on failure. 311 upperMappable := mmapOpts.Mappable 312 allAdded := make(map[memmap.MappableRange]memmap.MappingsOfRange) 313 for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { 314 added := make(memmap.MappingsOfRange) 315 for m := range seg.Value() { 316 if err := upperMappable.AddMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable); err != nil { 317 for m := range added { 318 upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable) 319 } 320 for mr, mappings := range allAdded { 321 for m := range mappings { 322 upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, mr.Start, m.Writable) 323 } 324 } 325 return err 326 } 327 added[m] = struct{}{} 328 } 329 allAdded[seg.Range()] = added 330 } 331 332 // Switch to the new Mappable. We do this at the end of copy-up 333 // because: 334 // 335 // - We need to switch Mappables (by changing d.wrappedMappable) before 336 // invalidating Translations from the old Mappable (to pick up 337 // Translations from the new one). 338 // 339 // - We need to lock d.dataMu while changing d.wrappedMappable, but 340 // must invalidate Translations with d.dataMu unlocked (due to lock 341 // ordering). 342 // 343 // - Consequently, once we unlock d.dataMu, other threads may 344 // immediately observe the new (copied-up) Mappable, which we want to 345 // delay until copy-up is guaranteed to succeed. 346 d.dataMu.Lock() 347 lowerMappable := d.wrappedMappable 348 d.wrappedMappable = upperMappable 349 d.dataMu.Unlock() 350 d.lowerMappings.InvalidateAll(memmap.InvalidateOpts{}) 351 352 // Remove mappings from the old Mappable. 353 for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { 354 for m := range seg.Value() { 355 lowerMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable) 356 } 357 } 358 d.lowerMappings.RemoveAll() 359 } 360 361 d.copiedUp.Store(1) 362 return nil 363 } 364 365 // copyXattrsLocked copies a subset of lower's extended attributes to upper. 366 // Attributes that configure an overlay in the lower are not copied up. 367 // 368 // Preconditions: d.copyMu must be locked for writing. 369 func (d *dentry) copyXattrsLocked(ctx context.Context) error { 370 vfsObj := d.fs.vfsfs.VirtualFilesystem() 371 lowerPop := &vfs.PathOperation{Root: d.lowerVDs[0], Start: d.lowerVDs[0]} 372 upperPop := &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD} 373 374 lowerXattrs, err := vfsObj.ListXattrAt(ctx, d.fs.creds, lowerPop, 0) 375 if err != nil { 376 if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { 377 // There are no guarantees as to the contents of lowerXattrs. 378 return nil 379 } 380 ctx.Infof("failed to copy up xattrs because ListXattrAt failed: %v", err) 381 return err 382 } 383 384 for _, name := range lowerXattrs { 385 // Do not copy up overlay attributes. 386 if isOverlayXattr(name) { 387 continue 388 } 389 390 value, err := vfsObj.GetXattrAt(ctx, d.fs.creds, lowerPop, &vfs.GetXattrOptions{Name: name, Size: 0}) 391 if err != nil { 392 ctx.Infof("failed to copy up xattrs because GetXattrAt failed: %v", err) 393 return err 394 } 395 396 if err := vfsObj.SetXattrAt(ctx, d.fs.creds, upperPop, &vfs.SetXattrOptions{Name: name, Value: value}); err != nil { 397 ctx.Infof("failed to copy up xattrs because SetXattrAt failed: %v", err) 398 return err 399 } 400 } 401 return nil 402 } 403 404 // copyUpDescendantsLocked ensures that all descendants of d are copied up. 405 // 406 // Preconditions: 407 // - filesystem.renameMu must be locked. 408 // - d.dirMu must be locked. 409 // - d.isDir(). 410 func (d *dentry) copyUpDescendantsLocked(ctx context.Context, ds **[]*dentry) error { 411 dirents, err := d.getDirentsLocked(ctx) 412 if err != nil { 413 return err 414 } 415 for _, dirent := range dirents { 416 if dirent.Name == "." || dirent.Name == ".." { 417 continue 418 } 419 child, _, err := d.fs.getChildLocked(ctx, d, dirent.Name, ds) 420 if err != nil { 421 return err 422 } 423 if err := child.copyUpLocked(ctx); err != nil { 424 return err 425 } 426 if child.isDir() { 427 child.dirMu.Lock() 428 err := child.copyUpDescendantsLocked(ctx, ds) 429 child.dirMu.Unlock() 430 if err != nil { 431 return err 432 } 433 } 434 } 435 return nil 436 }