github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/overlay/copy_up.go

// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package overlay

import (
	"fmt"
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/fspath"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
	"github.com/SagerNet/gvisor/pkg/syserror"
)

func (d *dentry) isCopiedUp() bool {
	return atomic.LoadUint32(&d.copiedUp) != 0
}

// copyUpLocked ensures that d exists on the upper layer, i.e. d.upperVD.Ok().
//
// Preconditions: filesystem.renameMu must be locked.
func (d *dentry) copyUpLocked(ctx context.Context) error {
	// Fast path.
	if d.isCopiedUp() {
		return nil
	}

	// Attach our credentials to the context, as some VFS operations use
	// credentials from context rather than take an explicit creds parameter.
	ctx = auth.ContextWithCredentials(ctx, d.fs.creds)

	ftype := atomic.LoadUint32(&d.mode) & linux.S_IFMT
	switch ftype {
	case linux.S_IFREG, linux.S_IFDIR, linux.S_IFLNK, linux.S_IFBLK, linux.S_IFCHR:
		// Can be copied-up.
	default:
		// Can't be copied-up.
		return linuxerr.EPERM
	}

	// Ensure that our parent directory is copied-up.
	if d.parent == nil {
		// d is a filesystem root with no upper layer.
		return linuxerr.EROFS
	}
	if err := d.parent.copyUpLocked(ctx); err != nil {
		return err
	}

	d.copyMu.Lock()
	defer d.copyMu.Unlock()
	if d.upperVD.Ok() {
		// Raced with another call to d.copyUpLocked().
		return nil
	}
	if d.vfsd.IsDead() {
		// Raced with deletion of d.
		return syserror.ENOENT
	}

	// Obtain settable timestamps from the lower layer.
	vfsObj := d.fs.vfsfs.VirtualFilesystem()
	oldpop := vfs.PathOperation{
		Root:  d.lowerVDs[0],
		Start: d.lowerVDs[0],
	}
	const timestampsMask = linux.STATX_ATIME | linux.STATX_MTIME
	oldStat, err := vfsObj.StatAt(ctx, d.fs.creds, &oldpop, &vfs.StatOptions{
		Mask: timestampsMask,
	})
	if err != nil {
		return err
	}

	// Perform copy-up.
	newpop := vfs.PathOperation{
		Root:  d.parent.upperVD,
		Start: d.parent.upperVD,
		Path:  fspath.Parse(d.name),
	}
	// Used during copy-up of memory-mapped regular files.
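	// mmapOpts is populated below (via ConfigureMMap on the newly created
	// upper file) only when the lower file already has a wrapped Mappable;
	// existing memory mappings are moved to the upper Mappable once copy-up
	// is known to succeed (see the end of this function).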
	var mmapOpts *memmap.MMapOpts
	cleanupUndoCopyUp := func() {
		var err error
		if ftype == linux.S_IFDIR {
			err = vfsObj.RmdirAt(ctx, d.fs.creds, &newpop)
		} else {
			err = vfsObj.UnlinkAt(ctx, d.fs.creds, &newpop)
		}
		if err != nil {
			panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after copy-up error: %v", err))
		}
		if d.upperVD.Ok() {
			d.upperVD.DecRef(ctx)
			d.upperVD = vfs.VirtualDentry{}
		}
	}
	switch ftype {
	case linux.S_IFREG:
		oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &oldpop, &vfs.OpenOptions{
			Flags: linux.O_RDONLY,
		})
		if err != nil {
			return err
		}
		defer oldFD.DecRef(ctx)
		newFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &newpop, &vfs.OpenOptions{
			Flags: linux.O_WRONLY | linux.O_CREAT | linux.O_EXCL,
			Mode:  linux.FileMode(d.mode &^ linux.S_IFMT),
		})
		if err != nil {
			return err
		}
		defer newFD.DecRef(ctx)
		if _, err := vfs.CopyRegularFileData(ctx, newFD, oldFD); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		d.mapsMu.Lock()
		defer d.mapsMu.Unlock()
		if d.wrappedMappable != nil {
			// We may have memory mappings of the file on the lower layer.
			// Switch to mapping the file on the upper layer instead.
			mmapOpts = &memmap.MMapOpts{
				Perms:    hostarch.ReadWrite,
				MaxPerms: hostarch.ReadWrite,
			}
			if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil {
				cleanupUndoCopyUp()
				return err
			}
			if mmapOpts.MappingIdentity != nil {
				mmapOpts.MappingIdentity.DecRef(ctx)
			}
			// Don't actually switch Mappables until the end of copy-up; see
			// below for why.
		}
		if err := newFD.SetStat(ctx, vfs.SetStatOptions{
			Stat: linux.Statx{
				Mask:  linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
				UID:   d.uid,
				GID:   d.gid,
				Atime: oldStat.Atime,
				Mtime: oldStat.Mtime,
			},
		}); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		d.upperVD = newFD.VirtualDentry()
		d.upperVD.IncRef()

	case linux.S_IFDIR:
		if err := vfsObj.MkdirAt(ctx, d.fs.creds, &newpop, &vfs.MkdirOptions{
			Mode: linux.FileMode(d.mode &^ linux.S_IFMT),
		}); err != nil {
			return err
		}
		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
			Stat: linux.Statx{
				Mask:  linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
				UID:   d.uid,
				GID:   d.gid,
				Atime: oldStat.Atime,
				Mtime: oldStat.Mtime,
			},
		}); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
		if err != nil {
			cleanupUndoCopyUp()
			return err
		}
		d.upperVD = upperVD

	case linux.S_IFLNK:
		target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &oldpop)
		if err != nil {
			return err
		}
		if err := vfsObj.SymlinkAt(ctx, d.fs.creds, &newpop, target); err != nil {
			return err
		}
		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
			Stat: linux.Statx{
				Mask:  linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
				Mode:  uint16(d.mode),
				UID:   d.uid,
				GID:   d.gid,
				Atime: oldStat.Atime,
				Mtime: oldStat.Mtime,
			},
		}); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
		if err != nil {
			cleanupUndoCopyUp()
			return err
		}
		d.upperVD = upperVD

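	// Device special files: recreate the node on the upper layer with the
	// device numbers reported by the lower layer, then copy ownership and
	// timestamps as for the other file types.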
	case linux.S_IFBLK, linux.S_IFCHR:
		if err := vfsObj.MknodAt(ctx, d.fs.creds, &newpop, &vfs.MknodOptions{
			Mode:     linux.FileMode(d.mode),
			DevMajor: oldStat.RdevMajor,
			DevMinor: oldStat.RdevMinor,
		}); err != nil {
			return err
		}
		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
			Stat: linux.Statx{
				Mask:  linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
				UID:   d.uid,
				GID:   d.gid,
				Atime: oldStat.Atime,
				Mtime: oldStat.Mtime,
			},
		}); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
		if err != nil {
			cleanupUndoCopyUp()
			return err
		}
		d.upperVD = upperVD

	default:
		// Should have rejected this at the beginning of this function?
		panic(fmt.Sprintf("unexpected file type %o", ftype))
	}

	if err := d.copyXattrsLocked(ctx); err != nil {
		cleanupUndoCopyUp()
		return err
	}

	// Update the dentry's device and inode numbers (except for directories,
	// for which these remain overlay-assigned).
	if ftype != linux.S_IFDIR {
		upperStat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{
			Root:  d.upperVD,
			Start: d.upperVD,
		}, &vfs.StatOptions{
			Mask: linux.STATX_INO,
		})
		if err != nil {
			cleanupUndoCopyUp()
			return err
		}
		if upperStat.Mask&linux.STATX_INO == 0 {
			cleanupUndoCopyUp()
			return linuxerr.EREMOTE
		}
		atomic.StoreUint32(&d.devMajor, upperStat.DevMajor)
		atomic.StoreUint32(&d.devMinor, upperStat.DevMinor)
		atomic.StoreUint64(&d.ino, upperStat.Ino)
	}

	if mmapOpts != nil && mmapOpts.Mappable != nil {
		// Note that if mmapOpts != nil, then d.mapsMu is locked for writing
		// (from the S_IFREG path above).

		// Propagate mappings of d to the new Mappable. Remember which mappings
		// we added so we can remove them on failure.
		upperMappable := mmapOpts.Mappable
		allAdded := make(map[memmap.MappableRange]memmap.MappingsOfRange)
		for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
			added := make(memmap.MappingsOfRange)
			for m := range seg.Value() {
				if err := upperMappable.AddMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable); err != nil {
					for m := range added {
						upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable)
					}
					for mr, mappings := range allAdded {
						for m := range mappings {
							upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, mr.Start, m.Writable)
						}
					}
					return err
				}
				added[m] = struct{}{}
			}
			allAdded[seg.Range()] = added
		}

		// Switch to the new Mappable. We do this at the end of copy-up
		// because:
		//
		// - We need to switch Mappables (by changing d.wrappedMappable) before
		//   invalidating Translations from the old Mappable (to pick up
		//   Translations from the new one).
		//
		// - We need to lock d.dataMu while changing d.wrappedMappable, but
		//   must invalidate Translations with d.dataMu unlocked (due to lock
		//   ordering).
		//
		// - Consequently, once we unlock d.dataMu, other threads may
		//   immediately observe the new (copied-up) Mappable, which we want to
		//   delay until copy-up is guaranteed to succeed.
		d.dataMu.Lock()
		lowerMappable := d.wrappedMappable
		d.wrappedMappable = upperMappable
		d.dataMu.Unlock()
		d.lowerMappings.InvalidateAll(memmap.InvalidateOpts{})

		// Remove mappings from the old Mappable.
		for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
			for m := range seg.Value() {
				lowerMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable)
			}
		}
		d.lowerMappings.RemoveAll()
	}

	atomic.StoreUint32(&d.copiedUp, 1)
	return nil
}

// copyXattrsLocked copies a subset of lower's extended attributes to upper.
// Attributes that configure an overlay in the lower are not copied up.
//
// Preconditions: d.copyMu must be locked for writing.
func (d *dentry) copyXattrsLocked(ctx context.Context) error {
	vfsObj := d.fs.vfsfs.VirtualFilesystem()
	lowerPop := &vfs.PathOperation{Root: d.lowerVDs[0], Start: d.lowerVDs[0]}
	upperPop := &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}

	lowerXattrs, err := vfsObj.ListXattrAt(ctx, d.fs.creds, lowerPop, 0)
	if err != nil {
		if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) {
			// There are no guarantees as to the contents of lowerXattrs.
			return nil
		}
		ctx.Infof("failed to copy up xattrs because ListXattrAt failed: %v", err)
		return err
	}

	for _, name := range lowerXattrs {
		// Do not copy up overlay attributes.
		if isOverlayXattr(name) {
			continue
		}

		value, err := vfsObj.GetXattrAt(ctx, d.fs.creds, lowerPop, &vfs.GetXattrOptions{Name: name, Size: 0})
		if err != nil {
			ctx.Infof("failed to copy up xattrs because GetXattrAt failed: %v", err)
			return err
		}

		if err := vfsObj.SetXattrAt(ctx, d.fs.creds, upperPop, &vfs.SetXattrOptions{Name: name, Value: value}); err != nil {
			ctx.Infof("failed to copy up xattrs because SetXattrAt failed: %v", err)
			return err
		}
	}
	return nil
}

// copyUpDescendantsLocked ensures that all descendants of d are copied up.
//
// Preconditions:
// * filesystem.renameMu must be locked.
// * d.dirMu must be locked.
// * d.isDir().
func (d *dentry) copyUpDescendantsLocked(ctx context.Context, ds **[]*dentry) error {
	dirents, err := d.getDirentsLocked(ctx)
	if err != nil {
		return err
	}
	for _, dirent := range dirents {
		if dirent.Name == "." || dirent.Name == ".." {
			continue
		}
		child, _, err := d.fs.getChildLocked(ctx, d, dirent.Name, ds)
		if err != nil {
			return err
		}
		if err := child.copyUpLocked(ctx); err != nil {
			return err
		}
		if child.isDir() {
			child.dirMu.Lock()
			err := child.copyUpDescendantsLocked(ctx, ds)
			child.dirMu.Unlock()
			if err != nil {
				return err
			}
		}
	}
	return nil
}