github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/verity/filesystem.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package verity 16 17 import ( 18 "bytes" 19 "encoding/json" 20 "fmt" 21 "io" 22 "strconv" 23 "strings" 24 "sync/atomic" 25 26 "github.com/SagerNet/gvisor/pkg/abi/linux" 27 "github.com/SagerNet/gvisor/pkg/context" 28 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 29 "github.com/SagerNet/gvisor/pkg/fspath" 30 "github.com/SagerNet/gvisor/pkg/merkletree" 31 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 32 "github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport" 33 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 34 "github.com/SagerNet/gvisor/pkg/sync" 35 "github.com/SagerNet/gvisor/pkg/syserror" 36 "github.com/SagerNet/gvisor/pkg/usermem" 37 ) 38 39 // Sync implements vfs.FilesystemImpl.Sync. 40 func (fs *filesystem) Sync(ctx context.Context) error { 41 // All files should be read-only. 42 return nil 43 } 44 45 var dentrySlicePool = sync.Pool{ 46 New: func() interface{} { 47 ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity 48 return &ds 49 }, 50 } 51 52 func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry { 53 if ds == nil { 54 ds = dentrySlicePool.Get().(*[]*dentry) 55 } 56 *ds = append(*ds, d) 57 return ds 58 } 59 60 // Preconditions: ds != nil. 61 func putDentrySlice(ds *[]*dentry) { 62 // Allow dentries to be GC'd. 63 for i := range *ds { 64 (*ds)[i] = nil 65 } 66 *ds = (*ds)[:0] 67 dentrySlicePool.Put(ds) 68 } 69 70 // renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls 71 // dentry.checkDropLocked on all dentries in *ds with fs.renameMu locked for 72 // writing. 73 // 74 // ds is a pointer-to-pointer since defer evaluates its arguments immediately, 75 // but dentry slices are allocated lazily, and it's much easier to say "defer 76 // fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() { 77 // fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this. 78 // +checklocksrelease:fs.renameMu 79 func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { 80 fs.renameMu.RUnlock() 81 if *ds == nil { 82 return 83 } 84 if len(**ds) != 0 { 85 fs.renameMu.Lock() 86 for _, d := range **ds { 87 d.checkDropLocked(ctx) 88 } 89 fs.renameMu.Unlock() 90 } 91 putDentrySlice(*ds) 92 } 93 94 // +checklocksrelease:fs.renameMu 95 func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { 96 if *ds == nil { 97 fs.renameMu.Unlock() 98 return 99 } 100 for _, d := range **ds { 101 d.checkDropLocked(ctx) 102 } 103 fs.renameMu.Unlock() 104 putDentrySlice(*ds) 105 } 106 107 // stepLocked resolves rp.Component() to an existing file, starting from the 108 // given directory. 109 // 110 // Dentries which may have a reference count of zero, and which therefore 111 // should be dropped once traversal is complete, are appended to ds. 112 // 113 // Preconditions: 114 // * fs.renameMu must be locked. 115 // * d.dirMu must be locked. 116 // * !rp.Done(). 117 func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, error) { 118 if !d.isDir() { 119 return nil, syserror.ENOTDIR 120 } 121 122 if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 123 return nil, err 124 } 125 126 afterSymlink: 127 name := rp.Component() 128 if name == "." { 129 rp.Advance() 130 return d, nil 131 } 132 if name == ".." { 133 if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil { 134 return nil, err 135 } else if isRoot || d.parent == nil { 136 rp.Advance() 137 return d, nil 138 } 139 if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil { 140 return nil, err 141 } 142 rp.Advance() 143 return d.parent, nil 144 } 145 child, err := fs.getChildLocked(ctx, d, name, ds) 146 if err != nil { 147 return nil, err 148 } 149 if err := rp.CheckMount(ctx, &child.vfsd); err != nil { 150 return nil, err 151 } 152 if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() { 153 target, err := child.readlink(ctx) 154 if err != nil { 155 return nil, err 156 } 157 if err := rp.HandleSymlink(target); err != nil { 158 return nil, err 159 } 160 goto afterSymlink // don't check the current directory again 161 } 162 rp.Advance() 163 return child, nil 164 } 165 166 // verifyChildLocked verifies the hash of child against the already verified 167 // hash of the parent to ensure the child is expected. verifyChild triggers a 168 // sentry panic if unexpected modifications to the file system are detected. In 169 // ErrorOnViolation mode it returns a syserror instead. 170 // 171 // Preconditions: 172 // * fs.renameMu must be locked. 173 // * d.dirMu must be locked. 174 func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, child *dentry) (*dentry, error) { 175 vfsObj := fs.vfsfs.VirtualFilesystem() 176 177 // Get the path to the child dentry. This is only used to provide path 178 // information in failure case. 179 childPath, err := vfsObj.PathnameWithDeleted(ctx, child.fs.rootDentry.lowerVD, child.lowerVD) 180 if err != nil { 181 return nil, err 182 } 183 184 fs.verityMu.RLock() 185 defer fs.verityMu.RUnlock() 186 // Read the offset of the child from the extended attributes of the 187 // corresponding Merkle tree file. 188 // This is the offset of the hash for child in its parent's Merkle tree 189 // file. 190 off, err := vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{ 191 Root: child.lowerMerkleVD, 192 Start: child.lowerMerkleVD, 193 }, &vfs.GetXattrOptions{ 194 Name: merkleOffsetInParentXattr, 195 Size: sizeOfStringInt32, 196 }) 197 198 // The Merkle tree file for the child should have been created and 199 // contains the expected xattrs. If the file or the xattr does not 200 // exist, it indicates unexpected modifications to the file system. 201 if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) { 202 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleOffsetInParentXattr, childPath, err)) 203 } 204 if err != nil { 205 return nil, err 206 } 207 // The offset xattr should be an integer. If it's not, it indicates 208 // unexpected modifications to the file system. 209 offset, err := strconv.Atoi(off) 210 if err != nil { 211 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s for %s to int: %v", merkleOffsetInParentXattr, childPath, err)) 212 } 213 214 // Open parent Merkle tree file to read and verify child's hash. 215 parentMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ 216 Root: parent.lowerMerkleVD, 217 Start: parent.lowerMerkleVD, 218 }, &vfs.OpenOptions{ 219 Flags: linux.O_RDONLY, 220 }) 221 222 // The parent Merkle tree file should have been created. If it's 223 // missing, it indicates an unexpected modification to the file system. 224 if linuxerr.Equals(linuxerr.ENOENT, err) { 225 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to open parent Merkle file for %s: %v", childPath, err)) 226 } 227 if err != nil { 228 return nil, err 229 } 230 231 defer parentMerkleFD.DecRef(ctx) 232 233 // dataSize is the size of raw data for the Merkle tree. For a file, 234 // dataSize is the size of the whole file. For a directory, dataSize is 235 // the size of all its children's hashes. 236 dataSize, err := parentMerkleFD.GetXattr(ctx, &vfs.GetXattrOptions{ 237 Name: merkleSizeXattr, 238 Size: sizeOfStringInt32, 239 }) 240 241 // The Merkle tree file for the child should have been created and 242 // contains the expected xattrs. If the file or the xattr does not 243 // exist, it indicates unexpected modifications to the file system. 244 if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) { 245 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleSizeXattr, childPath, err)) 246 } 247 if err != nil { 248 return nil, err 249 } 250 251 // The dataSize xattr should be an integer. If it's not, it indicates 252 // unexpected modifications to the file system. 253 parentSize, err := strconv.Atoi(dataSize) 254 if err != nil { 255 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s for %s to int: %v", merkleSizeXattr, childPath, err)) 256 } 257 258 fdReader := FileReadWriteSeeker{ 259 FD: parentMerkleFD, 260 Ctx: ctx, 261 } 262 263 parentStat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{ 264 Root: parent.lowerVD, 265 Start: parent.lowerVD, 266 }, &vfs.StatOptions{}) 267 if linuxerr.Equals(linuxerr.ENOENT, err) { 268 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get parent stat for %s: %v", childPath, err)) 269 } 270 if err != nil { 271 return nil, err 272 } 273 274 // Since we are verifying against a directory Merkle tree, buf should 275 // contain the hash of the children in the parent Merkle tree when 276 // Verify returns with success. 277 var buf bytes.Buffer 278 parent.hashMu.RLock() 279 _, err = merkletree.Verify(&merkletree.VerifyParams{ 280 Out: &buf, 281 File: &fdReader, 282 Tree: &fdReader, 283 Size: int64(parentSize), 284 Name: parent.name, 285 Mode: uint32(parentStat.Mode), 286 UID: parentStat.UID, 287 GID: parentStat.GID, 288 Children: parent.childrenList, 289 HashAlgorithms: fs.alg.toLinuxHashAlg(), 290 ReadOffset: int64(offset), 291 ReadSize: int64(merkletree.DigestSize(fs.alg.toLinuxHashAlg())), 292 Expected: parent.hash, 293 DataAndTreeInSameFile: true, 294 }) 295 parent.hashMu.RUnlock() 296 if err != nil && err != io.EOF { 297 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Verification for %s failed: %v", childPath, err)) 298 } 299 300 // Cache child hash when it's verified the first time. 301 child.hashMu.Lock() 302 if len(child.hash) == 0 { 303 child.hash = buf.Bytes() 304 } 305 child.hashMu.Unlock() 306 return child, nil 307 } 308 309 // verifyStatAndChildrenLocked verifies the stat and children names against the 310 // verified hash. The mode/uid/gid and childrenNames of the file is cached 311 // after verified. 312 // 313 // Preconditions: d.dirMu must be locked. 314 func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry, stat linux.Statx) error { 315 vfsObj := fs.vfsfs.VirtualFilesystem() 316 317 // Get the path to the child dentry. This is only used to provide path 318 // information in failure case. 319 childPath, err := vfsObj.PathnameWithDeleted(ctx, d.fs.rootDentry.lowerVD, d.lowerVD) 320 if err != nil { 321 return err 322 } 323 324 fs.verityMu.RLock() 325 defer fs.verityMu.RUnlock() 326 327 fd, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ 328 Root: d.lowerMerkleVD, 329 Start: d.lowerMerkleVD, 330 }, &vfs.OpenOptions{ 331 Flags: linux.O_RDONLY, 332 }) 333 if linuxerr.Equals(linuxerr.ENOENT, err) { 334 return fs.alertIntegrityViolation(fmt.Sprintf("Failed to open merkle file for %s: %v", childPath, err)) 335 } 336 if err != nil { 337 return err 338 } 339 340 defer fd.DecRef(ctx) 341 342 merkleSize, err := fd.GetXattr(ctx, &vfs.GetXattrOptions{ 343 Name: merkleSizeXattr, 344 Size: sizeOfStringInt32, 345 }) 346 347 if linuxerr.Equals(linuxerr.ENODATA, err) { 348 return fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", merkleSizeXattr, childPath, err)) 349 } 350 if err != nil { 351 return err 352 } 353 354 size, err := strconv.Atoi(merkleSize) 355 if err != nil { 356 return fs.alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s for %s to int: %v", merkleSizeXattr, childPath, err)) 357 } 358 359 if d.isDir() && len(d.childrenNames) == 0 { 360 childrenOffString, err := fd.GetXattr(ctx, &vfs.GetXattrOptions{ 361 Name: childrenOffsetXattr, 362 Size: sizeOfStringInt32, 363 }) 364 365 if linuxerr.Equals(linuxerr.ENODATA, err) { 366 return fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", childrenOffsetXattr, childPath, err)) 367 } 368 if err != nil { 369 return err 370 } 371 childrenOffset, err := strconv.Atoi(childrenOffString) 372 if err != nil { 373 return fs.alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenOffsetXattr, err)) 374 } 375 376 childrenSizeString, err := fd.GetXattr(ctx, &vfs.GetXattrOptions{ 377 Name: childrenSizeXattr, 378 Size: sizeOfStringInt32, 379 }) 380 381 if linuxerr.Equals(linuxerr.ENODATA, err) { 382 return fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", childrenSizeXattr, childPath, err)) 383 } 384 if err != nil { 385 return err 386 } 387 childrenSize, err := strconv.Atoi(childrenSizeString) 388 if err != nil { 389 return fs.alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenSizeXattr, err)) 390 } 391 392 childrenNames := make([]byte, childrenSize) 393 if _, err := fd.PRead(ctx, usermem.BytesIOSequence(childrenNames), int64(childrenOffset), vfs.ReadOptions{}); err != nil { 394 return fs.alertIntegrityViolation(fmt.Sprintf("Failed to read children map for %s: %v", childPath, err)) 395 } 396 397 if err := json.Unmarshal(childrenNames, &d.childrenNames); err != nil { 398 return fs.alertIntegrityViolation(fmt.Sprintf("Failed to deserialize childrenNames of %s: %v", childPath, err)) 399 } 400 } 401 402 fdReader := FileReadWriteSeeker{ 403 FD: fd, 404 Ctx: ctx, 405 } 406 407 var buf bytes.Buffer 408 d.hashMu.RLock() 409 410 d.generateChildrenList() 411 412 params := &merkletree.VerifyParams{ 413 Out: &buf, 414 Tree: &fdReader, 415 Size: int64(size), 416 Name: d.name, 417 Mode: uint32(stat.Mode), 418 UID: stat.UID, 419 GID: stat.GID, 420 Children: d.childrenList, 421 HashAlgorithms: fs.alg.toLinuxHashAlg(), 422 ReadOffset: 0, 423 // Set read size to 0 so only the metadata is verified. 424 ReadSize: 0, 425 Expected: d.hash, 426 DataAndTreeInSameFile: false, 427 } 428 d.hashMu.RUnlock() 429 if atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFDIR { 430 params.DataAndTreeInSameFile = true 431 } 432 433 if d.isSymlink() { 434 target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{ 435 Root: d.lowerVD, 436 Start: d.lowerVD, 437 }) 438 if err != nil { 439 return err 440 } 441 params.SymlinkTarget = target 442 } 443 444 if _, err := merkletree.Verify(params); err != nil && err != io.EOF { 445 return fs.alertIntegrityViolation(fmt.Sprintf("Verification stat for %s failed: %v", childPath, err)) 446 } 447 d.mode = uint32(stat.Mode) 448 d.uid = stat.UID 449 d.gid = stat.GID 450 d.size = uint32(size) 451 d.symlinkTarget = params.SymlinkTarget 452 return nil 453 } 454 455 // Preconditions: 456 // * fs.renameMu must be locked. 457 // * parent.dirMu must be locked. 458 func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) { 459 if child, ok := parent.children[name]; ok { 460 // If verity is enabled on child, we should check again whether 461 // the file and the corresponding Merkle tree are as expected, 462 // in order to catch deletion/renaming after the last time it's 463 // accessed. 464 if child.verityEnabled() { 465 vfsObj := fs.vfsfs.VirtualFilesystem() 466 // Get the path to the child dentry. This is only used 467 // to provide path information in failure case. 468 path, err := vfsObj.PathnameWithDeleted(ctx, child.fs.rootDentry.lowerVD, child.lowerVD) 469 if err != nil { 470 return nil, err 471 } 472 473 childVD, err := parent.getLowerAt(ctx, vfsObj, name) 474 if linuxerr.Equals(linuxerr.ENOENT, err) { 475 // The file was previously accessed. If the 476 // file does not exist now, it indicates an 477 // unexpected modification to the file system. 478 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Target file %s is expected but missing", path)) 479 } 480 if err != nil { 481 return nil, err 482 } 483 defer childVD.DecRef(ctx) 484 485 childMerkleVD, err := parent.getLowerAt(ctx, vfsObj, merklePrefix+name) 486 // The Merkle tree file was previous accessed. If it 487 // does not exist now, it indicates an unexpected 488 // modification to the file system. 489 if linuxerr.Equals(linuxerr.ENOENT, err) { 490 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Expected Merkle file for target %s but none found", path)) 491 } 492 if err != nil { 493 return nil, err 494 } 495 496 defer childMerkleVD.DecRef(ctx) 497 } 498 499 // If enabling verification on files/directories is not allowed 500 // during runtime, all cached children are already verified. If 501 // runtime enable is allowed and the parent directory is 502 // enabled, we should verify the child hash here because it may 503 // be cached before enabled. 504 if fs.allowRuntimeEnable { 505 if parent.verityEnabled() { 506 if _, err := fs.verifyChildLocked(ctx, parent, child); err != nil { 507 return nil, err 508 } 509 } 510 if child.verityEnabled() { 511 vfsObj := fs.vfsfs.VirtualFilesystem() 512 mask := uint32(linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID) 513 stat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{ 514 Root: child.lowerVD, 515 Start: child.lowerVD, 516 }, &vfs.StatOptions{ 517 Mask: mask, 518 }) 519 if err != nil { 520 return nil, err 521 } 522 if err := fs.verifyStatAndChildrenLocked(ctx, child, stat); err != nil { 523 return nil, err 524 } 525 } 526 } 527 return child, nil 528 } 529 child, err := fs.lookupAndVerifyLocked(ctx, parent, name) 530 if err != nil { 531 return nil, err 532 } 533 if parent.children == nil { 534 parent.children = make(map[string]*dentry) 535 } 536 parent.children[name] = child 537 // child's refcount is initially 0, so it may be dropped after traversal. 538 *ds = appendDentry(*ds, child) 539 return child, nil 540 } 541 542 // Preconditions: 543 // * fs.renameMu must be locked. 544 // * parent.dirMu must be locked. 545 func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, name string) (*dentry, error) { 546 vfsObj := fs.vfsfs.VirtualFilesystem() 547 548 if parent.verityEnabled() { 549 if _, ok := parent.childrenNames[name]; !ok { 550 return nil, syserror.ENOENT 551 } 552 } 553 554 parentPath, err := vfsObj.PathnameWithDeleted(ctx, parent.fs.rootDentry.lowerVD, parent.lowerVD) 555 if err != nil { 556 return nil, err 557 } 558 559 childVD, err := parent.getLowerAt(ctx, vfsObj, name) 560 if parent.verityEnabled() && linuxerr.Equals(linuxerr.ENOENT, err) { 561 return nil, fs.alertIntegrityViolation(fmt.Sprintf("file %s expected but not found", parentPath+"/"+name)) 562 } 563 if err != nil { 564 return nil, err 565 } 566 567 // The dentry needs to be cleaned up if any error occurs. IncRef will be 568 // called if a verity child dentry is successfully created. 569 defer childVD.DecRef(ctx) 570 571 childMerkleVD, err := parent.getLowerAt(ctx, vfsObj, merklePrefix+name) 572 if err != nil { 573 if linuxerr.Equals(linuxerr.ENOENT, err) { 574 if parent.verityEnabled() { 575 return nil, fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", parentPath+"/"+name)) 576 } 577 childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ 578 Root: parent.lowerVD, 579 Start: parent.lowerVD, 580 Path: fspath.Parse(merklePrefix + name), 581 }, &vfs.OpenOptions{ 582 Flags: linux.O_RDWR | linux.O_CREAT, 583 Mode: 0644, 584 }) 585 if err != nil { 586 return nil, err 587 } 588 childMerkleFD.DecRef(ctx) 589 childMerkleVD, err = parent.getLowerAt(ctx, vfsObj, merklePrefix+name) 590 if err != nil { 591 return nil, err 592 } 593 } else { 594 return nil, err 595 } 596 } 597 598 // Clear the Merkle tree file if they are to be generated at runtime. 599 // TODO(b/182315468): Optimize the Merkle tree generate process to 600 // allow only updating certain files/directories. 601 if fs.allowRuntimeEnable { 602 childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ 603 Root: childMerkleVD, 604 Start: childMerkleVD, 605 }, &vfs.OpenOptions{ 606 Flags: linux.O_RDWR | linux.O_TRUNC, 607 Mode: 0644, 608 }) 609 if err != nil { 610 return nil, err 611 } 612 childMerkleFD.DecRef(ctx) 613 } 614 615 // The dentry needs to be cleaned up if any error occurs. IncRef will be 616 // called if a verity child dentry is successfully created. 617 defer childMerkleVD.DecRef(ctx) 618 619 mask := uint32(linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID) 620 stat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{ 621 Root: childVD, 622 Start: childVD, 623 }, &vfs.StatOptions{ 624 Mask: mask, 625 }) 626 if err != nil { 627 return nil, err 628 } 629 630 child := fs.newDentry() 631 child.lowerVD = childVD 632 child.lowerMerkleVD = childMerkleVD 633 634 // Increase the reference for both childVD and childMerkleVD as they are 635 // held by child. If this function fails and the child is destroyed, the 636 // references will be decreased in destroyLocked. 637 childVD.IncRef() 638 childMerkleVD.IncRef() 639 640 child.name = name 641 642 child.mode = uint32(stat.Mode) 643 child.uid = stat.UID 644 child.gid = stat.GID 645 child.childrenNames = make(map[string]struct{}) 646 647 // Verify child hash. This should always be performed unless in 648 // allowRuntimeEnable mode and the parent directory hasn't been enabled 649 // yet. 650 if parent.verityEnabled() { 651 if _, err := fs.verifyChildLocked(ctx, parent, child); err != nil { 652 child.destroyLocked(ctx) 653 return nil, err 654 } 655 } 656 if child.verityEnabled() { 657 if err := fs.verifyStatAndChildrenLocked(ctx, child, stat); err != nil { 658 child.destroyLocked(ctx) 659 return nil, err 660 } 661 } 662 663 parent.IncRef() 664 child.parent = parent 665 666 return child, nil 667 } 668 669 // walkParentDirLocked resolves all but the last path component of rp to an 670 // existing directory, starting from the given directory (which is usually 671 // rp.Start().Impl().(*dentry)). It does not check that the returned directory 672 // is searchable by the provider of rp. 673 // 674 // Preconditions: 675 // * fs.renameMu must be locked. 676 // * !rp.Done(). 677 func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) { 678 for !rp.Final() { 679 d.dirMu.Lock() 680 next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds) 681 d.dirMu.Unlock() 682 if err != nil { 683 return nil, err 684 } 685 d = next 686 } 687 if !d.isDir() { 688 return nil, syserror.ENOTDIR 689 } 690 return d, nil 691 } 692 693 // resolveLocked resolves rp to an existing file. 694 // 695 // Preconditions: fs.renameMu must be locked. 696 func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) { 697 d := rp.Start().Impl().(*dentry) 698 for !rp.Done() { 699 d.dirMu.Lock() 700 next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds) 701 d.dirMu.Unlock() 702 if err != nil { 703 return nil, err 704 } 705 d = next 706 } 707 if rp.MustBeDir() && !d.isDir() { 708 return nil, syserror.ENOTDIR 709 } 710 return d, nil 711 } 712 713 // AccessAt implements vfs.Filesystem.Impl.AccessAt. 714 func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { 715 // Verity file system is read-only. 716 if ats&vfs.MayWrite != 0 { 717 return linuxerr.EROFS 718 } 719 var ds *[]*dentry 720 fs.renameMu.RLock() 721 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 722 d, err := fs.resolveLocked(ctx, rp, &ds) 723 if err != nil { 724 return err 725 } 726 return d.checkPermissions(creds, ats) 727 } 728 729 // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. 730 func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { 731 var ds *[]*dentry 732 fs.renameMu.RLock() 733 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 734 d, err := fs.resolveLocked(ctx, rp, &ds) 735 if err != nil { 736 return nil, err 737 } 738 if opts.CheckSearchable { 739 if !d.isDir() { 740 return nil, syserror.ENOTDIR 741 } 742 if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 743 return nil, err 744 } 745 } 746 d.IncRef() 747 return &d.vfsd, nil 748 } 749 750 // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. 751 func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { 752 var ds *[]*dentry 753 fs.renameMu.RLock() 754 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 755 start := rp.Start().Impl().(*dentry) 756 d, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 757 if err != nil { 758 return nil, err 759 } 760 d.IncRef() 761 return &d.vfsd, nil 762 } 763 764 // LinkAt implements vfs.FilesystemImpl.LinkAt. 765 func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { 766 // Verity file system is read-only. 767 return linuxerr.EROFS 768 } 769 770 // MkdirAt implements vfs.FilesystemImpl.MkdirAt. 771 func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { 772 // Verity file system is read-only. 773 return linuxerr.EROFS 774 } 775 776 // MknodAt implements vfs.FilesystemImpl.MknodAt. 777 func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { 778 // Verity file system is read-only. 779 return linuxerr.EROFS 780 } 781 782 // OpenAt implements vfs.FilesystemImpl.OpenAt. 783 func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 784 // Verity fs is read-only. 785 if opts.Flags&(linux.O_WRONLY|linux.O_CREAT) != 0 { 786 return nil, linuxerr.EROFS 787 } 788 789 var ds *[]*dentry 790 fs.renameMu.RLock() 791 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 792 793 start := rp.Start().Impl().(*dentry) 794 if rp.Done() { 795 return start.openLocked(ctx, rp, &opts) 796 } 797 798 afterTrailingSymlink: 799 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 800 if err != nil { 801 return nil, err 802 } 803 804 // Check for search permission in the parent directory. 805 if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 806 return nil, err 807 } 808 809 // Open existing child or follow symlink. 810 parent.dirMu.Lock() 811 child, err := fs.stepLocked(ctx, rp, parent, false /*mayFollowSymlinks*/, &ds) 812 parent.dirMu.Unlock() 813 if err != nil { 814 return nil, err 815 } 816 if child.isSymlink() && rp.ShouldFollowSymlink() { 817 target, err := child.readlink(ctx) 818 if err != nil { 819 return nil, err 820 } 821 if err := rp.HandleSymlink(target); err != nil { 822 return nil, err 823 } 824 start = parent 825 goto afterTrailingSymlink 826 } 827 return child.openLocked(ctx, rp, &opts) 828 } 829 830 // Preconditions: fs.renameMu must be locked. 831 func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { 832 // Users should not open the Merkle tree files. Those are for verity fs 833 // use only. 834 if strings.Contains(d.name, merklePrefix) { 835 return nil, linuxerr.EPERM 836 } 837 ats := vfs.AccessTypesForOpenFlags(opts) 838 if err := d.checkPermissions(rp.Credentials(), ats); err != nil { 839 return nil, err 840 } 841 842 // Verity fs is read-only. 843 if ats&vfs.MayWrite != 0 { 844 return nil, linuxerr.EROFS 845 } 846 847 // Get the path to the target file. This is only used to provide path 848 // information in failure case. 849 path, err := d.fs.vfsfs.VirtualFilesystem().PathnameWithDeleted(ctx, d.fs.rootDentry.lowerVD, d.lowerVD) 850 if err != nil { 851 return nil, err 852 } 853 854 // Open the file in the underlying file system. 855 lowerFD, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ 856 Root: d.lowerVD, 857 Start: d.lowerVD, 858 }, opts) 859 860 // The file should exist, as we succeeded in finding its dentry. If it's 861 // missing, it indicates an unexpected modification to the file system. 862 if err != nil { 863 if linuxerr.Equals(linuxerr.ENOENT, err) { 864 return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("File %s expected but not found", path)) 865 } 866 return nil, err 867 } 868 869 // lowerFD needs to be cleaned up if any error occurs. IncRef will be 870 // called if a verity FD is successfully created. 871 defer lowerFD.DecRef(ctx) 872 873 // Open the Merkle tree file corresponding to the current file/directory 874 // to be used later for verifying Read/Walk. 875 merkleReader, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ 876 Root: d.lowerMerkleVD, 877 Start: d.lowerMerkleVD, 878 }, &vfs.OpenOptions{ 879 Flags: linux.O_RDONLY, 880 }) 881 882 // The Merkle tree file should exist, as we succeeded in finding its 883 // dentry. If it's missing, it indicates an unexpected modification to 884 // the file system. 885 if err != nil { 886 if linuxerr.Equals(linuxerr.ENOENT, err) { 887 return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", path)) 888 } 889 return nil, err 890 } 891 892 // merkleReader needs to be cleaned up if any error occurs. IncRef will 893 // be called if a verity FD is successfully created. 894 defer merkleReader.DecRef(ctx) 895 896 lowerFlags := lowerFD.StatusFlags() 897 lowerFDOpts := lowerFD.Options() 898 var merkleWriter *vfs.FileDescription 899 var parentMerkleWriter *vfs.FileDescription 900 901 // Only open the Merkle tree files for write if in allowRuntimeEnable 902 // mode. 903 if d.fs.allowRuntimeEnable { 904 merkleWriter, err = rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ 905 Root: d.lowerMerkleVD, 906 Start: d.lowerMerkleVD, 907 }, &vfs.OpenOptions{ 908 Flags: linux.O_WRONLY | linux.O_APPEND, 909 }) 910 if err != nil { 911 if linuxerr.Equals(linuxerr.ENOENT, err) { 912 return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", path)) 913 } 914 return nil, err 915 } 916 // merkleWriter is cleaned up if any error occurs. IncRef will 917 // be called if a verity FD is created successfully. 918 defer merkleWriter.DecRef(ctx) 919 920 if d.parent != nil { 921 parentMerkleWriter, err = rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ 922 Root: d.parent.lowerMerkleVD, 923 Start: d.parent.lowerMerkleVD, 924 }, &vfs.OpenOptions{ 925 Flags: linux.O_WRONLY | linux.O_APPEND, 926 }) 927 if err != nil { 928 if linuxerr.Equals(linuxerr.ENOENT, err) { 929 parentPath, _ := d.fs.vfsfs.VirtualFilesystem().PathnameWithDeleted(ctx, d.fs.rootDentry.lowerVD, d.parent.lowerVD) 930 return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", parentPath)) 931 } 932 return nil, err 933 } 934 // parentMerkleWriter is cleaned up if any error occurs. IncRef 935 // will be called if a verity FD is created successfully. 936 defer parentMerkleWriter.DecRef(ctx) 937 } 938 } 939 940 fd := &fileDescription{ 941 d: d, 942 lowerFD: lowerFD, 943 merkleReader: merkleReader, 944 merkleWriter: merkleWriter, 945 parentMerkleWriter: parentMerkleWriter, 946 isDir: d.isDir(), 947 } 948 949 if err := fd.vfsfd.Init(fd, lowerFlags, rp.Mount(), &d.vfsd, &lowerFDOpts); err != nil { 950 return nil, err 951 } 952 lowerFD.IncRef() 953 merkleReader.IncRef() 954 if merkleWriter != nil { 955 merkleWriter.IncRef() 956 } 957 if parentMerkleWriter != nil { 958 parentMerkleWriter.IncRef() 959 } 960 return &fd.vfsfd, err 961 } 962 963 // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. 964 func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { 965 var ds *[]*dentry 966 fs.renameMu.RLock() 967 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 968 d, err := fs.resolveLocked(ctx, rp, &ds) 969 if err != nil { 970 return "", err 971 } 972 return d.readlink(ctx) 973 } 974 975 // RenameAt implements vfs.FilesystemImpl.RenameAt. 976 func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { 977 // Verity file system is read-only. 978 return linuxerr.EROFS 979 } 980 981 // RmdirAt implements vfs.FilesystemImpl.RmdirAt. 982 func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { 983 // Verity file system is read-only. 984 return linuxerr.EROFS 985 } 986 987 // SetStatAt implements vfs.FilesystemImpl.SetStatAt. 988 func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { 989 // Verity file system is read-only. 990 return linuxerr.EROFS 991 } 992 993 // StatAt implements vfs.FilesystemImpl.StatAt. 994 func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { 995 var ds *[]*dentry 996 fs.renameMu.RLock() 997 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 998 d, err := fs.resolveLocked(ctx, rp, &ds) 999 if err != nil { 1000 return linux.Statx{}, err 1001 } 1002 1003 var stat linux.Statx 1004 stat, err = fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{ 1005 Root: d.lowerVD, 1006 Start: d.lowerVD, 1007 }, &opts) 1008 if err != nil { 1009 return linux.Statx{}, err 1010 } 1011 d.dirMu.Lock() 1012 if d.verityEnabled() { 1013 if err := fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil { 1014 return linux.Statx{}, err 1015 } 1016 } 1017 d.dirMu.Unlock() 1018 return stat, nil 1019 } 1020 1021 // StatFSAt implements vfs.FilesystemImpl.StatFSAt. 1022 func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { 1023 // TODO(b/159261227): Implement StatFSAt. 1024 return linux.Statfs{}, nil 1025 } 1026 1027 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 1028 func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { 1029 // Verity file system is read-only. 1030 return linuxerr.EROFS 1031 } 1032 1033 // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. 1034 func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { 1035 // Verity file system is read-only. 1036 return linuxerr.EROFS 1037 } 1038 1039 // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. 1040 func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { 1041 var ds *[]*dentry 1042 fs.renameMu.RLock() 1043 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1044 if _, err := fs.resolveLocked(ctx, rp, &ds); err != nil { 1045 return nil, err 1046 } 1047 return nil, linuxerr.ECONNREFUSED 1048 } 1049 1050 // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. 1051 func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { 1052 var ds *[]*dentry 1053 fs.renameMu.RLock() 1054 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1055 d, err := fs.resolveLocked(ctx, rp, &ds) 1056 if err != nil { 1057 return nil, err 1058 } 1059 lowerVD := d.lowerVD 1060 return fs.vfsfs.VirtualFilesystem().ListXattrAt(ctx, d.fs.creds, &vfs.PathOperation{ 1061 Root: lowerVD, 1062 Start: lowerVD, 1063 }, size) 1064 } 1065 1066 // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. 1067 func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { 1068 var ds *[]*dentry 1069 fs.renameMu.RLock() 1070 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1071 d, err := fs.resolveLocked(ctx, rp, &ds) 1072 if err != nil { 1073 return "", err 1074 } 1075 lowerVD := d.lowerVD 1076 return fs.vfsfs.VirtualFilesystem().GetXattrAt(ctx, d.fs.creds, &vfs.PathOperation{ 1077 Root: lowerVD, 1078 Start: lowerVD, 1079 }, &opts) 1080 } 1081 1082 // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. 1083 func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { 1084 // Verity file system is read-only. 1085 return linuxerr.EROFS 1086 } 1087 1088 // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. 1089 func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { 1090 // Verity file system is read-only. 1091 return linuxerr.EROFS 1092 } 1093 1094 // PrependPath implements vfs.FilesystemImpl.PrependPath. 1095 func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { 1096 fs.renameMu.RLock() 1097 defer fs.renameMu.RUnlock() 1098 mnt := vd.Mount() 1099 d := vd.Dentry().Impl().(*dentry) 1100 for { 1101 if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() { 1102 return vfs.PrependPathAtVFSRootError{} 1103 } 1104 if &d.vfsd == mnt.Root() { 1105 return nil 1106 } 1107 if d.parent == nil { 1108 return vfs.PrependPathAtNonMountRootError{} 1109 } 1110 b.PrependComponent(d.name) 1111 d = d.parent 1112 } 1113 }