github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/verity/verity.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package verity provides a filesystem implementation that is a wrapper of 16 // another file system. 17 // The verity file system provides integrity check for the underlying file 18 // system by providing verification for path traversals and each read. 19 // The verity file system is read-only, except for one case: when 20 // allowRuntimeEnable is true, additional Merkle files can be generated using 21 // the FS_IOC_ENABLE_VERITY ioctl. 22 // 23 // Lock order: 24 // 25 // filesystem.renameMu 26 // dentry.dirMu 27 // fileDescription.mu 28 // filesystem.verityMu 29 // dentry.hashMu 30 // 31 // Locking dentry.dirMu in multiple dentries requires that parent dentries are 32 // locked before child dentries, and that filesystem.renameMu is locked to 33 // stabilize this relationship. 34 package verity 35 36 import ( 37 "bytes" 38 "encoding/hex" 39 "encoding/json" 40 "fmt" 41 "math" 42 "sort" 43 "strconv" 44 "strings" 45 "sync/atomic" 46 47 "github.com/SagerNet/gvisor/pkg/abi/linux" 48 "github.com/SagerNet/gvisor/pkg/context" 49 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 50 "github.com/SagerNet/gvisor/pkg/fspath" 51 "github.com/SagerNet/gvisor/pkg/hostarch" 52 "github.com/SagerNet/gvisor/pkg/marshal/primitive" 53 "github.com/SagerNet/gvisor/pkg/merkletree" 54 "github.com/SagerNet/gvisor/pkg/refsvfs2" 55 "github.com/SagerNet/gvisor/pkg/safemem" 56 "github.com/SagerNet/gvisor/pkg/sentry/arch" 57 fslock "github.com/SagerNet/gvisor/pkg/sentry/fs/lock" 58 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 59 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 60 "github.com/SagerNet/gvisor/pkg/sentry/memmap" 61 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 62 "github.com/SagerNet/gvisor/pkg/sync" 63 "github.com/SagerNet/gvisor/pkg/syserror" 64 "github.com/SagerNet/gvisor/pkg/usermem" 65 ) 66 67 const ( 68 // Name is the default filesystem name. 69 Name = "verity" 70 71 // merklePrefix is the prefix of the Merkle tree files. For example, the Merkle 72 // tree file for "/foo" is "/.merkle.verity.foo". 73 merklePrefix = ".merkle.verity." 74 75 // merkleRootPrefix is the prefix of the Merkle tree root file. This 76 // needs to be different from merklePrefix to avoid name collision. 77 merkleRootPrefix = ".merkleroot.verity." 78 79 // merkleOffsetInParentXattr is the extended attribute name specifying the 80 // offset of the child hash in its parent's Merkle tree. 81 merkleOffsetInParentXattr = "user.merkle.offset" 82 83 // merkleSizeXattr is the extended attribute name specifying the size of data 84 // hashed by the corresponding Merkle tree. For a regular file, this is the 85 // file size. For a directory, this is the size of all its children's hashes. 86 merkleSizeXattr = "user.merkle.size" 87 88 // childrenOffsetXattr is the extended attribute name specifying the 89 // names of the offset of the serialized children names in the Merkle 90 // tree file. 91 childrenOffsetXattr = "user.merkle.childrenOffset" 92 93 // childrenSizeXattr is the extended attribute name specifying the size 94 // of the serialized children names. 95 childrenSizeXattr = "user.merkle.childrenSize" 96 97 // sizeOfStringInt32 is the size for a 32 bit integer stored as string in 98 // extended attributes. The maximum value of a 32 bit integer has 10 digits. 99 sizeOfStringInt32 = 10 100 ) 101 102 var ( 103 // verityMu synchronizes concurrent operations that enable verity and perform 104 // verification checks. 105 verityMu sync.RWMutex 106 ) 107 108 // Mount option names for verityfs. 109 const ( 110 moptLowerPath = "lower_path" 111 moptRootHash = "root_hash" 112 moptRootName = "root_name" 113 ) 114 115 // HashAlgorithm is a type specifying the algorithm used to hash the file 116 // content. 117 type HashAlgorithm int 118 119 // ViolationAction is a type specifying the action when an integrity violation 120 // is detected. 121 type ViolationAction int 122 123 const ( 124 // PanicOnViolation terminates the sentry on detected violation. 125 PanicOnViolation ViolationAction = 0 126 // ErrorOnViolation returns an error from the violating system call on 127 // detected violation. 128 ErrorOnViolation = 1 129 ) 130 131 // Currently supported hashing algorithms include SHA256 and SHA512. 132 const ( 133 SHA256 HashAlgorithm = iota 134 SHA512 135 ) 136 137 func (alg HashAlgorithm) toLinuxHashAlg() int { 138 switch alg { 139 case SHA256: 140 return linux.FS_VERITY_HASH_ALG_SHA256 141 case SHA512: 142 return linux.FS_VERITY_HASH_ALG_SHA512 143 default: 144 return 0 145 } 146 } 147 148 // FilesystemType implements vfs.FilesystemType. 149 // 150 // +stateify savable 151 type FilesystemType struct{} 152 153 // filesystem implements vfs.FilesystemImpl. 154 // 155 // +stateify savable 156 type filesystem struct { 157 vfsfs vfs.Filesystem 158 159 // creds is a copy of the filesystem's creator's credentials, which are 160 // used for accesses to the underlying file system. creds is immutable. 161 creds *auth.Credentials 162 163 // allowRuntimeEnable is true if using ioctl with FS_IOC_ENABLE_VERITY 164 // to build Merkle trees in the verity file system is allowed. If this 165 // is false, no new Merkle trees can be built, and only the files that 166 // had Merkle trees before startup (e.g. from a host filesystem mounted 167 // with gofer fs) can be verified. 168 allowRuntimeEnable bool 169 170 // lowerMount is the underlying file system mount. 171 lowerMount *vfs.Mount 172 173 // rootDentry is the mount root Dentry for this file system, which 174 // stores the root hash of the whole file system in bytes. 175 rootDentry *dentry 176 177 // alg is the algorithms used to hash the files in the verity file 178 // system. 179 alg HashAlgorithm 180 181 // action specifies the action towards detected violation. 182 action ViolationAction 183 184 // opts is the string mount options passed to opts.Data. 185 opts string 186 187 // renameMu synchronizes renaming with non-renaming operations in order 188 // to ensure consistent lock ordering between dentry.dirMu in different 189 // dentries. 190 renameMu sync.RWMutex `state:"nosave"` 191 192 // verityMu synchronizes enabling verity files, protects files or 193 // directories from being enabled by different threads simultaneously. 194 // It also ensures that verity does not access files that are being 195 // enabled. 196 // 197 // Also, the directory Merkle trees depends on the generated trees of 198 // its children. So they shouldn't be enabled the same time. This lock 199 // is for the whole file system to ensure that no more than one file is 200 // enabled the same time. 201 verityMu sync.RWMutex `state:"nosave"` 202 } 203 204 // InternalFilesystemOptions may be passed as 205 // vfs.GetFilesystemOptions.InternalData to FilesystemType.GetFilesystem. 206 // 207 // +stateify savable 208 type InternalFilesystemOptions struct { 209 // LowerName is the name of the filesystem wrapped by verity fs. 210 LowerName string 211 212 // Alg is the algorithms used to hash the files in the verity file 213 // system. 214 Alg HashAlgorithm 215 216 // AllowRuntimeEnable specifies whether the verity file system allows 217 // enabling verification for files (i.e. building Merkle trees) during 218 // runtime. 219 AllowRuntimeEnable bool 220 221 // LowerGetFSOptions is the file system option for the lower layer file 222 // system wrapped by verity file system. 223 LowerGetFSOptions vfs.GetFilesystemOptions 224 225 // Action specifies the action on an integrity violation. 226 Action ViolationAction 227 } 228 229 // Name implements vfs.FilesystemType.Name. 230 func (FilesystemType) Name() string { 231 return Name 232 } 233 234 // Release implements vfs.FilesystemType.Release. 235 func (FilesystemType) Release(ctx context.Context) {} 236 237 // alertIntegrityViolation alerts a violation of integrity, which usually means 238 // unexpected modification to the file system is detected. In ErrorOnViolation 239 // mode, it returns EIO, otherwise it panic. 240 func (fs *filesystem) alertIntegrityViolation(msg string) error { 241 if fs.action == ErrorOnViolation { 242 return syserror.EIO 243 } 244 panic(msg) 245 } 246 247 // GetFilesystem implements vfs.FilesystemType.GetFilesystem. 248 func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { 249 mopts := vfs.GenericParseMountOptions(opts.Data) 250 var rootHash []byte 251 if encodedRootHash, ok := mopts[moptRootHash]; ok { 252 delete(mopts, moptRootHash) 253 hash, err := hex.DecodeString(encodedRootHash) 254 if err != nil { 255 ctx.Warningf("verity.FilesystemType.GetFilesystem: Failed to decode root hash: %v", err) 256 return nil, nil, linuxerr.EINVAL 257 } 258 rootHash = hash 259 } 260 var lowerPathname string 261 if path, ok := mopts[moptLowerPath]; ok { 262 delete(mopts, moptLowerPath) 263 lowerPathname = path 264 } 265 rootName := "root" 266 if root, ok := mopts[moptRootName]; ok { 267 delete(mopts, moptRootName) 268 rootName = root 269 } 270 271 // Check for unparsed options. 272 if len(mopts) != 0 { 273 ctx.Warningf("verity.FilesystemType.GetFilesystem: unknown options: %v", mopts) 274 return nil, nil, linuxerr.EINVAL 275 } 276 277 // Handle internal options. 278 iopts, ok := opts.InternalData.(InternalFilesystemOptions) 279 if len(lowerPathname) == 0 && !ok { 280 ctx.Warningf("verity.FilesystemType.GetFilesystem: missing verity configs") 281 return nil, nil, linuxerr.EINVAL 282 } 283 if len(lowerPathname) != 0 { 284 if ok { 285 ctx.Warningf("verity.FilesystemType.GetFilesystem: unexpected verity configs with specified lower path") 286 return nil, nil, linuxerr.EINVAL 287 } 288 iopts = InternalFilesystemOptions{ 289 AllowRuntimeEnable: len(rootHash) == 0, 290 Action: ErrorOnViolation, 291 } 292 } 293 294 var lowerMount *vfs.Mount 295 var mountedLowerVD vfs.VirtualDentry 296 // Use an existing mount if lowerPath is provided. 297 if len(lowerPathname) != 0 { 298 vfsroot := vfs.RootFromContext(ctx) 299 if vfsroot.Ok() { 300 defer vfsroot.DecRef(ctx) 301 } 302 lowerPath := fspath.Parse(lowerPathname) 303 if !lowerPath.Absolute { 304 ctx.Infof("verity.FilesystemType.GetFilesystem: lower_path %q must be absolute", lowerPathname) 305 return nil, nil, linuxerr.EINVAL 306 } 307 var err error 308 mountedLowerVD, err = vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{ 309 Root: vfsroot, 310 Start: vfsroot, 311 Path: lowerPath, 312 FollowFinalSymlink: true, 313 }, &vfs.GetDentryOptions{ 314 CheckSearchable: true, 315 }) 316 if err != nil { 317 ctx.Infof("verity.FilesystemType.GetFilesystem: failed to resolve lower_path %q: %v", lowerPathname, err) 318 return nil, nil, err 319 } 320 lowerMount = mountedLowerVD.Mount() 321 defer mountedLowerVD.DecRef(ctx) 322 } else { 323 // Mount the lower file system. The lower file system is wrapped inside 324 // verity, and should not be exposed or connected. 325 mountOpts := &vfs.MountOptions{ 326 GetFilesystemOptions: iopts.LowerGetFSOptions, 327 InternalMount: true, 328 } 329 mnt, err := vfsObj.MountDisconnected(ctx, creds, "", iopts.LowerName, mountOpts) 330 if err != nil { 331 return nil, nil, err 332 } 333 lowerMount = mnt 334 } 335 336 fs := &filesystem{ 337 creds: creds.Fork(), 338 alg: iopts.Alg, 339 lowerMount: lowerMount, 340 action: iopts.Action, 341 opts: opts.Data, 342 allowRuntimeEnable: iopts.AllowRuntimeEnable, 343 } 344 fs.vfsfs.Init(vfsObj, &fstype, fs) 345 346 // Construct the root dentry. 347 d := fs.newDentry() 348 d.refs = 1 349 lowerVD := vfs.MakeVirtualDentry(lowerMount, lowerMount.Root()) 350 lowerVD.IncRef() 351 d.lowerVD = lowerVD 352 353 rootMerkleName := merkleRootPrefix + rootName 354 355 lowerMerkleVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{ 356 Root: lowerVD, 357 Start: lowerVD, 358 Path: fspath.Parse(rootMerkleName), 359 }, &vfs.GetDentryOptions{}) 360 361 // If runtime enable is allowed, the root merkle tree may be absent. We 362 // should create the tree file. 363 if linuxerr.Equals(linuxerr.ENOENT, err) && fs.allowRuntimeEnable { 364 lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ 365 Root: lowerVD, 366 Start: lowerVD, 367 Path: fspath.Parse(rootMerkleName), 368 }, &vfs.OpenOptions{ 369 Flags: linux.O_RDWR | linux.O_CREAT, 370 Mode: 0644, 371 }) 372 if err != nil { 373 fs.vfsfs.DecRef(ctx) 374 d.DecRef(ctx) 375 return nil, nil, err 376 } 377 lowerMerkleFD.DecRef(ctx) 378 lowerMerkleVD, err = vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{ 379 Root: lowerVD, 380 Start: lowerVD, 381 Path: fspath.Parse(rootMerkleName), 382 }, &vfs.GetDentryOptions{}) 383 if err != nil { 384 fs.vfsfs.DecRef(ctx) 385 d.DecRef(ctx) 386 return nil, nil, err 387 } 388 } else if err != nil { 389 // Failed to get dentry for the root Merkle file. This 390 // indicates an unexpected modification that removed/renamed 391 // the root Merkle file, or it's never generated. 392 fs.vfsfs.DecRef(ctx) 393 d.DecRef(ctx) 394 return nil, nil, fs.alertIntegrityViolation("Failed to find root Merkle file") 395 } 396 397 // Clear the Merkle tree file if they are to be generated at runtime. 398 // TODO(b/182315468): Optimize the Merkle tree generate process to 399 // allow only updating certain files/directories. 400 if fs.allowRuntimeEnable { 401 lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ 402 Root: lowerMerkleVD, 403 Start: lowerMerkleVD, 404 }, &vfs.OpenOptions{ 405 Flags: linux.O_RDWR | linux.O_TRUNC, 406 Mode: 0644, 407 }) 408 if err != nil { 409 return nil, nil, err 410 } 411 lowerMerkleFD.DecRef(ctx) 412 } 413 414 d.lowerMerkleVD = lowerMerkleVD 415 416 // Get metadata from the underlying file system. 417 const statMask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID 418 stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ 419 Root: lowerVD, 420 Start: lowerVD, 421 }, &vfs.StatOptions{ 422 Mask: statMask, 423 }) 424 if err != nil { 425 fs.vfsfs.DecRef(ctx) 426 d.DecRef(ctx) 427 return nil, nil, err 428 } 429 430 d.mode = uint32(stat.Mode) 431 d.uid = stat.UID 432 d.gid = stat.GID 433 d.childrenNames = make(map[string]struct{}) 434 435 d.hashMu.Lock() 436 d.hash = make([]byte, len(rootHash)) 437 copy(d.hash, rootHash) 438 d.hashMu.Unlock() 439 440 fs.rootDentry = d 441 442 if !d.isDir() { 443 ctx.Warningf("verity root must be a directory") 444 return nil, nil, linuxerr.EINVAL 445 } 446 447 if !fs.allowRuntimeEnable { 448 // Get children names from the underlying file system. 449 offString, err := vfsObj.GetXattrAt(ctx, creds, &vfs.PathOperation{ 450 Root: lowerMerkleVD, 451 Start: lowerMerkleVD, 452 }, &vfs.GetXattrOptions{ 453 Name: childrenOffsetXattr, 454 Size: sizeOfStringInt32, 455 }) 456 if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) { 457 return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenOffsetXattr, err)) 458 } 459 if err != nil { 460 return nil, nil, err 461 } 462 463 off, err := strconv.Atoi(offString) 464 if err != nil { 465 return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenOffsetXattr, err)) 466 } 467 468 sizeString, err := vfsObj.GetXattrAt(ctx, creds, &vfs.PathOperation{ 469 Root: lowerMerkleVD, 470 Start: lowerMerkleVD, 471 }, &vfs.GetXattrOptions{ 472 Name: childrenSizeXattr, 473 Size: sizeOfStringInt32, 474 }) 475 if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) { 476 return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenSizeXattr, err)) 477 } 478 if err != nil { 479 return nil, nil, err 480 } 481 size, err := strconv.Atoi(sizeString) 482 if err != nil { 483 return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenSizeXattr, err)) 484 } 485 486 lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ 487 Root: lowerMerkleVD, 488 Start: lowerMerkleVD, 489 }, &vfs.OpenOptions{ 490 Flags: linux.O_RDONLY, 491 }) 492 if linuxerr.Equals(linuxerr.ENOENT, err) { 493 return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to open root Merkle file: %v", err)) 494 } 495 if err != nil { 496 return nil, nil, err 497 } 498 499 defer lowerMerkleFD.DecRef(ctx) 500 501 childrenNames := make([]byte, size) 502 if _, err := lowerMerkleFD.PRead(ctx, usermem.BytesIOSequence(childrenNames), int64(off), vfs.ReadOptions{}); err != nil { 503 return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to read root children map: %v", err)) 504 } 505 506 if err := json.Unmarshal(childrenNames, &d.childrenNames); err != nil { 507 return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to deserialize childrenNames: %v", err)) 508 } 509 510 if err := fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil { 511 return nil, nil, err 512 } 513 d.generateChildrenList() 514 } 515 516 d.vfsd.Init(d) 517 518 return &fs.vfsfs, &d.vfsd, nil 519 } 520 521 // Release implements vfs.FilesystemImpl.Release. 522 func (fs *filesystem) Release(ctx context.Context) { 523 fs.lowerMount.DecRef(ctx) 524 } 525 526 // MountOptions implements vfs.FilesystemImpl.MountOptions. 527 func (fs *filesystem) MountOptions() string { 528 return fs.opts 529 } 530 531 // dentry implements vfs.DentryImpl. 532 // 533 // +stateify savable 534 type dentry struct { 535 vfsd vfs.Dentry 536 537 refs int64 538 539 // fs is the owning filesystem. fs is immutable. 540 fs *filesystem 541 542 // mode, uid, gid and size are the file mode, owner, group, and size of 543 // the file in the underlying file system. They are set when a dentry 544 // is initialized, and never modified. 545 mode uint32 546 uid uint32 547 gid uint32 548 size uint32 549 550 // parent is the dentry corresponding to this dentry's parent directory. 551 // name is this dentry's name in parent. If this dentry is a filesystem 552 // root, parent is nil and name is the empty string. parent and name are 553 // protected by fs.renameMu. 554 parent *dentry 555 name string 556 557 // If this dentry represents a directory, children maps the names of 558 // children for which dentries have been instantiated to those dentries, 559 // and dirents (if not nil) is a cache of dirents as returned by 560 // directoryFDs representing this directory. children is protected by 561 // dirMu. 562 dirMu sync.Mutex `state:"nosave"` 563 children map[string]*dentry 564 565 // childrenNames stores the name of all children of the dentry. This is 566 // used by verity to check whether a child is expected. This is only 567 // populated by enableVerity. childrenNames is also protected by dirMu. 568 childrenNames map[string]struct{} 569 570 // childrenList is a complete sorted list of childrenNames. This list 571 // is generated when verity is enabled, or the first time the file is 572 // verified in non runtime enable mode. 573 childrenList []string 574 575 // lowerVD is the VirtualDentry in the underlying file system. It is 576 // never modified after initialized. 577 lowerVD vfs.VirtualDentry 578 579 // lowerMerkleVD is the VirtualDentry of the corresponding Merkle tree 580 // in the underlying file system. It is never modified after 581 // initialized. 582 lowerMerkleVD vfs.VirtualDentry 583 584 // symlinkTarget is the target path of a symlink file in the underlying filesystem. 585 symlinkTarget string 586 587 // hash is the calculated hash for the current file or directory. hash 588 // is protected by hashMu. 589 hashMu sync.RWMutex `state:"nosave"` 590 hash []byte 591 } 592 593 // newDentry creates a new dentry representing the given verity file. The 594 // dentry initially has no references; it is the caller's responsibility to set 595 // the dentry's reference count and/or call dentry.destroy() as appropriate. 596 // The dentry is initially invalid in that it contains no underlying dentry; 597 // the caller is responsible for setting them. 598 func (fs *filesystem) newDentry() *dentry { 599 d := &dentry{ 600 fs: fs, 601 } 602 d.vfsd.Init(d) 603 refsvfs2.Register(d) 604 return d 605 } 606 607 // IncRef implements vfs.DentryImpl.IncRef. 608 func (d *dentry) IncRef() { 609 r := atomic.AddInt64(&d.refs, 1) 610 if d.LogRefs() { 611 refsvfs2.LogIncRef(d, r) 612 } 613 } 614 615 // TryIncRef implements vfs.DentryImpl.TryIncRef. 616 func (d *dentry) TryIncRef() bool { 617 for { 618 r := atomic.LoadInt64(&d.refs) 619 if r <= 0 { 620 return false 621 } 622 if atomic.CompareAndSwapInt64(&d.refs, r, r+1) { 623 if d.LogRefs() { 624 refsvfs2.LogTryIncRef(d, r+1) 625 } 626 return true 627 } 628 } 629 } 630 631 // DecRef implements vfs.DentryImpl.DecRef. 632 func (d *dentry) DecRef(ctx context.Context) { 633 r := atomic.AddInt64(&d.refs, -1) 634 if d.LogRefs() { 635 refsvfs2.LogDecRef(d, r) 636 } 637 if r == 0 { 638 d.fs.renameMu.Lock() 639 d.checkDropLocked(ctx) 640 d.fs.renameMu.Unlock() 641 } else if r < 0 { 642 panic("verity.dentry.DecRef() called without holding a reference") 643 } 644 } 645 646 func (d *dentry) decRefLocked(ctx context.Context) { 647 r := atomic.AddInt64(&d.refs, -1) 648 if d.LogRefs() { 649 refsvfs2.LogDecRef(d, r) 650 } 651 if r == 0 { 652 d.checkDropLocked(ctx) 653 } else if r < 0 { 654 panic("verity.dentry.decRefLocked() called without holding a reference") 655 } 656 } 657 658 // checkDropLocked should be called after d's reference count becomes 0 or it 659 // becomes deleted. 660 func (d *dentry) checkDropLocked(ctx context.Context) { 661 // Dentries with a positive reference count must be retained. Dentries 662 // with a negative reference count have already been destroyed. 663 if atomic.LoadInt64(&d.refs) != 0 { 664 return 665 } 666 // Refs is still zero; destroy it. 667 d.destroyLocked(ctx) 668 return 669 } 670 671 // destroyLocked destroys the dentry. 672 // 673 // Preconditions: 674 // * d.fs.renameMu must be locked for writing. 675 // * d.refs == 0. 676 func (d *dentry) destroyLocked(ctx context.Context) { 677 switch atomic.LoadInt64(&d.refs) { 678 case 0: 679 // Mark the dentry destroyed. 680 atomic.StoreInt64(&d.refs, -1) 681 case -1: 682 panic("verity.dentry.destroyLocked() called on already destroyed dentry") 683 default: 684 panic("verity.dentry.destroyLocked() called with references on the dentry") 685 } 686 687 if d.lowerVD.Ok() { 688 d.lowerVD.DecRef(ctx) 689 } 690 if d.lowerMerkleVD.Ok() { 691 d.lowerMerkleVD.DecRef(ctx) 692 } 693 if d.parent != nil { 694 d.parent.dirMu.Lock() 695 if !d.vfsd.IsDead() { 696 delete(d.parent.children, d.name) 697 } 698 d.parent.dirMu.Unlock() 699 d.parent.decRefLocked(ctx) 700 } 701 refsvfs2.Unregister(d) 702 } 703 704 // RefType implements refsvfs2.CheckedObject.Type. 705 func (d *dentry) RefType() string { 706 return "verity.dentry" 707 } 708 709 // LeakMessage implements refsvfs2.CheckedObject.LeakMessage. 710 func (d *dentry) LeakMessage() string { 711 return fmt.Sprintf("[verity.dentry %p] reference count of %d instead of -1", d, atomic.LoadInt64(&d.refs)) 712 } 713 714 // LogRefs implements refsvfs2.CheckedObject.LogRefs. 715 // 716 // This should only be set to true for debugging purposes, as it can generate an 717 // extremely large amount of output and drastically degrade performance. 718 func (d *dentry) LogRefs() bool { 719 return false 720 } 721 722 // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. 723 func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) { 724 //TODO(b/159261227): Implement InotifyWithParent. 725 } 726 727 // Watches implements vfs.DentryImpl.Watches. 728 func (d *dentry) Watches() *vfs.Watches { 729 //TODO(b/159261227): Implement Watches. 730 return nil 731 } 732 733 // OnZeroWatches implements vfs.DentryImpl.OnZeroWatches. 734 func (d *dentry) OnZeroWatches(context.Context) { 735 //TODO(b/159261227): Implement OnZeroWatches. 736 } 737 738 func (d *dentry) isSymlink() bool { 739 return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK 740 } 741 742 func (d *dentry) isDir() bool { 743 return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFDIR 744 } 745 746 func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error { 747 return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))) 748 } 749 750 // verityEnabled checks whether the file is enabled with verity features. It 751 // should always be true if runtime enable is not allowed. In runtime enable 752 // mode, it returns true if the target has been enabled with 753 // ioctl(FS_IOC_ENABLE_VERITY). 754 func (d *dentry) verityEnabled() bool { 755 d.hashMu.RLock() 756 defer d.hashMu.RUnlock() 757 return !d.fs.allowRuntimeEnable || len(d.hash) != 0 758 } 759 760 // generateChildrenList generates a sorted childrenList from childrenNames, and 761 // cache it in d for hashing. 762 func (d *dentry) generateChildrenList() { 763 if len(d.childrenList) == 0 && len(d.childrenNames) != 0 { 764 for child := range d.childrenNames { 765 d.childrenList = append(d.childrenList, child) 766 } 767 sort.Strings(d.childrenList) 768 } 769 } 770 771 // getLowerAt returns the dentry in the underlying file system, which is 772 // represented by filename relative to d. 773 func (d *dentry) getLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, filename string) (vfs.VirtualDentry, error) { 774 return vfsObj.GetDentryAt(ctx, d.fs.creds, &vfs.PathOperation{ 775 Root: d.lowerVD, 776 Start: d.lowerVD, 777 Path: fspath.Parse(filename), 778 }, &vfs.GetDentryOptions{}) 779 } 780 781 func (d *dentry) readlink(ctx context.Context) (string, error) { 782 vfsObj := d.fs.vfsfs.VirtualFilesystem() 783 if d.verityEnabled() { 784 stat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{ 785 Root: d.lowerVD, 786 Start: d.lowerVD, 787 }, &vfs.StatOptions{}) 788 if err != nil { 789 return "", err 790 } 791 d.dirMu.Lock() 792 defer d.dirMu.Unlock() 793 if err := d.fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil { 794 return "", err 795 } 796 return d.symlinkTarget, nil 797 } 798 799 return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{ 800 Root: d.lowerVD, 801 Start: d.lowerVD, 802 }) 803 } 804 805 // FileDescription implements vfs.FileDescriptionImpl for verity fds. 806 // FileDescription is a wrapper of the underlying lowerFD, with support to build 807 // Merkle trees through the Linux fs-verity API to verify contents read from 808 // lowerFD. 809 // 810 // +stateify savable 811 type fileDescription struct { 812 vfsfd vfs.FileDescription 813 vfs.FileDescriptionDefaultImpl 814 815 // d is the corresponding dentry to the fileDescription. 816 d *dentry 817 818 // isDir specifies whehter the fileDescription points to a directory. 819 isDir bool 820 821 // lowerFD is the FileDescription corresponding to the file in the 822 // underlying file system. 823 lowerFD *vfs.FileDescription 824 825 // lowerMappable is the memmap.Mappable corresponding to this file in the 826 // underlying file system. 827 lowerMappable memmap.Mappable 828 829 // merkleReader is the read-only FileDescription corresponding to the 830 // Merkle tree file in the underlying file system. 831 merkleReader *vfs.FileDescription 832 833 // merkleWriter is the FileDescription corresponding to the Merkle tree 834 // file in the underlying file system for writing. This should only be 835 // used when allowRuntimeEnable is set to true. 836 merkleWriter *vfs.FileDescription 837 838 // parentMerkleWriter is the FileDescription of the Merkle tree for the 839 // directory that contains the current file/directory. This is only used 840 // if allowRuntimeEnable is set to true. 841 parentMerkleWriter *vfs.FileDescription 842 843 // off is the file offset. off is protected by mu. 844 mu sync.Mutex `state:"nosave"` 845 off int64 846 } 847 848 // Release implements vfs.FileDescriptionImpl.Release. 849 func (fd *fileDescription) Release(ctx context.Context) { 850 fd.lowerFD.DecRef(ctx) 851 fd.merkleReader.DecRef(ctx) 852 if fd.merkleWriter != nil { 853 fd.merkleWriter.DecRef(ctx) 854 } 855 if fd.parentMerkleWriter != nil { 856 fd.parentMerkleWriter.DecRef(ctx) 857 } 858 } 859 860 // Stat implements vfs.FileDescriptionImpl.Stat. 861 func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { 862 stat, err := fd.lowerFD.Stat(ctx, opts) 863 if err != nil { 864 return linux.Statx{}, err 865 } 866 fd.d.dirMu.Lock() 867 if fd.d.verityEnabled() { 868 if err := fd.d.fs.verifyStatAndChildrenLocked(ctx, fd.d, stat); err != nil { 869 return linux.Statx{}, err 870 } 871 } 872 fd.d.dirMu.Unlock() 873 return stat, nil 874 } 875 876 // SetStat implements vfs.FileDescriptionImpl.SetStat. 877 func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { 878 // Verity files are read-only. 879 return linuxerr.EPERM 880 } 881 882 // IterDirents implements vfs.FileDescriptionImpl.IterDirents. 883 func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { 884 if !fd.d.isDir() { 885 return syserror.ENOTDIR 886 } 887 fd.mu.Lock() 888 defer fd.mu.Unlock() 889 890 if _, err := fd.lowerFD.Seek(ctx, fd.off, linux.SEEK_SET); err != nil { 891 return err 892 } 893 894 var ds []vfs.Dirent 895 err := fd.lowerFD.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error { 896 // Do not include the Merkle tree files. 897 if strings.Contains(dirent.Name, merklePrefix) || strings.Contains(dirent.Name, merkleRootPrefix) { 898 return nil 899 } 900 if fd.d.verityEnabled() { 901 // Verify that the child is expected. 902 if dirent.Name != "." && dirent.Name != ".." { 903 if _, ok := fd.d.childrenNames[dirent.Name]; !ok { 904 return fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Unexpected children %s", dirent.Name)) 905 } 906 } 907 } 908 ds = append(ds, dirent) 909 return nil 910 })) 911 912 if err != nil { 913 return err 914 } 915 916 // The result should be a part of all children plus "." and "..", counting from fd.off. 917 if fd.d.verityEnabled() && len(ds) != len(fd.d.childrenNames)+2-int(fd.off) { 918 return fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Unexpected children number %d", len(ds))) 919 } 920 921 for fd.off < int64(len(ds)) { 922 if err := cb.Handle(ds[fd.off]); err != nil { 923 return err 924 } 925 fd.off++ 926 } 927 return nil 928 } 929 930 // Seek implements vfs.FileDescriptionImpl.Seek. 931 func (fd *fileDescription) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 932 fd.mu.Lock() 933 defer fd.mu.Unlock() 934 n := int64(0) 935 switch whence { 936 case linux.SEEK_SET: 937 // use offset as specified 938 case linux.SEEK_CUR: 939 n = fd.off 940 case linux.SEEK_END: 941 n = int64(fd.d.size) 942 default: 943 return 0, linuxerr.EINVAL 944 } 945 if offset > math.MaxInt64-n { 946 return 0, linuxerr.EINVAL 947 } 948 offset += n 949 if offset < 0 { 950 return 0, linuxerr.EINVAL 951 } 952 fd.off = offset 953 return offset, nil 954 } 955 956 // generateMerkleLocked generates a Merkle tree file for fd. If fd points to a 957 // file /foo/bar, a Merkle tree file /foo/.merkle.verity.bar is generated. The 958 // hash of the generated Merkle tree and the data size is returned. If fd 959 // points to a regular file, the data is the content of the file. If fd points 960 // to a directory, the data is all hashes of its children, written to the Merkle 961 // tree file. If fd represents a symlink, the data is empty and nothing is written 962 // to the Merkle tree file. 963 // 964 // Preconditions: fd.d.fs.verityMu must be locked. 965 func (fd *fileDescription) generateMerkleLocked(ctx context.Context) ([]byte, uint64, error) { 966 fdReader := FileReadWriteSeeker{ 967 FD: fd.lowerFD, 968 Ctx: ctx, 969 } 970 merkleReader := FileReadWriteSeeker{ 971 FD: fd.merkleReader, 972 Ctx: ctx, 973 } 974 merkleWriter := FileReadWriteSeeker{ 975 FD: fd.merkleWriter, 976 Ctx: ctx, 977 } 978 979 stat, err := fd.lowerFD.Stat(ctx, vfs.StatOptions{}) 980 if err != nil { 981 return nil, 0, err 982 } 983 984 fd.d.generateChildrenList() 985 986 params := &merkletree.GenerateParams{ 987 TreeReader: &merkleReader, 988 TreeWriter: &merkleWriter, 989 Children: fd.d.childrenList, 990 HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(), 991 Name: fd.d.name, 992 Mode: uint32(stat.Mode), 993 UID: stat.UID, 994 GID: stat.GID, 995 } 996 997 switch atomic.LoadUint32(&fd.d.mode) & linux.S_IFMT { 998 case linux.S_IFREG: 999 // For a regular file, generate a Merkle tree based on its 1000 // content. 1001 params.File = &fdReader 1002 params.Size = int64(stat.Size) 1003 params.DataAndTreeInSameFile = false 1004 case linux.S_IFDIR: 1005 // For a directory, generate a Merkle tree based on the hashes 1006 // of its children that has already been written to the Merkle 1007 // tree file. 1008 merkleStat, err := fd.merkleReader.Stat(ctx, vfs.StatOptions{}) 1009 if err != nil { 1010 return nil, 0, err 1011 } 1012 1013 params.Size = int64(merkleStat.Size) 1014 params.File = &merkleReader 1015 params.DataAndTreeInSameFile = true 1016 case linux.S_IFLNK: 1017 // For a symlink, generate a Merkle tree file but do not write the root hash 1018 // of the target file content to it. Return a hash of a VerityDescriptor object 1019 // which includes the symlink target name. 1020 target, err := fd.d.readlink(ctx) 1021 if err != nil { 1022 return nil, 0, err 1023 } 1024 1025 params.Size = int64(stat.Size) 1026 params.DataAndTreeInSameFile = false 1027 params.SymlinkTarget = target 1028 default: 1029 // TODO(b/167728857): Investigate whether and how we should 1030 // enable other types of file. 1031 return nil, 0, linuxerr.EINVAL 1032 } 1033 hash, err := merkletree.Generate(params) 1034 return hash, uint64(params.Size), err 1035 } 1036 1037 // recordChildrenLocked writes the names of fd's children into the 1038 // corresponding Merkle tree file, and saves the offset/size of the map into 1039 // xattrs. 1040 // 1041 // Preconditions: 1042 // * fd.d.fs.verityMu must be locked. 1043 // * fd.d.isDir() == true. 1044 func (fd *fileDescription) recordChildrenLocked(ctx context.Context) error { 1045 // Record the children names in the Merkle tree file. 1046 childrenNames, err := json.Marshal(fd.d.childrenNames) 1047 if err != nil { 1048 return err 1049 } 1050 1051 stat, err := fd.merkleWriter.Stat(ctx, vfs.StatOptions{}) 1052 if err != nil { 1053 return err 1054 } 1055 1056 if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ 1057 Name: childrenOffsetXattr, 1058 Value: strconv.Itoa(int(stat.Size)), 1059 }); err != nil { 1060 return err 1061 } 1062 if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ 1063 Name: childrenSizeXattr, 1064 Value: strconv.Itoa(len(childrenNames)), 1065 }); err != nil { 1066 return err 1067 } 1068 1069 if _, err = fd.merkleWriter.Write(ctx, usermem.BytesIOSequence(childrenNames), vfs.WriteOptions{}); err != nil { 1070 return err 1071 } 1072 1073 return nil 1074 } 1075 1076 // enableVerity enables verity features on fd by generating a Merkle tree file 1077 // and stores its hash in its parent directory's Merkle tree. 1078 func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) { 1079 if !fd.d.fs.allowRuntimeEnable { 1080 return 0, linuxerr.EPERM 1081 } 1082 1083 fd.d.fs.verityMu.Lock() 1084 defer fd.d.fs.verityMu.Unlock() 1085 1086 // In allowRuntimeEnable mode, the underlying fd and read/write fd for 1087 // the Merkle tree file should have all been initialized. For any file 1088 // or directory other than the root, the parent Merkle tree file should 1089 // have also been initialized. 1090 if fd.lowerFD == nil || fd.merkleReader == nil || fd.merkleWriter == nil || (fd.parentMerkleWriter == nil && fd.d != fd.d.fs.rootDentry) { 1091 return 0, fd.d.fs.alertIntegrityViolation("Unexpected verity fd: missing expected underlying fds") 1092 } 1093 1094 hash, dataSize, err := fd.generateMerkleLocked(ctx) 1095 if err != nil { 1096 return 0, err 1097 } 1098 1099 if fd.parentMerkleWriter != nil { 1100 stat, err := fd.parentMerkleWriter.Stat(ctx, vfs.StatOptions{}) 1101 if err != nil { 1102 return 0, err 1103 } 1104 1105 // Write the hash of fd to the parent directory's Merkle tree 1106 // file, as it should be part of the parent Merkle tree data. 1107 // parentMerkleWriter is open with O_APPEND, so it should write 1108 // directly to the end of the file. 1109 if _, err = fd.parentMerkleWriter.Write(ctx, usermem.BytesIOSequence(hash), vfs.WriteOptions{}); err != nil { 1110 return 0, err 1111 } 1112 1113 // Record the offset of the hash of fd in parent directory's 1114 // Merkle tree file. 1115 if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ 1116 Name: merkleOffsetInParentXattr, 1117 Value: strconv.Itoa(int(stat.Size)), 1118 }); err != nil { 1119 return 0, err 1120 } 1121 1122 // Add the current child's name to parent's childrenNames. 1123 fd.d.parent.childrenNames[fd.d.name] = struct{}{} 1124 } 1125 1126 // Record the size of the data being hashed for fd. 1127 if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ 1128 Name: merkleSizeXattr, 1129 Value: strconv.Itoa(int(dataSize)), 1130 }); err != nil { 1131 return 0, err 1132 } 1133 1134 if fd.d.isDir() { 1135 if err := fd.recordChildrenLocked(ctx); err != nil { 1136 return 0, err 1137 } 1138 } 1139 fd.d.hashMu.Lock() 1140 fd.d.hash = hash 1141 fd.d.hashMu.Unlock() 1142 return 0, nil 1143 } 1144 1145 // measureVerity returns the hash of fd, saved in verityDigest. 1146 func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hostarch.Addr) (uintptr, error) { 1147 t := kernel.TaskFromContext(ctx) 1148 if t == nil { 1149 return 0, linuxerr.EINVAL 1150 } 1151 var metadata linux.DigestMetadata 1152 1153 fd.d.hashMu.RLock() 1154 defer fd.d.hashMu.RUnlock() 1155 1156 // If allowRuntimeEnable is true, an empty fd.d.hash indicates that 1157 // verity is not enabled for the file. If allowRuntimeEnable is false, 1158 // this is an integrity violation because all files should have verity 1159 // enabled, in which case fd.d.hash should be set. 1160 if len(fd.d.hash) == 0 { 1161 if fd.d.fs.allowRuntimeEnable { 1162 return 0, linuxerr.ENODATA 1163 } 1164 return 0, fd.d.fs.alertIntegrityViolation("Ioctl measureVerity: no hash found") 1165 } 1166 1167 // The first part of VerityDigest is the metadata. 1168 if _, err := metadata.CopyIn(t, verityDigest); err != nil { 1169 return 0, err 1170 } 1171 if metadata.DigestSize < uint16(len(fd.d.hash)) { 1172 return 0, linuxerr.EOVERFLOW 1173 } 1174 1175 // Populate the output digest size, since DigestSize is both input and 1176 // output. 1177 metadata.DigestSize = uint16(len(fd.d.hash)) 1178 1179 // First copy the metadata. 1180 if _, err := metadata.CopyOut(t, verityDigest); err != nil { 1181 return 0, err 1182 } 1183 1184 // Now copy the root hash bytes to the memory after metadata. 1185 _, err := t.CopyOutBytes(hostarch.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.hash) 1186 return 0, err 1187 } 1188 1189 func (fd *fileDescription) verityFlags(ctx context.Context, flags hostarch.Addr) (uintptr, error) { 1190 f := int32(0) 1191 1192 fd.d.hashMu.RLock() 1193 // All enabled files should store a hash. This flag is not settable via 1194 // FS_IOC_SETFLAGS. 1195 if len(fd.d.hash) != 0 { 1196 f |= linux.FS_VERITY_FL 1197 } 1198 fd.d.hashMu.RUnlock() 1199 1200 t := kernel.TaskFromContext(ctx) 1201 if t == nil { 1202 return 0, linuxerr.EINVAL 1203 } 1204 _, err := primitive.CopyInt32Out(t, flags, f) 1205 return 0, err 1206 } 1207 1208 // Ioctl implements vfs.FileDescriptionImpl.Ioctl. 1209 func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { 1210 switch cmd := args[1].Uint(); cmd { 1211 case linux.FS_IOC_ENABLE_VERITY: 1212 return fd.enableVerity(ctx) 1213 case linux.FS_IOC_MEASURE_VERITY: 1214 return fd.measureVerity(ctx, args[2].Pointer()) 1215 case linux.FS_IOC_GETFLAGS: 1216 return fd.verityFlags(ctx, args[2].Pointer()) 1217 default: 1218 return 0, syserror.ENOSYS 1219 } 1220 } 1221 1222 // Read implements vfs.FileDescriptionImpl.Read. 1223 func (fd *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { 1224 // Implement Read with PRead by setting offset. 1225 fd.mu.Lock() 1226 n, err := fd.PRead(ctx, dst, fd.off, opts) 1227 fd.off += n 1228 fd.mu.Unlock() 1229 return n, err 1230 } 1231 1232 // PRead implements vfs.FileDescriptionImpl.PRead. 1233 func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { 1234 // No need to verify if the file is not enabled yet in 1235 // allowRuntimeEnable mode. 1236 if !fd.d.verityEnabled() { 1237 return fd.lowerFD.PRead(ctx, dst, offset, opts) 1238 } 1239 1240 fd.d.fs.verityMu.RLock() 1241 defer fd.d.fs.verityMu.RUnlock() 1242 // dataSize is the size of the whole file. 1243 dataSize, err := fd.merkleReader.GetXattr(ctx, &vfs.GetXattrOptions{ 1244 Name: merkleSizeXattr, 1245 Size: sizeOfStringInt32, 1246 }) 1247 1248 // The Merkle tree file for the child should have been created and 1249 // contains the expected xattrs. If the xattr does not exist, it 1250 // indicates unexpected modifications to the file system. 1251 if linuxerr.Equals(linuxerr.ENODATA, err) { 1252 return 0, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err)) 1253 } 1254 if err != nil { 1255 return 0, err 1256 } 1257 1258 // The dataSize xattr should be an integer. If it's not, it indicates 1259 // unexpected modifications to the file system. 1260 size, err := strconv.Atoi(dataSize) 1261 if err != nil { 1262 return 0, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", merkleSizeXattr, err)) 1263 } 1264 1265 dataReader := FileReadWriteSeeker{ 1266 FD: fd.lowerFD, 1267 Ctx: ctx, 1268 } 1269 1270 merkleReader := FileReadWriteSeeker{ 1271 FD: fd.merkleReader, 1272 Ctx: ctx, 1273 } 1274 1275 fd.d.hashMu.RLock() 1276 n, err := merkletree.Verify(&merkletree.VerifyParams{ 1277 Out: dst.Writer(ctx), 1278 File: &dataReader, 1279 Tree: &merkleReader, 1280 Size: int64(size), 1281 Name: fd.d.name, 1282 Mode: fd.d.mode, 1283 UID: fd.d.uid, 1284 GID: fd.d.gid, 1285 Children: fd.d.childrenList, 1286 HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(), 1287 ReadOffset: offset, 1288 ReadSize: dst.NumBytes(), 1289 Expected: fd.d.hash, 1290 DataAndTreeInSameFile: false, 1291 }) 1292 fd.d.hashMu.RUnlock() 1293 if err != nil { 1294 return 0, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Verification failed: %v", err)) 1295 } 1296 return n, err 1297 } 1298 1299 // PWrite implements vfs.FileDescriptionImpl.PWrite. 1300 func (fd *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { 1301 return 0, linuxerr.EROFS 1302 } 1303 1304 // Write implements vfs.FileDescriptionImpl.Write. 1305 func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { 1306 return 0, linuxerr.EROFS 1307 } 1308 1309 // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. 1310 func (fd *fileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { 1311 if err := fd.lowerFD.ConfigureMMap(ctx, opts); err != nil { 1312 return err 1313 } 1314 fd.lowerMappable = opts.Mappable 1315 if opts.MappingIdentity != nil { 1316 opts.MappingIdentity.DecRef(ctx) 1317 opts.MappingIdentity = nil 1318 } 1319 1320 // Check if mmap is allowed on the lower filesystem. 1321 if !opts.SentryOwnedContent { 1322 return linuxerr.ENODEV 1323 } 1324 return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts) 1325 } 1326 1327 // SupportsLocks implements vfs.FileDescriptionImpl.SupportsLocks. 1328 func (fd *fileDescription) SupportsLocks() bool { 1329 return fd.lowerFD.SupportsLocks() 1330 } 1331 1332 // LockBSD implements vfs.FileDescriptionImpl.LockBSD. 1333 func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error { 1334 return fd.lowerFD.LockBSD(ctx, ownerPID, t, block) 1335 } 1336 1337 // UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. 1338 func (fd *fileDescription) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { 1339 return fd.lowerFD.UnlockBSD(ctx) 1340 } 1341 1342 // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. 1343 func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { 1344 return fd.lowerFD.LockPOSIX(ctx, uid, ownerPID, t, r, block) 1345 } 1346 1347 // UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 1348 func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error { 1349 return fd.lowerFD.UnlockPOSIX(ctx, uid, r) 1350 } 1351 1352 // TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX. 1353 func (fd *fileDescription) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) { 1354 return fd.lowerFD.TestPOSIX(ctx, uid, t, r) 1355 } 1356 1357 // Translate implements memmap.Mappable.Translate. 1358 func (fd *fileDescription) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { 1359 ts, err := fd.lowerMappable.Translate(ctx, required, optional, at) 1360 if err != nil { 1361 return nil, err 1362 } 1363 1364 // dataSize is the size of the whole file. 1365 dataSize, err := fd.merkleReader.GetXattr(ctx, &vfs.GetXattrOptions{ 1366 Name: merkleSizeXattr, 1367 Size: sizeOfStringInt32, 1368 }) 1369 1370 // The Merkle tree file for the child should have been created and 1371 // contains the expected xattrs. If the xattr does not exist, it 1372 // indicates unexpected modifications to the file system. 1373 if linuxerr.Equals(linuxerr.ENODATA, err) { 1374 return nil, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err)) 1375 } 1376 if err != nil { 1377 return nil, err 1378 } 1379 1380 // The dataSize xattr should be an integer. If it's not, it indicates 1381 // unexpected modifications to the file system. 1382 size, err := strconv.Atoi(dataSize) 1383 if err != nil { 1384 return nil, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", merkleSizeXattr, err)) 1385 } 1386 1387 merkleReader := FileReadWriteSeeker{ 1388 FD: fd.merkleReader, 1389 Ctx: ctx, 1390 } 1391 1392 for _, t := range ts { 1393 // Content integrity relies on sentry owning the backing data. MapInternal is guaranteed 1394 // to fetch sentry owned memory because we disallow verity mmaps otherwise. 1395 ims, err := t.File.MapInternal(memmap.FileRange{t.Offset, t.Offset + t.Source.Length()}, hostarch.Read) 1396 if err != nil { 1397 return nil, err 1398 } 1399 dataReader := mmapReadSeeker{ims, t.Source.Start} 1400 var buf bytes.Buffer 1401 _, err = merkletree.Verify(&merkletree.VerifyParams{ 1402 Out: &buf, 1403 File: &dataReader, 1404 Tree: &merkleReader, 1405 Size: int64(size), 1406 Name: fd.d.name, 1407 Mode: fd.d.mode, 1408 UID: fd.d.uid, 1409 GID: fd.d.gid, 1410 HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(), 1411 ReadOffset: int64(t.Source.Start), 1412 ReadSize: int64(t.Source.Length()), 1413 Expected: fd.d.hash, 1414 DataAndTreeInSameFile: false, 1415 }) 1416 if err != nil { 1417 return nil, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Verification failed: %v", err)) 1418 } 1419 } 1420 return ts, err 1421 } 1422 1423 // AddMapping implements memmap.Mappable.AddMapping. 1424 func (fd *fileDescription) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { 1425 return fd.lowerMappable.AddMapping(ctx, ms, ar, offset, writable) 1426 } 1427 1428 // RemoveMapping implements memmap.Mappable.RemoveMapping. 1429 func (fd *fileDescription) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { 1430 fd.lowerMappable.RemoveMapping(ctx, ms, ar, offset, writable) 1431 } 1432 1433 // CopyMapping implements memmap.Mappable.CopyMapping. 1434 func (fd *fileDescription) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { 1435 return fd.lowerMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable) 1436 } 1437 1438 // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. 1439 func (fd *fileDescription) InvalidateUnsavable(context.Context) error { 1440 return nil 1441 } 1442 1443 // mmapReadSeeker is a helper struct used by fileDescription.Translate to pass 1444 // a safemem.BlockSeq pointing to the mapped region as io.ReaderAt. 1445 type mmapReadSeeker struct { 1446 safemem.BlockSeq 1447 Offset uint64 1448 } 1449 1450 // ReadAt implements io.ReaderAt.ReadAt. off is the offset into the mapped file. 1451 func (r *mmapReadSeeker) ReadAt(p []byte, off int64) (int, error) { 1452 bs := r.BlockSeq 1453 // Adjust the offset into the mapped file to get the offset into the internally 1454 // mapped region. 1455 readOffset := off - int64(r.Offset) 1456 if readOffset < 0 { 1457 return 0, linuxerr.EINVAL 1458 } 1459 bs.DropFirst64(uint64(readOffset)) 1460 view := bs.TakeFirst64(uint64(len(p))) 1461 dst := safemem.BlockSeqOf(safemem.BlockFromSafeSlice(p)) 1462 n, err := safemem.CopySeq(dst, view) 1463 return int(n), err 1464 } 1465 1466 // FileReadWriteSeeker is a helper struct to pass a vfs.FileDescription as 1467 // io.Reader/io.Writer/io.ReadSeeker/io.ReaderAt/io.WriterAt/etc. 1468 type FileReadWriteSeeker struct { 1469 FD *vfs.FileDescription 1470 Ctx context.Context 1471 ROpts vfs.ReadOptions 1472 WOpts vfs.WriteOptions 1473 } 1474 1475 // ReadAt implements io.ReaderAt.ReadAt. 1476 func (f *FileReadWriteSeeker) ReadAt(p []byte, off int64) (int, error) { 1477 dst := usermem.BytesIOSequence(p) 1478 n, err := f.FD.PRead(f.Ctx, dst, off, f.ROpts) 1479 return int(n), err 1480 } 1481 1482 // Read implements io.ReadWriteSeeker.Read. 1483 func (f *FileReadWriteSeeker) Read(p []byte) (int, error) { 1484 dst := usermem.BytesIOSequence(p) 1485 n, err := f.FD.Read(f.Ctx, dst, f.ROpts) 1486 return int(n), err 1487 } 1488 1489 // Seek implements io.ReadWriteSeeker.Seek. 1490 func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) { 1491 return f.FD.Seek(f.Ctx, offset, int32(whence)) 1492 } 1493 1494 // WriteAt implements io.WriterAt.WriteAt. 1495 func (f *FileReadWriteSeeker) WriteAt(p []byte, off int64) (int, error) { 1496 dst := usermem.BytesIOSequence(p) 1497 n, err := f.FD.PWrite(f.Ctx, dst, off, f.WOpts) 1498 return int(n), err 1499 } 1500 1501 // Write implements io.ReadWriteSeeker.Write. 1502 func (f *FileReadWriteSeeker) Write(p []byte) (int, error) { 1503 buf := usermem.BytesIOSequence(p) 1504 n, err := f.FD.Write(f.Ctx, buf, f.WOpts) 1505 return int(n), err 1506 }