github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/tmpfs/inode_file.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tmpfs

import (
	"fmt"
	"io"
	"math"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/safemem"
	"github.com/SagerNet/gvisor/pkg/sentry/fs"
	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
	ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sentry/usage"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/usermem"
)

// fileInodeOperations implements fs.InodeOperations for a regular tmpfs file.
// These files are backed by pages allocated from a platform.Memory, and may be
// directly mapped.
//
// Lock order: attrMu -> mapsMu -> dataMu.
//
// +stateify savable
type fileInodeOperations struct {
	fsutil.InodeGenericChecker `state:"nosave"`
	fsutil.InodeNoopWriteOut   `state:"nosave"`
	fsutil.InodeNotDirectory   `state:"nosave"`
	fsutil.InodeNotSocket      `state:"nosave"`
	fsutil.InodeNotSymlink     `state:"nosave"`

	fsutil.InodeSimpleExtendedAttributes

	// kernel is used to allocate memory that stores the file's contents.
	kernel *kernel.Kernel

	// memUsage is the default memory usage that will be reported by this file.
	memUsage usage.MemoryKind

	attrMu sync.Mutex `state:"nosave"`

	// attr contains the unstable metadata for the file.
	//
	// attr is protected by attrMu. attr.Size is protected by both attrMu
	// and dataMu; reading it requires locking either mutex, while mutating
	// it requires locking both.
	attr fs.UnstableAttr

	mapsMu sync.Mutex `state:"nosave"`

	// mappings tracks mappings of the file into memmap.MappingSpaces.
	//
	// mappings is protected by mapsMu.
	mappings memmap.MappingSet

	// writableMappingPages tracks how many pages of virtual memory are mapped
	// as potentially writable from this file. If a page has multiple mappings,
	// each mapping is counted separately.
	//
	// This counter is susceptible to overflow as we can potentially count
	// mappings from many VMAs. We count pages rather than bytes to slightly
	// mitigate this.
	//
	// Protected by mapsMu.
	writableMappingPages uint64

	dataMu sync.RWMutex `state:"nosave"`

	// data maps offsets into the file to offsets into platform.Memory() that
	// store the file's data.
	//
	// data is protected by dataMu.
	data fsutil.FileRangeSet

	// seals represents file seals on this inode.
	//
	// Protected by dataMu.
	seals uint32
}

var _ fs.InodeOperations = (*fileInodeOperations)(nil)
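// The attr.Size invariant above is worth spelling out: because a reader may
// hold either attrMu or dataMu, a writer must hold both to exclude all
// readers. An illustrative sketch of the pattern (not original code; see
// Truncate for the writer side and read for the single-mutex reader side):
//
//	f.attrMu.Lock()       // writers take attrMu first (lock order)...
//	f.dataMu.Lock()       // ...then dataMu
//	f.attr.Size = newSize // safe: no reader can hold either mutex
//	f.dataMu.Unlock()
//	f.attrMu.Unlock()
//
//	f.dataMu.RLock()      // a reader needs only one of the two mutexes
//	size := f.attr.Size
//	f.dataMu.RUnlock()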
// NewInMemoryFile returns a new file backed by Kernel.MemoryFile().
func NewInMemoryFile(ctx context.Context, usage usage.MemoryKind, uattr fs.UnstableAttr) fs.InodeOperations {
	return &fileInodeOperations{
		attr:     uattr,
		kernel:   kernel.KernelFromContext(ctx),
		memUsage: usage,
		seals:    linux.F_SEAL_SEAL,
	}
}

// NewMemfdInode creates a new inode backing a memfd. Memory used by the memfd
// is backed by platform memory.
func NewMemfdInode(ctx context.Context, allowSeals bool) *fs.Inode {
	// Per Linux, mm/shmem.c:__shmem_file_setup(), memfd inodes are set up with
	// S_IRWXUGO.
	perms := fs.PermMask{Read: true, Write: true, Execute: true}
	iops := NewInMemoryFile(ctx, usage.Tmpfs, fs.UnstableAttr{
		Owner: fs.FileOwnerFromContext(ctx),
		Perms: fs.FilePermissions{User: perms, Group: perms, Other: perms}}).(*fileInodeOperations)
	if allowSeals {
		iops.seals = 0
	}
	return fs.NewInode(ctx, iops, fs.NewNonCachingMountSource(ctx, nil, fs.MountSourceFlags{}), fs.StableAttr{
		Type:      fs.RegularFile,
		DeviceID:  tmpfsDevice.DeviceID(),
		InodeID:   tmpfsDevice.NextIno(),
		BlockSize: hostarch.PageSize,
	})
}
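// For illustration (a sketch, not part of the original file): a memfd created
// with allowSeals=true starts with an empty seal set, so callers may add
// seals until F_SEAL_SEAL is applied; without allowSeals, the inode starts
// with F_SEAL_SEAL already set and is permanently unsealable:
//
//	inode := NewMemfdInode(ctx, true /* allowSeals */)
//	_ = AddSeals(inode, linux.F_SEAL_GROW|linux.F_SEAL_SHRINK) // ok
//	_ = AddSeals(inode, linux.F_SEAL_SEAL)                     // freezes the seal set
//	err := AddSeals(inode, linux.F_SEAL_WRITE)                 // now EPERM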
// Release implements fs.InodeOperations.Release.
func (f *fileInodeOperations) Release(context.Context) {
	f.dataMu.Lock()
	defer f.dataMu.Unlock()
	f.data.DropAll(f.kernel.MemoryFile())
}

// Mappable implements fs.InodeOperations.Mappable.
func (f *fileInodeOperations) Mappable(*fs.Inode) memmap.Mappable {
	return f
}

// Rename implements fs.InodeOperations.Rename.
func (*fileInodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error {
	return rename(ctx, oldParent, oldName, newParent, newName, replacement)
}

// GetFile implements fs.InodeOperations.GetFile.
func (f *fileInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	if fs.IsSocket(d.Inode.StableAttr) {
		return nil, linuxerr.ENXIO
	}

	if flags.Write {
		fsmetric.TmpfsOpensW.Increment()
	} else if flags.Read {
		fsmetric.TmpfsOpensRO.Increment()
	}
	flags.Pread = true
	flags.Pwrite = true
	return fs.NewFile(ctx, d, flags, &regularFileOperations{iops: f}), nil
}

// UnstableAttr returns unstable attributes of this tmpfs file.
func (f *fileInodeOperations) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
	f.attrMu.Lock()
	f.dataMu.RLock()
	attr := f.attr
	attr.Usage = int64(f.data.Span())
	f.dataMu.RUnlock()
	f.attrMu.Unlock()
	return attr, nil
}

// Check implements fs.InodeOperations.Check.
func (f *fileInodeOperations) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
	return fs.ContextCanAccessFile(ctx, inode, p)
}

// SetPermissions implements fs.InodeOperations.SetPermissions.
func (f *fileInodeOperations) SetPermissions(ctx context.Context, _ *fs.Inode, p fs.FilePermissions) bool {
	f.attrMu.Lock()
	f.attr.SetPermissions(ctx, p)
	f.attrMu.Unlock()
	return true
}

// SetTimestamps implements fs.InodeOperations.SetTimestamps.
func (f *fileInodeOperations) SetTimestamps(ctx context.Context, _ *fs.Inode, ts fs.TimeSpec) error {
	f.attrMu.Lock()
	f.attr.SetTimestamps(ctx, ts)
	f.attrMu.Unlock()
	return nil
}

// SetOwner implements fs.InodeOperations.SetOwner.
func (f *fileInodeOperations) SetOwner(ctx context.Context, _ *fs.Inode, owner fs.FileOwner) error {
	f.attrMu.Lock()
	f.attr.SetOwner(ctx, owner)
	f.attrMu.Unlock()
	return nil
}

// Truncate implements fs.InodeOperations.Truncate.
func (f *fileInodeOperations) Truncate(ctx context.Context, _ *fs.Inode, size int64) error {
	f.attrMu.Lock()
	defer f.attrMu.Unlock()

	f.dataMu.Lock()
	oldSize := f.attr.Size

	// Check if current seals allow truncation.
	switch {
	case size > oldSize && f.seals&linux.F_SEAL_GROW != 0: // Grow sealed
		fallthrough
	case oldSize > size && f.seals&linux.F_SEAL_SHRINK != 0: // Shrink sealed
		f.dataMu.Unlock()
		return linuxerr.EPERM
	}

	if oldSize != size {
		f.attr.Size = size
		// Update mtime and ctime.
		now := ktime.NowFromContext(ctx)
		f.attr.ModificationTime = now
		f.attr.StatusChangeTime = now

		// Truncating clears privilege bits.
		f.attr.Perms.SetUID = false
		if f.attr.Perms.Group.Execute {
			f.attr.Perms.SetGID = false
		}
	}
	f.dataMu.Unlock()

	// Nothing left to do unless shrinking the file.
	if oldSize <= size {
		return nil
	}

	oldpgend := fs.OffsetPageEnd(oldSize)
	newpgend := fs.OffsetPageEnd(size)

	// Invalidate past translations of truncated pages.
	if newpgend != oldpgend {
		f.mapsMu.Lock()
		f.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
			// Compare Linux's mm/shmem.c:shmem_setattr() =>
			// mm/memory.c:unmap_mapping_range(evencows=1).
			InvalidatePrivate: true,
		})
		f.mapsMu.Unlock()
	}

	// We are now guaranteed that there are no translations of truncated pages,
	// and can remove them.
	f.dataMu.Lock()
	defer f.dataMu.Unlock()
	f.data.Truncate(uint64(size), f.kernel.MemoryFile())

	return nil
}
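// A worked example of the Truncate page arithmetic above (illustrative,
// assuming the usual 4 KiB page size): shrinking from oldSize=10000 to
// size=5000 gives oldpgend=fs.OffsetPageEnd(10000)=12288 and
// newpgend=fs.OffsetPageEnd(5000)=8192, so mappings of [8192, 12288) are
// invalidated before the backing pages are dropped. The page [4096, 8192)
// stays mapped because it is still within the rounded-up EOF; the tail of
// that final partial page past offset 5000 is handled by f.data.Truncate.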
// Allocate implements fs.InodeOperations.Allocate.
func (f *fileInodeOperations) Allocate(ctx context.Context, _ *fs.Inode, offset, length int64) error {
	newSize := offset + length

	f.attrMu.Lock()
	defer f.attrMu.Unlock()
	f.dataMu.Lock()
	defer f.dataMu.Unlock()

	if newSize <= f.attr.Size {
		return nil
	}

	// Check if current seals allow growth.
	if f.seals&linux.F_SEAL_GROW != 0 {
		return linuxerr.EPERM
	}

	f.attr.Size = newSize

	now := ktime.NowFromContext(ctx)
	f.attr.ModificationTime = now
	f.attr.StatusChangeTime = now

	return nil
}

// AddLink implements fs.InodeOperations.AddLink.
func (f *fileInodeOperations) AddLink() {
	f.attrMu.Lock()
	f.attr.Links++
	f.attrMu.Unlock()
}

// DropLink implements fs.InodeOperations.DropLink.
func (f *fileInodeOperations) DropLink() {
	f.attrMu.Lock()
	f.attr.Links--
	f.attrMu.Unlock()
}

// NotifyStatusChange implements fs.InodeOperations.NotifyStatusChange.
func (f *fileInodeOperations) NotifyStatusChange(ctx context.Context) {
	f.attrMu.Lock()
	f.attr.StatusChangeTime = ktime.NowFromContext(ctx)
	f.attrMu.Unlock()
}

// IsVirtual implements fs.InodeOperations.IsVirtual.
func (*fileInodeOperations) IsVirtual() bool {
	return true
}

// StatFS implements fs.InodeOperations.StatFS.
func (*fileInodeOperations) StatFS(context.Context) (fs.Info, error) {
	return fsInfo, nil
}

func (f *fileInodeOperations) read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
	start := fsmetric.StartReadWait()
	defer fsmetric.FinishReadWait(fsmetric.TmpfsReadWait, start)
	fsmetric.TmpfsReads.Increment()

	// Zero length reads for tmpfs are no-ops.
	if dst.NumBytes() == 0 {
		return 0, nil
	}

	// Have we reached EOF? We check for this again in
	// fileReadWriter.ReadToBlocks to avoid holding f.attrMu (which would
	// serialize reads) or f.dataMu (which would violate lock ordering), but
	// check here first (before calling into MM) since reading at EOF is
	// common: getting a return value of 0 from a read syscall is the only way
	// to detect EOF.
	//
	// TODO(jamieliu): Separate out f.attr.Size and use atomics instead of
	// f.dataMu.
	f.dataMu.RLock()
	size := f.attr.Size
	f.dataMu.RUnlock()
	if offset >= size {
		return 0, io.EOF
	}

	n, err := dst.CopyOutFrom(ctx, &fileReadWriter{f, offset})
	if !file.Dirent.Inode.MountSource.Flags.NoAtime {
		// Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed().
		f.attrMu.Lock()
		f.attr.AccessTime = ktime.NowFromContext(ctx)
		f.attrMu.Unlock()
	}
	return n, err
}

func (f *fileInodeOperations) write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
	// Zero length writes for tmpfs are no-ops.
	if src.NumBytes() == 0 {
		return 0, nil
	}

	f.attrMu.Lock()
	defer f.attrMu.Unlock()
	// Compare Linux's mm/filemap.c:__generic_file_write_iter() => file_update_time().
	now := ktime.NowFromContext(ctx)
	f.attr.ModificationTime = now
	f.attr.StatusChangeTime = now
	nwritten, err := src.CopyInTo(ctx, &fileReadWriter{f, offset})

	// Writing clears privilege bits.
	if nwritten > 0 {
		f.attr.Perms.DropSetUIDAndMaybeGID()
	}

	return nwritten, err
}

type fileReadWriter struct {
	f      *fileInodeOperations
	offset int64
}
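// ReadToBlocks and WriteFromBlocks below both walk f.data using the
// segment/gap iterator pair returned by Find: at each step exactly one of
// seg (a populated extent) or gap (a hole) is valid. A sketch of the shared
// pattern (illustrative, not original code):
//
//	seg, gap := set.Find(offset)
//	for offset < end {
//		switch {
//		case seg.Ok():
//			// use the backing pages, then seg, gap = seg.NextNonEmpty()
//		case gap.Ok():
//			// zero-fill (reads) or allocate (writes), then advance
//		}
//	}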
// ReadToBlocks implements safemem.Reader.ReadToBlocks.
func (rw *fileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
	rw.f.dataMu.RLock()
	defer rw.f.dataMu.RUnlock()

	// Compute the range to read.
	if rw.offset >= rw.f.attr.Size {
		return 0, io.EOF
	}
	end := fs.ReadEndOffset(rw.offset, int64(dsts.NumBytes()), rw.f.attr.Size)
	if end == rw.offset { // dsts.NumBytes() == 0?
		return 0, nil
	}

	mf := rw.f.kernel.MemoryFile()
	var done uint64
	seg, gap := rw.f.data.Find(uint64(rw.offset))
	for rw.offset < end {
		mr := memmap.MappableRange{uint64(rw.offset), uint64(end)}
		switch {
		case seg.Ok():
			// Get internal mappings.
			ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read)
			if err != nil {
				return done, err
			}

			// Copy from internal mappings.
			n, err := safemem.CopySeq(dsts, ims)
			done += n
			rw.offset += int64(n)
			dsts = dsts.DropFirst64(n)
			if err != nil {
				return done, err
			}

			// Continue.
			seg, gap = seg.NextNonEmpty()

		case gap.Ok():
			// Tmpfs holes are zero-filled.
			gapMR := gap.Range().Intersect(mr)
			dst := dsts.TakeFirst64(gapMR.Length())
			n, err := safemem.ZeroSeq(dst)
			done += n
			rw.offset += int64(n)
			dsts = dsts.DropFirst64(n)
			if err != nil {
				return done, err
			}

			// Continue.
			seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
		}
	}
	return done, nil
}
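// A worked example of the F_SEAL_GROW behaviour emulated in WriteFromBlocks
// below (illustrative, assuming a 4 KiB page size): with attr.Size == 6000,
// the page containing EOF starts at pgstart == 4096. On a grow-sealed file:
//
//	write(offset=3000, len=4000) // end would be 7000 > size; truncated to
//	                             // pgstart (4096), so 1096 bytes are written
//	write(offset=5000, len=2000) // end would be 7000 > size; truncated end
//	                             // (4096) <= offset, so EPERM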
// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
	rw.f.dataMu.Lock()
	defer rw.f.dataMu.Unlock()

	// Compute the range to write.
	if srcs.NumBytes() == 0 {
		// Nothing to do.
		return 0, nil
	}
	end := fs.WriteEndOffset(rw.offset, int64(srcs.NumBytes()))
	if end == math.MaxInt64 {
		// Overflow.
		return 0, linuxerr.EINVAL
	}

	// Check if seals prevent either file growth or all writes.
	switch {
	case rw.f.seals&linux.F_SEAL_WRITE != 0: // Write sealed
		return 0, linuxerr.EPERM
	case end > rw.f.attr.Size && rw.f.seals&linux.F_SEAL_GROW != 0: // Grow sealed
		// When growth is sealed, Linux effectively allows writes which would
		// normally grow the file to partially succeed up to the current EOF,
		// rounded down to the page boundary before the EOF.
		//
		// This happens because writes (and thus the growth check) for tmpfs
		// files proceed page-by-page on Linux, and the final write to the page
		// containing EOF fails, resulting in a partial write up to the start of
		// that page.
		//
		// To emulate this behaviour, artificially truncate the write to the
		// start of the page containing the current EOF.
		//
		// See Linux, mm/filemap.c:generic_perform_write() and
		// mm/shmem.c:shmem_write_begin().
		if pgstart := int64(hostarch.Addr(rw.f.attr.Size).RoundDown()); end > pgstart {
			end = pgstart
		}
		if end <= rw.offset {
			// Truncation would result in no data being written.
			return 0, linuxerr.EPERM
		}
	}

	defer func() {
		// If the write ends beyond the file's previous size, it causes the
		// file to grow.
		if rw.offset > rw.f.attr.Size {
			rw.f.attr.Size = rw.offset
		}
	}()

	mf := rw.f.kernel.MemoryFile()
	// Page-aligned mr for when we need to allocate memory. RoundUp can't
	// overflow since end is an int64.
	pgstartaddr := hostarch.Addr(rw.offset).RoundDown()
	pgendaddr, _ := hostarch.Addr(end).RoundUp()
	pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)}

	var done uint64
	seg, gap := rw.f.data.Find(uint64(rw.offset))
	for rw.offset < end {
		mr := memmap.MappableRange{uint64(rw.offset), uint64(end)}
		switch {
		case seg.Ok():
			// Get internal mappings.
			ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Write)
			if err != nil {
				return done, err
			}

			// Copy to internal mappings.
			n, err := safemem.CopySeq(ims, srcs)
			done += n
			rw.offset += int64(n)
			srcs = srcs.DropFirst64(n)
			if err != nil {
				return done, err
			}

			// Continue.
			seg, gap = seg.NextNonEmpty()

		case gap.Ok():
			// Allocate memory for the write.
			gapMR := gap.Range().Intersect(pgMR)
			fr, err := mf.Allocate(gapMR.Length(), rw.f.memUsage)
			if err != nil {
				return done, err
			}

			// Write to that memory as usual.
			seg, gap = rw.f.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{}
		}
	}
	return done, nil
}

// AddMapping implements memmap.Mappable.AddMapping.
func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
	f.mapsMu.Lock()
	defer f.mapsMu.Unlock()

	f.dataMu.RLock()
	defer f.dataMu.RUnlock()

	// Reject writable mapping if F_SEAL_WRITE is set.
	if f.seals&linux.F_SEAL_WRITE != 0 && writable {
		return linuxerr.EPERM
	}

	f.mappings.AddMapping(ms, ar, offset, writable)
	if writable {
		pagesBefore := f.writableMappingPages

		// ar is guaranteed to be page aligned per memmap.Mappable.
		f.writableMappingPages += uint64(ar.Length() / hostarch.PageSize)

		if f.writableMappingPages < pagesBefore {
			panic(fmt.Sprintf("Overflow while mapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, f.writableMappingPages))
		}
	}

	return nil
}

// RemoveMapping implements memmap.Mappable.RemoveMapping.
func (f *fileInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
	f.mapsMu.Lock()
	defer f.mapsMu.Unlock()

	f.mappings.RemoveMapping(ms, ar, offset, writable)

	if writable {
		pagesBefore := f.writableMappingPages

		// ar is guaranteed to be page aligned per memmap.Mappable.
		f.writableMappingPages -= uint64(ar.Length() / hostarch.PageSize)

		if f.writableMappingPages > pagesBefore {
			panic(fmt.Sprintf("Underflow while unmapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, f.writableMappingPages))
		}
	}
}

// CopyMapping implements memmap.Mappable.CopyMapping.
func (f *fileInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
	return f.AddMapping(ctx, ms, dstAR, offset, writable)
}
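// An illustrative example of the writableMappingPages accounting above
// (assuming 4 KiB pages): a single writable 64 KiB mapping contributes 16
// pages; if a second VMA maps the same 64 KiB writably, the counter reads 32,
// since each mapping is counted separately. AddSeals relies on this counter
// being zero before F_SEAL_WRITE may be applied.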
// Translate implements memmap.Mappable.Translate.
func (f *fileInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
	f.dataMu.Lock()
	defer f.dataMu.Unlock()

	// Constrain translations to f.attr.Size (rounded up) to prevent
	// translation to pages that may be concurrently truncated.
	pgend := fs.OffsetPageEnd(f.attr.Size)
	var beyondEOF bool
	if required.End > pgend {
		if required.Start >= pgend {
			return nil, &memmap.BusError{io.EOF}
		}
		beyondEOF = true
		required.End = pgend
	}
	if optional.End > pgend {
		optional.End = pgend
	}

	mf := f.kernel.MemoryFile()
	cerr := f.data.Fill(ctx, required, optional, uint64(f.attr.Size), mf, f.memUsage, func(_ context.Context, dsts safemem.BlockSeq, _ uint64) (uint64, error) {
		// Newly-allocated pages are zeroed, so we don't need to do anything.
		return dsts.NumBytes(), nil
	})

	var ts []memmap.Translation
	var translatedEnd uint64
	for seg := f.data.FindSegment(required.Start); seg.Ok() && seg.Start() < required.End; seg, _ = seg.NextNonEmpty() {
		segMR := seg.Range().Intersect(optional)
		ts = append(ts, memmap.Translation{
			Source: segMR,
			File:   mf,
			Offset: seg.FileRangeOf(segMR).Start,
			Perms:  hostarch.AnyAccess,
		})
		translatedEnd = segMR.End
	}

	// Don't return the error returned by f.data.Fill if it occurred outside of
	// required.
	if translatedEnd < required.End && cerr != nil {
		return ts, &memmap.BusError{cerr}
	}
	if beyondEOF {
		return ts, &memmap.BusError{io.EOF}
	}
	return ts, nil
}

// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
func (f *fileInodeOperations) InvalidateUnsavable(ctx context.Context) error {
	return nil
}

// GetSeals returns the current set of seals on a memfd inode.
func GetSeals(inode *fs.Inode) (uint32, error) {
	if f, ok := inode.InodeOperations.(*fileInodeOperations); ok {
		f.dataMu.RLock()
		defer f.dataMu.RUnlock()
		return f.seals, nil
	}
	// Not a memfd inode.
	return 0, linuxerr.EINVAL
}

// AddSeals adds new file seals to a memfd inode.
func AddSeals(inode *fs.Inode, val uint32) error {
	if f, ok := inode.InodeOperations.(*fileInodeOperations); ok {
		f.mapsMu.Lock()
		defer f.mapsMu.Unlock()
		f.dataMu.Lock()
		defer f.dataMu.Unlock()

		if f.seals&linux.F_SEAL_SEAL != 0 {
			// Seal applied which prevents addition of any new seals.
			return linuxerr.EPERM
		}

		// F_SEAL_WRITE can only be added if there are no active writable maps.
		if f.seals&linux.F_SEAL_WRITE == 0 && val&linux.F_SEAL_WRITE != 0 {
			if f.writableMappingPages > 0 {
				return linuxerr.EBUSY
			}
		}

		// Seals can only be added, never removed.
		f.seals |= val
		return nil
	}
	// Not a memfd inode.
	return linuxerr.EINVAL
}
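// To summarize the AddSeals error paths above (illustrative sketch):
//
//	err := AddSeals(inode, linux.F_SEAL_WRITE)
//	// EINVAL: inode is not a tmpfs/memfd regular file
//	// EPERM:  F_SEAL_SEAL is already set, so the seal set is frozen
//	// EBUSY:  adding F_SEAL_WRITE while writable mappings exist
//	// nil:    val is OR-ed into f.seals; seals are never removed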