github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/tmpfs/regular_file.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tmpfs

import (
	"fmt"
	"io"
	"math"
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/safemem"
	"github.com/SagerNet/gvisor/pkg/sentry/fs"
	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sentry/pgalloc"
	"github.com/SagerNet/gvisor/pkg/sentry/usage"
	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/syserror"
	"github.com/SagerNet/gvisor/pkg/usermem"
)

// regularFile is a regular (=S_IFREG) tmpfs file.
//
// +stateify savable
type regularFile struct {
	inode inode

	// memFile is a platform.File used to allocate pages to this regularFile.
	memFile *pgalloc.MemoryFile `state:"nosave"`

	// memoryUsageKind is the memory accounting category under which pages backing
	// this regularFile's contents are accounted.
	memoryUsageKind usage.MemoryKind

	// mapsMu protects mappings.
	mapsMu sync.Mutex `state:"nosave"`

	// mappings tracks mappings of the file into memmap.MappingSpaces.
	//
	// Protected by mapsMu.
	mappings memmap.MappingSet

	// writableMappingPages tracks how many pages of virtual memory are mapped
	// as potentially writable from this file. If a page has multiple mappings,
	// each mapping is counted separately.
	//
	// This counter is susceptible to overflow as we can potentially count
	// mappings from many VMAs. We count pages rather than bytes to slightly
	// mitigate this.
	//
	// Protected by mapsMu.
	writableMappingPages uint64

	// dataMu protects the fields below.
	dataMu sync.RWMutex `state:"nosave"`

	// data maps offsets into the file to offsets into memFile that store
	// the file's data.
	//
	// Protected by dataMu.
	data fsutil.FileRangeSet

	// seals represents file seals on this inode.
	//
	// Protected by dataMu.
	seals uint32

	// size is the size of data.
	//
	// Protected by both dataMu and inode.mu; reading it requires holding
	// either mutex, while writing requires holding both AND using atomics.
	// Readers that do not require consistency (like Stat) may read the
	// value atomically without holding either lock.
	size uint64
}

func (fs *filesystem) newRegularFile(kuid auth.KUID, kgid auth.KGID, mode linux.FileMode, parentDir *directory) *inode {
	file := &regularFile{
		memFile:         fs.mfp.MemoryFile(),
		memoryUsageKind: usage.Tmpfs,
		seals:           linux.F_SEAL_SEAL,
	}
	file.inode.init(file, fs, kuid, kgid, linux.S_IFREG|mode, parentDir)
	file.inode.nlink = 1 // from parent directory
	return &file.inode
}

// newUnlinkedRegularFileDescription creates a regular file on the tmpfs
// filesystem represented by mount and returns an FD representing that file.
// The new file is not reachable by path traversal from any other file.
//
// newUnlinkedRegularFileDescription is analogous to Linux's
// mm/shmem.c:__shmem_file_setup().
//
// Preconditions: mount must be a tmpfs mount.
func newUnlinkedRegularFileDescription(ctx context.Context, creds *auth.Credentials, mount *vfs.Mount, name string) (*regularFileFD, error) {
	fs, ok := mount.Filesystem().Impl().(*filesystem)
	if !ok {
		panic("tmpfs.newUnlinkedRegularFileDescription() called with non-tmpfs mount")
	}

	inode := fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, 0777, nil /* parentDir */)
	d := fs.newDentry(inode)
	defer d.DecRef(ctx)
	d.name = name

	fd := &regularFileFD{}
	fd.Init(&inode.locks)
	flags := uint32(linux.O_RDWR)
	if err := fd.vfsfd.Init(fd, flags, mount, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
		return nil, err
	}
	return fd, nil
}

// NewZeroFile creates a new regular file and file description as for
// mmap(MAP_SHARED | MAP_ANONYMOUS). The file has the given size and is
// initially (implicitly) filled with zeroes.
//
// Preconditions: mount must be a tmpfs mount.
func NewZeroFile(ctx context.Context, creds *auth.Credentials, mount *vfs.Mount, size uint64) (*vfs.FileDescription, error) {
	// Compare mm/shmem.c:shmem_zero_setup().
	fd, err := newUnlinkedRegularFileDescription(ctx, creds, mount, "dev/zero")
	if err != nil {
		return nil, err
	}
	rf := fd.inode().impl.(*regularFile)
	rf.memoryUsageKind = usage.Anonymous
	rf.size = size
	return &fd.vfsfd, err
}

// NewMemfd creates a new regular file and file description as for
// memfd_create.
//
// Preconditions: mount must be a tmpfs mount.
func NewMemfd(ctx context.Context, creds *auth.Credentials, mount *vfs.Mount, allowSeals bool, name string) (*vfs.FileDescription, error) {
	fd, err := newUnlinkedRegularFileDescription(ctx, creds, mount, name)
	if err != nil {
		return nil, err
	}
	if allowSeals {
		fd.inode().impl.(*regularFile).seals = 0
	}
	return &fd.vfsfd, nil
}

// truncate grows or shrinks the file to the given size. It returns true if the
// file size was updated.
func (rf *regularFile) truncate(newSize uint64) (bool, error) {
	rf.inode.mu.Lock()
	defer rf.inode.mu.Unlock()
	return rf.truncateLocked(newSize)
}
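
// Illustrative sketch (not part of the original source): how a caller such as
// an ftruncate(2) implementation might drive truncate, and how seals surface
// as EPERM. Everything outside the truncate call itself is hypothetical.
//
//	rf := fd.inode().impl.(*regularFile)
//	updated, err := rf.truncate(newSize)
//	if err != nil {
//		// F_SEAL_GROW forbids growing; F_SEAL_SHRINK forbids shrinking.
//		return err // linuxerr.EPERM if a seal blocked the resize.
//	}
//	if updated {
//		// The size changed; timestamp updates are the caller's concern.
//	}
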
// Preconditions: rf.inode.mu must be held.
func (rf *regularFile) truncateLocked(newSize uint64) (bool, error) {
	oldSize := rf.size
	if newSize == oldSize {
		// Nothing to do.
		return false, nil
	}

	// Need to hold inode.mu and dataMu while modifying size.
	rf.dataMu.Lock()
	if newSize > oldSize {
		// Can we grow the file?
		if rf.seals&linux.F_SEAL_GROW != 0 {
			rf.dataMu.Unlock()
			return false, linuxerr.EPERM
		}
		// We only need to update the file size.
		atomic.StoreUint64(&rf.size, newSize)
		rf.dataMu.Unlock()
		return true, nil
	}

	// We are shrinking the file. First check if this is allowed.
	if rf.seals&linux.F_SEAL_SHRINK != 0 {
		rf.dataMu.Unlock()
		return false, linuxerr.EPERM
	}

	// Update the file size.
	atomic.StoreUint64(&rf.size, newSize)
	rf.dataMu.Unlock()

	// Invalidate past translations of truncated pages.
	oldpgend := fs.OffsetPageEnd(int64(oldSize))
	newpgend := fs.OffsetPageEnd(int64(newSize))
	if newpgend < oldpgend {
		rf.mapsMu.Lock()
		rf.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
			// Compare Linux's mm/shmem.c:shmem_setattr() =>
			// mm/memory.c:unmap_mapping_range(evencows=1).
			InvalidatePrivate: true,
		})
		rf.mapsMu.Unlock()
	}

	// We are now guaranteed that there are no translations of truncated pages,
	// and can remove them.
	rf.dataMu.Lock()
	rf.data.Truncate(newSize, rf.memFile)
	rf.dataMu.Unlock()
	return true, nil
}

// AddMapping implements memmap.Mappable.AddMapping.
func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
	rf.mapsMu.Lock()
	defer rf.mapsMu.Unlock()
	rf.dataMu.RLock()
	defer rf.dataMu.RUnlock()

	// Reject writable mapping if F_SEAL_WRITE is set.
	if rf.seals&linux.F_SEAL_WRITE != 0 && writable {
		return linuxerr.EPERM
	}

	rf.mappings.AddMapping(ms, ar, offset, writable)
	if writable {
		pagesBefore := rf.writableMappingPages

		// ar is guaranteed to be page aligned per memmap.Mappable.
		rf.writableMappingPages += uint64(ar.Length() / hostarch.PageSize)

		if rf.writableMappingPages < pagesBefore {
			panic(fmt.Sprintf("Overflow while mapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, rf.writableMappingPages))
		}
	}

	return nil
}

// RemoveMapping implements memmap.Mappable.RemoveMapping.
func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
	rf.mapsMu.Lock()
	defer rf.mapsMu.Unlock()

	rf.mappings.RemoveMapping(ms, ar, offset, writable)

	if writable {
		pagesBefore := rf.writableMappingPages

		// ar is guaranteed to be page aligned per memmap.Mappable.
		rf.writableMappingPages -= uint64(ar.Length() / hostarch.PageSize)

		if rf.writableMappingPages > pagesBefore {
			panic(fmt.Sprintf("Underflow while unmapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, rf.writableMappingPages))
		}
	}
}

// CopyMapping implements memmap.Mappable.CopyMapping.
func (rf *regularFile) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
	return rf.AddMapping(ctx, ms, dstAR, offset, writable)
}
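
// Worked example (illustrative, not part of the original source): the
// writableMappingPages accounting above counts pages per mapping, so two
// MAP_SHARED|PROT_WRITE mappings of the same 16KiB range contribute
// 2 * (16384 / 4096) = 8 pages. F_SEAL_WRITE cannot be added (EBUSY in
// AddSeals below) until both are removed and the counter returns to zero.
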
// Translate implements memmap.Mappable.Translate.
func (rf *regularFile) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
	rf.dataMu.Lock()
	defer rf.dataMu.Unlock()

	// Constrain translations to rf.size (rounded up) to prevent
	// translation to pages that may be concurrently truncated.
	pgend := fs.OffsetPageEnd(int64(rf.size))
	var beyondEOF bool
	if required.End > pgend {
		if required.Start >= pgend {
			return nil, &memmap.BusError{io.EOF}
		}
		beyondEOF = true
		required.End = pgend
	}
	if optional.End > pgend {
		optional.End = pgend
	}

	cerr := rf.data.Fill(ctx, required, optional, rf.size, rf.memFile, rf.memoryUsageKind, func(_ context.Context, dsts safemem.BlockSeq, _ uint64) (uint64, error) {
		// Newly-allocated pages are zeroed, so we don't need to do anything.
		return dsts.NumBytes(), nil
	})

	var ts []memmap.Translation
	var translatedEnd uint64
	for seg := rf.data.FindSegment(required.Start); seg.Ok() && seg.Start() < required.End; seg, _ = seg.NextNonEmpty() {
		segMR := seg.Range().Intersect(optional)
		ts = append(ts, memmap.Translation{
			Source: segMR,
			File:   rf.memFile,
			Offset: seg.FileRangeOf(segMR).Start,
			Perms:  hostarch.AnyAccess,
		})
		translatedEnd = segMR.End
	}

	// Don't return the error returned by rf.data.Fill if it occurred outside
	// of required.
	if translatedEnd < required.End && cerr != nil {
		return ts, &memmap.BusError{cerr}
	}
	if beyondEOF {
		return ts, &memmap.BusError{io.EOF}
	}
	return ts, nil
}

// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
func (*regularFile) InvalidateUnsavable(context.Context) error {
	return nil
}

// +stateify savable
type regularFileFD struct {
	fileDescription

	// off is the file offset. off is accessed using atomic memory operations.
	// offMu serializes operations that may mutate off.
	off   int64
	offMu sync.Mutex `state:"nosave"`
}

// Release implements vfs.FileDescriptionImpl.Release.
func (fd *regularFileFD) Release(context.Context) {
	// noop
}

// Allocate implements vfs.FileDescriptionImpl.Allocate.
func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
	f := fd.inode().impl.(*regularFile)

	f.inode.mu.Lock()
	defer f.inode.mu.Unlock()
	oldSize := f.size
	size := offset + length
	if oldSize >= size {
		return nil
	}
	_, err := f.truncateLocked(size)
	return err
}
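
// Illustrative note (not part of the original source): Allocate only ever
// grows the file; it extends the size to at most offset+length and never
// shrinks or punches holes. For example, with a current size of 8192:
//
//	fd.Allocate(ctx, 0, 0, 4096)    // no-op: 8192 >= 0+4096
//	fd.Allocate(ctx, 0, 8192, 4096) // grows the file to 12288 bytes
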
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
	start := fsmetric.StartReadWait()
	defer fsmetric.FinishReadWait(fsmetric.TmpfsReadWait, start)
	fsmetric.TmpfsReads.Increment()

	if offset < 0 {
		return 0, linuxerr.EINVAL
	}

	// Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since
	// all state is in-memory.
	//
	// TODO(github.com/SagerNet/issue/2601): Support select preadv2 flags.
	if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 {
		return 0, syserror.EOPNOTSUPP
	}

	if dst.NumBytes() == 0 {
		return 0, nil
	}
	f := fd.inode().impl.(*regularFile)
	rw := getRegularFileReadWriter(f, offset)
	n, err := dst.CopyOutFrom(ctx, rw)
	putRegularFileReadWriter(rw)
	fd.inode().touchAtime(fd.vfsfd.Mount())
	return n, err
}

// Read implements vfs.FileDescriptionImpl.Read.
func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
	fd.offMu.Lock()
	n, err := fd.PRead(ctx, dst, fd.off, opts)
	fd.off += n
	fd.offMu.Unlock()
	return n, err
}

// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
	n, _, err := fd.pwrite(ctx, src, offset, opts)
	return n, err
}

// pwrite returns the number of bytes written, final offset and error. The
// final offset should be ignored by PWrite.
func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
	if offset < 0 {
		return 0, offset, linuxerr.EINVAL
	}

	// Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since
	// all state is in-memory.
	//
	// TODO(github.com/SagerNet/issue/2601): Support select preadv2 flags.
	if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 {
		return 0, offset, syserror.EOPNOTSUPP
	}

	srclen := src.NumBytes()
	if srclen == 0 {
		return 0, offset, nil
	}
	f := fd.inode().impl.(*regularFile)
	f.inode.mu.Lock()
	defer f.inode.mu.Unlock()
	// If the file is opened with O_APPEND, update offset to file size.
	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
		// Locking f.inode.mu is sufficient for reading f.size.
		offset = int64(f.size)
	}
	if end := offset + srclen; end < offset {
		// Overflow.
		return 0, offset, linuxerr.EINVAL
	}

	srclen, err = vfs.CheckLimit(ctx, offset, srclen)
	if err != nil {
		return 0, offset, err
	}
	src = src.TakeFirst64(srclen)

	rw := getRegularFileReadWriter(f, offset)
	n, err := src.CopyInTo(ctx, rw)
	f.inode.touchCMtimeLocked()
	for {
		old := atomic.LoadUint32(&f.inode.mode)
		new := vfs.ClearSUIDAndSGID(old)
		if swapped := atomic.CompareAndSwapUint32(&f.inode.mode, old, new); swapped {
			break
		}
	}
	putRegularFileReadWriter(rw)
	return n, n + offset, err
}

// Write implements vfs.FileDescriptionImpl.Write.
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
	fd.offMu.Lock()
	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
	fd.off = off
	fd.offMu.Unlock()
	return n, err
}
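
// Illustrative note (not part of the original source): pwrite returns the
// final offset so that Write can honor O_APPEND, where the effective write
// position is the file size rather than fd.off. A rough sequence:
//
//	// File size is 100; fd.off is 10; fd was opened with O_APPEND.
//	n, off, _ := fd.pwrite(ctx, src, 10, opts) // actually writes at offset 100
//	// off == 100 + n, which Write stores back into fd.off.
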
// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
	fd.offMu.Lock()
	defer fd.offMu.Unlock()
	switch whence {
	case linux.SEEK_SET:
		// use offset as specified
	case linux.SEEK_CUR:
		offset += fd.off
	case linux.SEEK_END:
		offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size))
	default:
		return 0, linuxerr.EINVAL
	}
	if offset < 0 {
		return 0, linuxerr.EINVAL
	}
	fd.off = offset
	return offset, nil
}

// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
	file := fd.inode().impl.(*regularFile)
	opts.SentryOwnedContent = true
	return vfs.GenericConfigureMMap(&fd.vfsfd, file, opts)
}

// regularFileReadWriter implements safemem.Reader and safemem.Writer.
type regularFileReadWriter struct {
	file *regularFile

	// Offset into the file to read/write at. Note that this may be
	// different from the FD offset if PRead/PWrite is used.
	off uint64
}

var regularFileReadWriterPool = sync.Pool{
	New: func() interface{} {
		return &regularFileReadWriter{}
	},
}

func getRegularFileReadWriter(file *regularFile, offset int64) *regularFileReadWriter {
	rw := regularFileReadWriterPool.Get().(*regularFileReadWriter)
	rw.file = file
	rw.off = uint64(offset)
	return rw
}

func putRegularFileReadWriter(rw *regularFileReadWriter) {
	rw.file = nil
	regularFileReadWriterPool.Put(rw)
}

// ReadToBlocks implements safemem.Reader.ReadToBlocks.
func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
	rw.file.dataMu.RLock()
	defer rw.file.dataMu.RUnlock()
	size := rw.file.size

	// Compute the range to read (limited by file size and overflow-checked).
	if rw.off >= size {
		return 0, io.EOF
	}
	end := size
	if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end {
		end = rend
	}

	var done uint64
	seg, gap := rw.file.data.Find(uint64(rw.off))
	for rw.off < end {
		mr := memmap.MappableRange{uint64(rw.off), uint64(end)}
		switch {
		case seg.Ok():
			// Get internal mappings.
			ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read)
			if err != nil {
				return done, err
			}

			// Copy from internal mappings.
			n, err := safemem.CopySeq(dsts, ims)
			done += n
			rw.off += uint64(n)
			dsts = dsts.DropFirst64(n)
			if err != nil {
				return done, err
			}

			// Continue.
			seg, gap = seg.NextNonEmpty()

		case gap.Ok():
			// Tmpfs holes are zero-filled.
			gapmr := gap.Range().Intersect(mr)
			dst := dsts.TakeFirst64(gapmr.Length())
			n, err := safemem.ZeroSeq(dst)
			done += n
			rw.off += uint64(n)
			dsts = dsts.DropFirst64(n)
			if err != nil {
				return done, err
			}

			// Continue.
			seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
		}
	}
	return done, nil
}
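
// Illustrative note (not part of the original source): because data is a
// sparse FileRangeSet, a single read can cross allocated segments and holes.
// For a file where only [0, 4096) was ever written, reading [0, 8192) copies
// the first page out of memFile and zero-fills the second page via
// safemem.ZeroSeq, without ever allocating backing memory for the hole.
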
// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
//
// Preconditions: rw.file.inode.mu must be held.
func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
	// Hold dataMu so we can modify size.
	rw.file.dataMu.Lock()
	defer rw.file.dataMu.Unlock()

	// Compute the range to write (overflow-checked).
	end := rw.off + srcs.NumBytes()
	if end <= rw.off {
		// Overflow.
		end = math.MaxInt64
	}

	// Check if seals prevent either file growth or all writes.
	switch {
	case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed
		return 0, linuxerr.EPERM
	case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed
		// When growth is sealed, Linux effectively allows writes which would
		// normally grow the file to partially succeed up to the current EOF,
		// rounded down to the page boundary before the EOF.
		//
		// This happens because writes (and thus the growth check) for tmpfs
		// files proceed page-by-page on Linux, and the final write to the page
		// containing EOF fails, resulting in a partial write up to the start of
		// that page.
		//
		// To emulate this behaviour, artificially truncate the write to the
		// start of the page containing the current EOF.
		//
		// See Linux, mm/filemap.c:generic_perform_write() and
		// mm/shmem.c:shmem_write_begin().
		if pgstart := uint64(hostarch.Addr(rw.file.size).RoundDown()); end > pgstart {
			end = pgstart
		}
		if end <= rw.off {
			// Truncation would result in no data being written.
			return 0, linuxerr.EPERM
		}
	}

	// Page-aligned mr for when we need to allocate memory. RoundUp can't
	// overflow since end is an int64.
	pgstartaddr := hostarch.Addr(rw.off).RoundDown()
	pgendaddr, _ := hostarch.Addr(end).RoundUp()
	pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)}

	var (
		done   uint64
		retErr error
	)
	seg, gap := rw.file.data.Find(uint64(rw.off))
	for rw.off < end {
		mr := memmap.MappableRange{uint64(rw.off), uint64(end)}
		switch {
		case seg.Ok():
			// Get internal mappings.
			ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Write)
			if err != nil {
				retErr = err
				goto exitLoop
			}

			// Copy to internal mappings.
			n, err := safemem.CopySeq(ims, srcs)
			done += n
			rw.off += uint64(n)
			srcs = srcs.DropFirst64(n)
			if err != nil {
				retErr = err
				goto exitLoop
			}

			// Continue.
			seg, gap = seg.NextNonEmpty()

		case gap.Ok():
			// Allocate memory for the write.
			gapMR := gap.Range().Intersect(pgMR)
			fr, err := rw.file.memFile.Allocate(gapMR.Length(), rw.file.memoryUsageKind)
			if err != nil {
				retErr = err
				goto exitLoop
			}

			// Write to that memory as usual.
			seg, gap = rw.file.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{}

		default:
			panic("unreachable")
		}
	}
exitLoop:
	// If the write ends beyond the file's previous size, it causes the
	// file to grow.
	if rw.off > rw.file.size {
		atomic.StoreUint64(&rw.file.size, rw.off)
	}

	return done, retErr
}

// GetSeals returns the current set of seals on a memfd inode.
func GetSeals(fd *vfs.FileDescription) (uint32, error) {
	f, ok := fd.Impl().(*regularFileFD)
	if !ok {
		return 0, linuxerr.EINVAL
	}
	rf := f.inode().impl.(*regularFile)
	rf.dataMu.RLock()
	defer rf.dataMu.RUnlock()
	return rf.seals, nil
}
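
// Illustrative sketch (not part of the original source): GetSeals backs the
// fcntl(F_GET_SEALS) path. A hypothetical caller:
//
//	seals, err := tmpfs.GetSeals(fd) // EINVAL if fd is not a tmpfs regular file
//	if err == nil && seals&linux.F_SEAL_WRITE != 0 {
//		// The file's contents can no longer be modified.
//	}
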
// AddSeals adds new file seals to a memfd inode.
func AddSeals(fd *vfs.FileDescription, val uint32) error {
	f, ok := fd.Impl().(*regularFileFD)
	if !ok {
		return linuxerr.EINVAL
	}
	rf := f.inode().impl.(*regularFile)
	rf.mapsMu.Lock()
	defer rf.mapsMu.Unlock()
	// seals is protected by dataMu and mutated below, so take the write lock.
	rf.dataMu.Lock()
	defer rf.dataMu.Unlock()

	if rf.seals&linux.F_SEAL_SEAL != 0 {
		// Seal applied which prevents addition of any new seals.
		return linuxerr.EPERM
	}

	// F_SEAL_WRITE can only be added if there are no active writable maps.
	if rf.seals&linux.F_SEAL_WRITE == 0 && val&linux.F_SEAL_WRITE != 0 {
		if rf.writableMappingPages > 0 {
			return linuxerr.EBUSY
		}
	}

	// Seals can only be added, never removed.
	rf.seals |= val
	return nil
}
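
// Illustrative sketch (not part of the original source): the memfd sealing
// lifecycle as seen through AddSeals, assuming the fd came from NewMemfd with
// allowSeals=true (so the initial seal set is empty):
//
//	_ = tmpfs.AddSeals(fd, linux.F_SEAL_GROW|linux.F_SEAL_SHRINK) // ok
//	// While a writable shared mapping is still live:
//	//   AddSeals(fd, linux.F_SEAL_WRITE) => EBUSY
//	_ = tmpfs.AddSeals(fd, linux.F_SEAL_SEAL)
//	// From now on, any further AddSeals returns EPERM.
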