github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/pgalloc/pgalloc.go
1 // Copyright 2018 The gVisor Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Package pgalloc contains the page allocator subsystem, which manages memory
16 // that may be mapped into application address spaces.
17 //
18 // Lock order:
19 //
20 // pgalloc.MemoryFile.mu
21 //   pgalloc.MemoryFile.mappingsMu
22 package pgalloc
23 
24 import (
25 	"fmt"
26 	"math"
27 	"os"
28 	"sync/atomic"
29 	"time"
30 
31 	"golang.org/x/sys/unix"
32 	"github.com/SagerNet/gvisor/pkg/abi/linux"
33 	"github.com/SagerNet/gvisor/pkg/context"
34 	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
35 	"github.com/SagerNet/gvisor/pkg/hostarch"
36 	"github.com/SagerNet/gvisor/pkg/log"
37 	"github.com/SagerNet/gvisor/pkg/safemem"
38 	"github.com/SagerNet/gvisor/pkg/sentry/hostmm"
39 	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
40 	"github.com/SagerNet/gvisor/pkg/sentry/usage"
41 	"github.com/SagerNet/gvisor/pkg/sync"
42 	"github.com/SagerNet/gvisor/pkg/syserror"
43 )
44 
45 // MemoryFile is a memmap.File whose pages may be allocated to arbitrary
46 // users.
47 type MemoryFile struct {
48 	// opts holds options passed to NewMemoryFile. opts is immutable.
49 	opts MemoryFileOpts
50 
51 	// MemoryFile owns a single backing file, which is modeled as follows:
52 	//
53 	// Each page in the file can be committed or uncommitted. A page is
54 	// committed if the host kernel is spending resources to store its contents
55 	// and uncommitted otherwise. This definition includes pages that the host
56 	// kernel has swapped; this is intentional, to ensure that accounting does
57 	// not change even if host kernel swapping behavior changes, and that
58 	// memory used by pseudo-swap mechanisms like zswap is still accounted.
59 	//
60 	// The initial contents of uncommitted pages are implicitly zero bytes. A
61 	// read or write to the contents of an uncommitted page causes it to be
62 	// committed. This is the only event that can cause an uncommitted page to
63 	// be committed.
64 	//
65 	// fallocate(FALLOC_FL_PUNCH_HOLE) (MemoryFile.Decommit) causes committed
66 	// pages to be uncommitted. This is the only event that can cause a
67 	// committed page to be uncommitted.
68 	//
69 	// Memory accounting is based on identifying the set of committed pages.
70 	// Since we do not have direct access to the MMU, tracking reads and writes
71 	// to uncommitted pages to detect commitment would introduce additional
72 	// page faults, which would be prohibitively expensive. Instead, we query
73 	// the host kernel to determine which pages are committed.
74 
75 	// file is the backing file. The file pointer is immutable.
76 	file *os.File
77 
78 	mu sync.Mutex
79 
80 	// usage maps each page in the file to metadata for that page. Pages for
81 	// which no segment exists in usage are both unallocated (not in use) and
82 	// uncommitted.
83 	//
84 	// Since usage stores usageInfo objects by value, clients should usually
85 	// use usageIterator.ValuePtr() instead of usageIterator.Value() to get a
86 	// pointer to the usageInfo rather than a copy.
87 	//
88 	// usage must be kept maximally merged (that is, there should never be two
89 	// adjacent segments with the same values). At least markReclaimed depends
90 	// on this property.
91 	//
92 	// usage is protected by mu.
93 	usage usageSet
94 
95 	// The UpdateUsage function scans all segments with knownCommitted set
96 	// to false, sees which pages are committed and creates corresponding
97 	// segments with knownCommitted set to true.
98 	//
99 	// In order to avoid unnecessary scans, usageExpected tracks the total
100 	// file blocks expected. This is used to elide the scan when this
101 	// matches the underlying file blocks.
102 	//
103 	// To track swapped pages, usageSwapped tracks the discrepancy between
104 	// what is observed in core and what is reported by the file. When
105 	// usageSwapped is non-zero, a sweep will be performed at least every
106 	// second. The start of the last sweep is recorded in usageLast.
107 	//
108 	// All usage attributes are protected by mu.
109 	usageExpected uint64
110 	usageSwapped  uint64
111 	usageLast     time.Time
112 
113 	// fileSize is the size of the backing memory file in bytes. fileSize is
114 	// always a power-of-two multiple of chunkSize.
115 	//
116 	// fileSize is protected by mu.
117 	fileSize int64
118 
119 	// Pages from the backing file are mapped into the local address space on
120 	// the granularity of large pieces called chunks. mappings is a []uintptr
121 	// that stores, for each chunk, the start address of a mapping of that
122 	// chunk in the current process' address space, or 0 if no such mapping
123 	// exists. Once a chunk is mapped, it is never remapped or unmapped until
124 	// the MemoryFile is destroyed.
125 	//
126 	// Mutating the mappings slice or its contents requires both holding
127 	// mappingsMu and using atomic memory operations. (The slice is mutated
128 	// whenever the file is expanded. Per the above, the only permitted
129 	// mutation of the slice's contents is the assignment of a mapping to a
130 	// chunk that was previously unmapped.) Reading the slice or its contents
131 	// only requires *either* holding mappingsMu or using atomic memory
132 	// operations. This allows MemoryFile.MapInternal to avoid locking in the
133 	// common case where chunk mappings already exist.
134 	mappingsMu sync.Mutex
135 	mappings   atomic.Value
136 
137 	// destroyed is set by Destroy to instruct the reclaimer goroutine to
138 	// release resources and exit. destroyed is protected by mu.
139 	destroyed bool
140 
141 	// reclaimable is true if usage may contain reclaimable pages. reclaimable
142 	// is protected by mu.
143 	reclaimable bool
144 
145 	// reclaim is the collection of regions for reclaim. reclaim is protected
146 	// by mu.
147 	reclaim reclaimSet
148 
149 	// reclaimCond is signaled (with mu locked) when reclaimable or destroyed
150 	// transitions from false to true.
151 	reclaimCond sync.Cond
152 
153 	// evictable maps EvictableMemoryUsers to eviction state.
154 	//
155 	// evictable is protected by mu.
156 	evictable map[EvictableMemoryUser]*evictableMemoryUserInfo
157 
158 	// evictionWG counts the number of goroutines currently performing evictions.
159 	evictionWG sync.WaitGroup
160 
161 	// stopNotifyPressure stops memory cgroup pressure level
162 	// notifications used to drive eviction. stopNotifyPressure is
163 	// immutable.
164 stopNotifyPressure func() 165 } 166 167 // MemoryFileOpts provides options to NewMemoryFile. 168 type MemoryFileOpts struct { 169 // DelayedEviction controls the extent to which the MemoryFile may delay 170 // eviction of evictable allocations. 171 DelayedEviction DelayedEvictionType 172 173 // If UseHostMemcgPressure is true, use host memory cgroup pressure level 174 // notifications to determine when eviction is necessary. This option has 175 // no effect unless DelayedEviction is DelayedEvictionEnabled. 176 UseHostMemcgPressure bool 177 178 // If ManualZeroing is true, MemoryFile must not assume that new pages 179 // obtained from the host are zero-filled, such that MemoryFile must manually 180 // zero newly-allocated pages. 181 ManualZeroing bool 182 } 183 184 // DelayedEvictionType is the type of MemoryFileOpts.DelayedEviction. 185 type DelayedEvictionType int 186 187 const ( 188 // DelayedEvictionDefault has unspecified behavior. 189 DelayedEvictionDefault DelayedEvictionType = iota 190 191 // DelayedEvictionDisabled requires that evictable allocations are evicted 192 // as soon as possible. 193 DelayedEvictionDisabled 194 195 // DelayedEvictionEnabled requests that the MemoryFile delay eviction of 196 // evictable allocations until doing so is considered necessary to avoid 197 // performance degradation due to host memory pressure, or OOM kills. 198 // 199 // As of this writing, the behavior of DelayedEvictionEnabled depends on 200 // whether or not MemoryFileOpts.UseHostMemcgPressure is enabled: 201 // 202 // - If UseHostMemcgPressure is true, evictions are delayed until memory 203 // pressure is indicated. 204 // 205 // - Otherwise, evictions are only delayed until the reclaimer goroutine 206 // is out of work (pages to reclaim). 207 DelayedEvictionEnabled 208 209 // DelayedEvictionManual requires that evictable allocations are only 210 // evicted when MemoryFile.StartEvictions() is called. This is extremely 211 // dangerous outside of tests. 212 DelayedEvictionManual 213 ) 214 215 // usageInfo tracks usage information. 216 // 217 // +stateify savable 218 type usageInfo struct { 219 // kind is the usage kind. 220 kind usage.MemoryKind 221 222 // knownCommitted is true if the tracked region is definitely committed. 223 // (If it is false, the tracked region may or may not be committed.) 224 knownCommitted bool 225 226 refs uint64 227 } 228 229 // canCommit returns true if the tracked region can be committed. 230 func (u *usageInfo) canCommit() bool { 231 // refs must be greater than 0 because we assume that reclaimable pages 232 // (that aren't already known to be committed) are not committed. This 233 // isn't necessarily true, even after the reclaimer does Decommit(), 234 // because the kernel may subsequently back the hugepage-sized region 235 // containing the decommitted page with a hugepage. However, it's 236 // consistent with our treatment of unallocated pages, which have the same 237 // property. 238 return !u.knownCommitted && u.refs != 0 239 } 240 241 // An EvictableMemoryUser represents a user of MemoryFile-allocated memory that 242 // may be asked to deallocate that memory in the presence of memory pressure. 243 type EvictableMemoryUser interface { 244 // Evict requests that the EvictableMemoryUser deallocate memory used by 245 // er, which was registered as evictable by a previous call to 246 // MemoryFile.MarkEvictable. 247 // 248 // Evict is not required to deallocate memory. 
In particular, since pgalloc 249 // must call Evict without holding locks to avoid circular lock ordering, 250 // it is possible that the passed range has already been marked as 251 // unevictable by a racing call to MemoryFile.MarkUnevictable. 252 // Implementations of EvictableMemoryUser must detect such races and handle 253 // them by making Evict have no effect on unevictable ranges. 254 // 255 // After a call to Evict, the MemoryFile will consider the evicted range 256 // unevictable (i.e. it will not call Evict on the same range again) until 257 // informed otherwise by a subsequent call to MarkEvictable. 258 Evict(ctx context.Context, er EvictableRange) 259 } 260 261 // An EvictableRange represents a range of uint64 offsets in an 262 // EvictableMemoryUser. 263 // 264 // In practice, most EvictableMemoryUsers will probably be implementations of 265 // memmap.Mappable, and EvictableRange therefore corresponds to 266 // memmap.MappableRange. However, this package cannot depend on the memmap 267 // package, since doing so would create a circular dependency. 268 // 269 // type EvictableRange <generated using go_generics> 270 271 // evictableMemoryUserInfo is the value type of MemoryFile.evictable. 272 type evictableMemoryUserInfo struct { 273 // ranges tracks all evictable ranges for the given user. 274 ranges evictableRangeSet 275 276 // If evicting is true, there is a goroutine currently evicting all 277 // evictable ranges for this user. 278 evicting bool 279 } 280 281 const ( 282 chunkShift = 30 283 chunkSize = 1 << chunkShift // 1 GB 284 chunkMask = chunkSize - 1 285 286 // maxPage is the highest 64-bit page. 287 maxPage = math.MaxUint64 &^ (hostarch.PageSize - 1) 288 ) 289 290 // NewMemoryFile creates a MemoryFile backed by the given file. If 291 // NewMemoryFile succeeds, ownership of file is transferred to the returned 292 // MemoryFile. 293 func NewMemoryFile(file *os.File, opts MemoryFileOpts) (*MemoryFile, error) { 294 switch opts.DelayedEviction { 295 case DelayedEvictionDefault: 296 opts.DelayedEviction = DelayedEvictionEnabled 297 case DelayedEvictionDisabled, DelayedEvictionManual: 298 opts.UseHostMemcgPressure = false 299 case DelayedEvictionEnabled: 300 // ok 301 default: 302 return nil, fmt.Errorf("invalid MemoryFileOpts.DelayedEviction: %v", opts.DelayedEviction) 303 } 304 305 // Truncate the file to 0 bytes first to ensure that it's empty. 306 if err := file.Truncate(0); err != nil { 307 return nil, err 308 } 309 f := &MemoryFile{ 310 opts: opts, 311 file: file, 312 evictable: make(map[EvictableMemoryUser]*evictableMemoryUserInfo), 313 } 314 f.mappings.Store(make([]uintptr, 0)) 315 f.reclaimCond.L = &f.mu 316 317 if f.opts.DelayedEviction == DelayedEvictionEnabled && f.opts.UseHostMemcgPressure { 318 stop, err := hostmm.NotifyCurrentMemcgPressureCallback(func() { 319 f.mu.Lock() 320 startedAny := f.startEvictionsLocked() 321 f.mu.Unlock() 322 if startedAny { 323 log.Debugf("pgalloc.MemoryFile performing evictions due to memcg pressure") 324 } 325 }, "low") 326 if err != nil { 327 return nil, fmt.Errorf("failed to configure memcg pressure level notifications: %v", err) 328 } 329 f.stopNotifyPressure = stop 330 } 331 332 go f.runReclaim() // S/R-SAFE: f.mu 333 334 // The Linux kernel contains an optional feature called "Integrity 335 // Measurement Architecture" (IMA). If IMA is enabled, it will checksum 336 // binaries the first time they are mapped PROT_EXEC. 
This is bad news for 337 // executable pages mapped from our backing file, which can grow to 338 // terabytes in (sparse) size. If IMA attempts to checksum a file that 339 // large, it will allocate all of the sparse pages and quickly exhaust all 340 // memory. 341 // 342 // Work around IMA by immediately creating a temporary PROT_EXEC mapping, 343 // while the backing file is still small. IMA will ignore any future 344 // mappings. 345 m, _, errno := unix.Syscall6( 346 unix.SYS_MMAP, 347 0, 348 hostarch.PageSize, 349 unix.PROT_EXEC, 350 unix.MAP_SHARED, 351 file.Fd(), 352 0) 353 if errno != 0 { 354 // This isn't fatal (IMA may not even be in use). Log the error, but 355 // don't return it. 356 log.Warningf("Failed to pre-map MemoryFile PROT_EXEC: %v", errno) 357 } else { 358 if _, _, errno := unix.Syscall( 359 unix.SYS_MUNMAP, 360 m, 361 hostarch.PageSize, 362 0); errno != 0 { 363 panic(fmt.Sprintf("failed to unmap PROT_EXEC MemoryFile mapping: %v", errno)) 364 } 365 } 366 367 return f, nil 368 } 369 370 // Destroy releases all resources used by f. 371 // 372 // Preconditions: All pages allocated by f have been freed. 373 // 374 // Postconditions: None of f's methods may be called after Destroy. 375 func (f *MemoryFile) Destroy() { 376 f.mu.Lock() 377 defer f.mu.Unlock() 378 f.destroyed = true 379 f.reclaimCond.Signal() 380 } 381 382 // Allocate returns a range of initially-zeroed pages of the given length with 383 // the given accounting kind and a single reference held by the caller. When 384 // the last reference on an allocated page is released, ownership of the page 385 // is returned to the MemoryFile, allowing it to be returned by a future call 386 // to Allocate. 387 // 388 // Preconditions: length must be page-aligned and non-zero. 389 func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (memmap.FileRange, error) { 390 if length == 0 || length%hostarch.PageSize != 0 { 391 panic(fmt.Sprintf("invalid allocation length: %#x", length)) 392 } 393 394 f.mu.Lock() 395 defer f.mu.Unlock() 396 397 // Align hugepage-and-larger allocations on hugepage boundaries to try 398 // to take advantage of hugetmpfs. 399 alignment := uint64(hostarch.PageSize) 400 if length >= hostarch.HugePageSize { 401 alignment = hostarch.HugePageSize 402 } 403 404 // Find a range in the underlying file. 405 fr, ok := findAvailableRange(&f.usage, f.fileSize, length, alignment) 406 if !ok { 407 return memmap.FileRange{}, syserror.ENOMEM 408 } 409 410 // Expand the file if needed. 411 if int64(fr.End) > f.fileSize { 412 // Round the new file size up to be chunk-aligned. 413 newFileSize := (int64(fr.End) + chunkMask) &^ chunkMask 414 if err := f.file.Truncate(newFileSize); err != nil { 415 return memmap.FileRange{}, err 416 } 417 f.fileSize = newFileSize 418 f.mappingsMu.Lock() 419 oldMappings := f.mappings.Load().([]uintptr) 420 newMappings := make([]uintptr, newFileSize>>chunkShift) 421 copy(newMappings, oldMappings) 422 f.mappings.Store(newMappings) 423 f.mappingsMu.Unlock() 424 } 425 426 if f.opts.ManualZeroing { 427 if err := f.manuallyZero(fr); err != nil { 428 return memmap.FileRange{}, err 429 } 430 } 431 // Mark selected pages as in use. 432 if !f.usage.Add(fr, usageInfo{ 433 kind: kind, 434 refs: 1, 435 }) { 436 panic(fmt.Sprintf("allocating %v: failed to insert into usage set:\n%v", fr, &f.usage)) 437 } 438 439 return fr, nil 440 } 441 442 // findAvailableRange returns an available range in the usageSet. 
443 //
444 // Note that scanning for available slots takes place from the end of the
445 // file backwards, then forwards. This heuristic has important consequences
446 // for how sequential mappings can be merged in the host VMAs, given that
447 // addresses for both application and sentry mappings are allocated top-down
448 // (from higher to lower addresses). The file is also grown exponentially in
449 // order to create space for mappings to be allocated downwards.
450 //
451 // Precondition: alignment must be a power of 2.
452 func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint64) (memmap.FileRange, bool) {
453 	alignmentMask := alignment - 1
454 
455 	// Search for space in existing gaps, starting at the current end of the
456 	// file and working backward.
457 	lastGap := usage.LastGap()
458 	gap := lastGap
459 	for {
460 		end := gap.End()
461 		if end > uint64(fileSize) {
462 			end = uint64(fileSize)
463 		}
464 
465 		// Try to allocate from the end of this gap, with the start of the
466 		// allocated range aligned down to alignment.
467 		unalignedStart := end - length
468 		if unalignedStart > end {
469 			// Negative overflow: this and all preceding gaps are too small to
470 			// accommodate length.
471 			break
472 		}
473 		if start := unalignedStart &^ alignmentMask; start >= gap.Start() {
474 			return memmap.FileRange{start, start + length}, true
475 		}
476 
477 		gap = gap.PrevLargeEnoughGap(length)
478 		if !gap.Ok() {
479 			break
480 		}
481 	}
482 
483 	// Check that it's possible to fit this allocation at the end of a file of any size.
484 	min := lastGap.Start()
485 	min = (min + alignmentMask) &^ alignmentMask
486 	if min+length < min {
487 		// Overflow: allocation would exceed the range of uint64.
488 		return memmap.FileRange{}, false
489 	}
490 
491 	// Determine the minimum file size required to fit this allocation at its end.
492 	for {
493 		newFileSize := 2 * fileSize
494 		if newFileSize <= fileSize {
495 			if fileSize != 0 {
496 				// Overflow: allocation would exceed the range of int64.
497 				return memmap.FileRange{}, false
498 			}
499 			newFileSize = chunkSize
500 		}
501 		fileSize = newFileSize
502 
503 		unalignedStart := uint64(fileSize) - length
504 		if unalignedStart > uint64(fileSize) {
505 			// Negative overflow: fileSize is still inadequate.
506 			continue
507 		}
508 		if start := unalignedStart &^ alignmentMask; start >= min {
509 			return memmap.FileRange{start, start + length}, true
510 		}
511 	}
512 }
513 
514 // AllocateAndFill allocates memory of the given kind and fills it by calling
515 // r.ReadToBlocks() repeatedly until either length bytes are read or a non-nil
516 // error is returned. It returns the memory filled by r, truncated down to the
517 // nearest page. If this is shorter than length bytes due to an error returned
518 // by r.ReadToBlocks(), it returns that error.
519 //
520 // Preconditions:
521 // * length > 0.
522 // * length must be page-aligned.
523 func (f *MemoryFile) AllocateAndFill(length uint64, kind usage.MemoryKind, r safemem.Reader) (memmap.FileRange, error) {
524 	fr, err := f.Allocate(length, kind)
525 	if err != nil {
526 		return memmap.FileRange{}, err
527 	}
528 	dsts, err := f.MapInternal(fr, hostarch.Write)
529 	if err != nil {
530 		f.DecRef(fr)
531 		return memmap.FileRange{}, err
532 	}
533 	n, err := safemem.ReadFullToBlocks(r, dsts)
534 	un := uint64(hostarch.Addr(n).RoundDown())
535 	if un < length {
536 		// Free unused memory and update fr to contain only the memory that is
537 		// still allocated.
538 f.DecRef(memmap.FileRange{fr.Start + un, fr.End}) 539 fr.End = fr.Start + un 540 } 541 return fr, err 542 } 543 544 // fallocate(2) modes, defined in Linux's include/uapi/linux/falloc.h. 545 const ( 546 _FALLOC_FL_KEEP_SIZE = 1 547 _FALLOC_FL_PUNCH_HOLE = 2 548 ) 549 550 // Decommit releases resources associated with maintaining the contents of the 551 // given pages. If Decommit succeeds, future accesses of the decommitted pages 552 // will read zeroes. 553 // 554 // Preconditions: fr.Length() > 0. 555 func (f *MemoryFile) Decommit(fr memmap.FileRange) error { 556 if !fr.WellFormed() || fr.Length() == 0 || fr.Start%hostarch.PageSize != 0 || fr.End%hostarch.PageSize != 0 { 557 panic(fmt.Sprintf("invalid range: %v", fr)) 558 } 559 560 if f.opts.ManualZeroing { 561 // FALLOC_FL_PUNCH_HOLE may not zero pages if ManualZeroing is in 562 // effect. 563 if err := f.manuallyZero(fr); err != nil { 564 return err 565 } 566 } else { 567 if err := f.decommitFile(fr); err != nil { 568 return err 569 } 570 } 571 572 f.markDecommitted(fr) 573 return nil 574 } 575 576 func (f *MemoryFile) manuallyZero(fr memmap.FileRange) error { 577 return f.forEachMappingSlice(fr, func(bs []byte) { 578 for i := range bs { 579 bs[i] = 0 580 } 581 }) 582 } 583 584 func (f *MemoryFile) decommitFile(fr memmap.FileRange) error { 585 // "After a successful call, subsequent reads from this range will 586 // return zeroes. The FALLOC_FL_PUNCH_HOLE flag must be ORed with 587 // FALLOC_FL_KEEP_SIZE in mode ..." - fallocate(2) 588 return unix.Fallocate( 589 int(f.file.Fd()), 590 _FALLOC_FL_PUNCH_HOLE|_FALLOC_FL_KEEP_SIZE, 591 int64(fr.Start), 592 int64(fr.Length())) 593 } 594 595 func (f *MemoryFile) markDecommitted(fr memmap.FileRange) { 596 f.mu.Lock() 597 defer f.mu.Unlock() 598 // Since we're changing the knownCommitted attribute, we need to merge 599 // across the entire range to ensure that the usage tree is minimal. 600 gap := f.usage.ApplyContiguous(fr, func(seg usageIterator) { 601 val := seg.ValuePtr() 602 if val.knownCommitted { 603 // Drop the usageExpected appropriately. 604 amount := seg.Range().Length() 605 usage.MemoryAccounting.Dec(amount, val.kind) 606 f.usageExpected -= amount 607 val.knownCommitted = false 608 } 609 }) 610 if gap.Ok() { 611 panic(fmt.Sprintf("Decommit(%v): attempted to decommit unallocated pages %v:\n%v", fr, gap.Range(), &f.usage)) 612 } 613 f.usage.MergeRange(fr) 614 } 615 616 // IncRef implements memmap.File.IncRef. 617 func (f *MemoryFile) IncRef(fr memmap.FileRange) { 618 if !fr.WellFormed() || fr.Length() == 0 || fr.Start%hostarch.PageSize != 0 || fr.End%hostarch.PageSize != 0 { 619 panic(fmt.Sprintf("invalid range: %v", fr)) 620 } 621 622 f.mu.Lock() 623 defer f.mu.Unlock() 624 625 gap := f.usage.ApplyContiguous(fr, func(seg usageIterator) { 626 seg.ValuePtr().refs++ 627 }) 628 if gap.Ok() { 629 panic(fmt.Sprintf("IncRef(%v): attempted to IncRef on unallocated pages %v:\n%v", fr, gap.Range(), &f.usage)) 630 } 631 632 f.usage.MergeAdjacent(fr) 633 } 634 635 // DecRef implements memmap.File.DecRef. 
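//
// When the last reference on a range of pages is dropped, DecRef reclassifies
// those pages as usage.System and queues them for the reclaimer goroutine,
// which will eventually decommit them and return them to the free pool (see
// runReclaim and markReclaimed below).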
636 func (f *MemoryFile) DecRef(fr memmap.FileRange) { 637 if !fr.WellFormed() || fr.Length() == 0 || fr.Start%hostarch.PageSize != 0 || fr.End%hostarch.PageSize != 0 { 638 panic(fmt.Sprintf("invalid range: %v", fr)) 639 } 640 641 var freed bool 642 643 f.mu.Lock() 644 defer f.mu.Unlock() 645 646 for seg := f.usage.FindSegment(fr.Start); seg.Ok() && seg.Start() < fr.End; seg = seg.NextSegment() { 647 seg = f.usage.Isolate(seg, fr) 648 val := seg.ValuePtr() 649 if val.refs == 0 { 650 panic(fmt.Sprintf("DecRef(%v): 0 existing references on %v:\n%v", fr, seg.Range(), &f.usage)) 651 } 652 val.refs-- 653 if val.refs == 0 { 654 f.reclaim.Add(seg.Range(), reclaimSetValue{}) 655 freed = true 656 // Reclassify memory as System, until it's freed by the reclaim 657 // goroutine. 658 if val.knownCommitted { 659 usage.MemoryAccounting.Move(seg.Range().Length(), usage.System, val.kind) 660 } 661 val.kind = usage.System 662 } 663 } 664 f.usage.MergeAdjacent(fr) 665 666 if freed { 667 f.reclaimable = true 668 f.reclaimCond.Signal() 669 } 670 } 671 672 // MapInternal implements memmap.File.MapInternal. 673 func (f *MemoryFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { 674 if !fr.WellFormed() || fr.Length() == 0 { 675 panic(fmt.Sprintf("invalid range: %v", fr)) 676 } 677 if at.Execute { 678 return safemem.BlockSeq{}, linuxerr.EACCES 679 } 680 681 chunks := ((fr.End + chunkMask) >> chunkShift) - (fr.Start >> chunkShift) 682 if chunks == 1 { 683 // Avoid an unnecessary slice allocation. 684 var seq safemem.BlockSeq 685 err := f.forEachMappingSlice(fr, func(bs []byte) { 686 seq = safemem.BlockSeqOf(safemem.BlockFromSafeSlice(bs)) 687 }) 688 return seq, err 689 } 690 blocks := make([]safemem.Block, 0, chunks) 691 err := f.forEachMappingSlice(fr, func(bs []byte) { 692 blocks = append(blocks, safemem.BlockFromSafeSlice(bs)) 693 }) 694 return safemem.BlockSeqFromSlice(blocks), err 695 } 696 697 // forEachMappingSlice invokes fn on a sequence of byte slices that 698 // collectively map all bytes in fr. 699 func (f *MemoryFile) forEachMappingSlice(fr memmap.FileRange, fn func([]byte)) error { 700 mappings := f.mappings.Load().([]uintptr) 701 for chunkStart := fr.Start &^ chunkMask; chunkStart < fr.End; chunkStart += chunkSize { 702 chunk := int(chunkStart >> chunkShift) 703 m := atomic.LoadUintptr(&mappings[chunk]) 704 if m == 0 { 705 var err error 706 mappings, m, err = f.getChunkMapping(chunk) 707 if err != nil { 708 return err 709 } 710 } 711 startOff := uint64(0) 712 if chunkStart < fr.Start { 713 startOff = fr.Start - chunkStart 714 } 715 endOff := uint64(chunkSize) 716 if chunkStart+chunkSize > fr.End { 717 endOff = fr.End - chunkStart 718 } 719 fn(unsafeSlice(m, chunkSize)[startOff:endOff]) 720 } 721 return nil 722 } 723 724 func (f *MemoryFile) getChunkMapping(chunk int) ([]uintptr, uintptr, error) { 725 f.mappingsMu.Lock() 726 defer f.mappingsMu.Unlock() 727 // Another thread may have replaced f.mappings altogether due to file 728 // expansion. 729 mappings := f.mappings.Load().([]uintptr) 730 // Another thread may have already mapped the chunk. 
731 if m := mappings[chunk]; m != 0 { 732 return mappings, m, nil 733 } 734 m, _, errno := unix.Syscall6( 735 unix.SYS_MMAP, 736 0, 737 chunkSize, 738 unix.PROT_READ|unix.PROT_WRITE, 739 unix.MAP_SHARED, 740 f.file.Fd(), 741 uintptr(chunk<<chunkShift)) 742 if errno != 0 { 743 return nil, 0, errno 744 } 745 atomic.StoreUintptr(&mappings[chunk], m) 746 return mappings, m, nil 747 } 748 749 // MarkEvictable allows f to request memory deallocation by calling 750 // user.Evict(er) in the future. 751 // 752 // Redundantly marking an already-evictable range as evictable has no effect. 753 func (f *MemoryFile) MarkEvictable(user EvictableMemoryUser, er EvictableRange) { 754 f.mu.Lock() 755 defer f.mu.Unlock() 756 info, ok := f.evictable[user] 757 if !ok { 758 info = &evictableMemoryUserInfo{} 759 f.evictable[user] = info 760 } 761 gap := info.ranges.LowerBoundGap(er.Start) 762 for gap.Ok() && gap.Start() < er.End { 763 gapER := gap.Range().Intersect(er) 764 if gapER.Length() == 0 { 765 gap = gap.NextGap() 766 continue 767 } 768 gap = info.ranges.Insert(gap, gapER, evictableRangeSetValue{}).NextGap() 769 } 770 if !info.evicting { 771 switch f.opts.DelayedEviction { 772 case DelayedEvictionDisabled: 773 // Kick off eviction immediately. 774 f.startEvictionGoroutineLocked(user, info) 775 case DelayedEvictionEnabled: 776 if !f.opts.UseHostMemcgPressure { 777 // Ensure that the reclaimer goroutine is running, so that it 778 // can start eviction when necessary. 779 f.reclaimCond.Signal() 780 } 781 } 782 } 783 } 784 785 // MarkUnevictable informs f that user no longer considers er to be evictable, 786 // so the MemoryFile should no longer call user.Evict(er). Note that, per 787 // EvictableMemoryUser.Evict's documentation, user.Evict(er) may still be 788 // called even after MarkUnevictable returns due to race conditions, and 789 // implementations of EvictableMemoryUser must handle this possibility. 790 // 791 // Redundantly marking an already-unevictable range as unevictable has no 792 // effect. 793 func (f *MemoryFile) MarkUnevictable(user EvictableMemoryUser, er EvictableRange) { 794 f.mu.Lock() 795 defer f.mu.Unlock() 796 info, ok := f.evictable[user] 797 if !ok { 798 return 799 } 800 seg := info.ranges.LowerBoundSegment(er.Start) 801 for seg.Ok() && seg.Start() < er.End { 802 seg = info.ranges.Isolate(seg, er) 803 seg = info.ranges.Remove(seg).NextSegment() 804 } 805 // We can only remove info if there's no eviction goroutine running on its 806 // behalf. 807 if !info.evicting && info.ranges.IsEmpty() { 808 delete(f.evictable, user) 809 } 810 } 811 812 // MarkAllUnevictable informs f that user no longer considers any offsets to be 813 // evictable. It otherwise has the same semantics as MarkUnevictable. 814 func (f *MemoryFile) MarkAllUnevictable(user EvictableMemoryUser) { 815 f.mu.Lock() 816 defer f.mu.Unlock() 817 info, ok := f.evictable[user] 818 if !ok { 819 return 820 } 821 info.ranges.RemoveAll() 822 // We can only remove info if there's no eviction goroutine running on its 823 // behalf. 824 if !info.evicting { 825 delete(f.evictable, user) 826 } 827 } 828 829 // ShouldCacheEvictable returns true if f is meaningfully delaying evictions of 830 // evictable memory, such that it may be advantageous to cache data in 831 // evictable memory. The value returned by ShouldCacheEvictable may change 832 // between calls. 
833 func (f *MemoryFile) ShouldCacheEvictable() bool {
834 	return f.opts.DelayedEviction == DelayedEvictionManual || f.opts.UseHostMemcgPressure
835 }
836 
837 // UpdateUsage ensures that the memory usage statistics in
838 // usage.MemoryAccounting are up to date.
839 func (f *MemoryFile) UpdateUsage() error {
840 	f.mu.Lock()
841 	defer f.mu.Unlock()
842 
843 	// If the underlying usage matches what the usage tree already
844 	// represents, then we can just avoid the entire scan (we know it's
845 	// accurate).
846 	currentUsage, err := f.TotalUsage()
847 	if err != nil {
848 		return err
849 	}
850 	if currentUsage == f.usageExpected && f.usageSwapped == 0 {
851 		log.Debugf("UpdateUsage: skipped with usageSwapped=0.")
852 		return nil
853 	}
854 	// If the current usage matches the expected but there's swap
855 	// accounting, then ensure a scan takes place at least every second
856 	// (when requested).
857 	if currentUsage == f.usageExpected+f.usageSwapped && time.Now().Before(f.usageLast.Add(time.Second)) {
858 		log.Debugf("UpdateUsage: skipped with usageSwapped!=0.")
859 		return nil
860 	}
861 	// Linux updates usage values at CONFIG_HZ.
862 	if scanningAfter := time.Now().Sub(f.usageLast).Milliseconds(); scanningAfter < time.Second.Milliseconds()/linux.CLOCKS_PER_SEC {
863 		log.Debugf("UpdateUsage: skipped because previous scan happened %d ms back", scanningAfter)
864 		return nil
865 	}
866 
867 	f.usageLast = time.Now()
868 	err = f.updateUsageLocked(currentUsage, mincore)
869 	log.Debugf("UpdateUsage: currentUsage=%d, usageExpected=%d, usageSwapped=%d.",
870 		currentUsage, f.usageExpected, f.usageSwapped)
871 	log.Debugf("UpdateUsage: took %v.", time.Since(f.usageLast))
872 	return err
873 }
874 
875 // updateUsageLocked attempts to detect commitment of previously-uncommitted
876 // pages by invoking checkCommitted, which is a function that, for each page i
877 // in bs, sets committed[i] to 1 if the page is committed and 0 otherwise.
878 //
879 // Precondition: f.mu must be held; it may be unlocked and reacquired.
880 // +checklocks:f.mu
881 func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(bs []byte, committed []byte) error) error {
882 	// Track if anything changed to elide the merge. In the common case, we
883 	// expect all segments to be committed and no merge to occur.
884 	changedAny := false
885 	defer func() {
886 		if changedAny {
887 			f.usage.MergeAll()
888 		}
889 
890 		// Adjust the swap usage to reflect reality.
891 		if f.usageExpected < currentUsage {
892 			// Since no pages may be marked decommitted while we hold mu, we
893 			// know that usage may have only increased since we got the last
894 			// current usage. Therefore, if usageExpected is still short of
895 			// currentUsage, we must assume that the difference is in pages
896 			// that have been swapped.
897 			newUsageSwapped := currentUsage - f.usageExpected
898 			if f.usageSwapped < newUsageSwapped {
899 				usage.MemoryAccounting.Inc(newUsageSwapped-f.usageSwapped, usage.System)
900 			} else {
901 				usage.MemoryAccounting.Dec(f.usageSwapped-newUsageSwapped, usage.System)
902 			}
903 			f.usageSwapped = newUsageSwapped
904 		} else if f.usageSwapped != 0 {
905 			// We have more usage accounted for than the file itself.
906 			// That's fine, we probably caught a race where pages were
907 			// being committed while the below loop was running. Just
908 			// report the higher number that we found and ignore swap.
909 usage.MemoryAccounting.Dec(f.usageSwapped, usage.System) 910 f.usageSwapped = 0 911 } 912 }() 913 914 // Reused mincore buffer, will generally be <= 4096 bytes. 915 var buf []byte 916 917 // Iterate over all usage data. There will only be usage segments 918 // present when there is an associated reference. 919 for seg := f.usage.FirstSegment(); seg.Ok(); { 920 if !seg.ValuePtr().canCommit() { 921 seg = seg.NextSegment() 922 continue 923 } 924 925 // Get the range for this segment. As we touch slices, the 926 // Start value will be walked along. 927 r := seg.Range() 928 929 var checkErr error 930 err := f.forEachMappingSlice(r, 931 func(s []byte) { 932 if checkErr != nil { 933 return 934 } 935 936 // Ensure that we have sufficient buffer for the call 937 // (one byte per page). The length of each slice must 938 // be page-aligned. 939 bufLen := len(s) / hostarch.PageSize 940 if len(buf) < bufLen { 941 buf = make([]byte, bufLen) 942 } 943 944 // Query for new pages in core. 945 // NOTE(b/165896008): mincore (which is passed as checkCommitted) 946 // by f.UpdateUsage() might take a really long time. So unlock f.mu 947 // while checkCommitted runs. 948 f.mu.Unlock() // +checklocksforce 949 err := checkCommitted(s, buf) 950 f.mu.Lock() 951 if err != nil { 952 checkErr = err 953 return 954 } 955 956 // Scan each page and switch out segments. 957 seg := f.usage.LowerBoundSegment(r.Start) 958 for i := 0; i < bufLen; { 959 if buf[i]&0x1 == 0 { 960 i++ 961 continue 962 } 963 // Scan to the end of this committed range. 964 j := i + 1 965 for ; j < bufLen; j++ { 966 if buf[j]&0x1 == 0 { 967 break 968 } 969 } 970 committedFR := memmap.FileRange{ 971 Start: r.Start + uint64(i*hostarch.PageSize), 972 End: r.Start + uint64(j*hostarch.PageSize), 973 } 974 // Advance seg to committedFR.Start. 975 for seg.Ok() && seg.End() < committedFR.Start { 976 seg = seg.NextSegment() 977 } 978 // Mark pages overlapping committedFR as committed. 979 for seg.Ok() && seg.Start() < committedFR.End { 980 if seg.ValuePtr().canCommit() { 981 seg = f.usage.Isolate(seg, committedFR) 982 seg.ValuePtr().knownCommitted = true 983 amount := seg.Range().Length() 984 usage.MemoryAccounting.Inc(amount, seg.ValuePtr().kind) 985 f.usageExpected += amount 986 changedAny = true 987 } 988 seg = seg.NextSegment() 989 } 990 // Continue scanning for committed pages. 991 i = j + 1 992 } 993 994 // Advance r.Start. 995 r.Start += uint64(len(s)) 996 }) 997 if checkErr != nil { 998 return checkErr 999 } 1000 if err != nil { 1001 return err 1002 } 1003 1004 // Continue with the first segment after r.End. 1005 seg = f.usage.LowerBoundSegment(r.End) 1006 } 1007 1008 return nil 1009 } 1010 1011 // TotalUsage returns an aggregate usage for all memory statistics except 1012 // Mapped (which is external to MemoryFile). This is generally much cheaper 1013 // than UpdateUsage, but will not provide a fine-grained breakdown. 1014 func (f *MemoryFile) TotalUsage() (uint64, error) { 1015 // Stat the underlying file to discover the underlying usage. stat(2) 1016 // always reports the allocated block count in units of 512 bytes. This 1017 // includes pages in the page cache and swapped pages. 1018 var stat unix.Stat_t 1019 if err := unix.Fstat(int(f.file.Fd()), &stat); err != nil { 1020 return 0, err 1021 } 1022 return uint64(stat.Blocks * 512), nil 1023 } 1024 1025 // TotalSize returns the current size of the backing file in bytes, which is an 1026 // upper bound on the amount of memory that can currently be allocated from the 1027 // MemoryFile. 
The value returned by TotalSize is permitted to change.
1028 func (f *MemoryFile) TotalSize() uint64 {
1029 	f.mu.Lock()
1030 	defer f.mu.Unlock()
1031 	return uint64(f.fileSize)
1032 }
1033 
1034 // File returns the backing file.
1035 func (f *MemoryFile) File() *os.File {
1036 	return f.file
1037 }
1038 
1039 // FD implements memmap.File.FD.
1040 func (f *MemoryFile) FD() int {
1041 	return int(f.file.Fd())
1042 }
1043 
1044 // String implements fmt.Stringer.String.
1045 //
1046 // Note that because f.String locks f.mu, calling f.String internally
1047 // (including indirectly through the fmt package) risks recursive locking.
1048 // Within the pgalloc package, use f.usage directly instead.
1049 func (f *MemoryFile) String() string {
1050 	f.mu.Lock()
1051 	defer f.mu.Unlock()
1052 	return f.usage.String()
1053 }
1054 
1055 // runReclaim implements the reclaimer goroutine, which continuously decommits
1056 // reclaimable pages in order to reduce memory usage and make them available
1057 // for allocation.
1058 func (f *MemoryFile) runReclaim() {
1059 	for {
1060 		// N.B. We must call f.markReclaimed on the returned FileRange.
1061 		fr, ok := f.findReclaimable()
1062 		if !ok {
1063 			break
1064 		}
1065 
1066 		if f.opts.ManualZeroing {
1067 			// If ManualZeroing is in effect, only hugepage-aligned regions may
1068 			// be safely passed to decommitFile. Pages will be zeroed on
1069 			// reallocation, so we don't need to perform any manual zeroing
1070 			// here, whether or not decommitFile succeeds.
1071 			if startAddr, ok := hostarch.Addr(fr.Start).HugeRoundUp(); ok {
1072 				if endAddr := hostarch.Addr(fr.End).HugeRoundDown(); startAddr < endAddr {
1073 					decommitFR := memmap.FileRange{uint64(startAddr), uint64(endAddr)}
1074 					if err := f.decommitFile(decommitFR); err != nil {
1075 						log.Warningf("Reclaim failed to decommit %v: %v", decommitFR, err)
1076 					}
1077 				}
1078 			}
1079 		} else {
1080 			if err := f.decommitFile(fr); err != nil {
1081 				log.Warningf("Reclaim failed to decommit %v: %v", fr, err)
1082 				// Zero the pages manually. This won't reduce memory usage, but at
1083 				// least ensures that the pages will be zero when reallocated.
1084 				if err := f.manuallyZero(fr); err != nil {
1085 					panic(fmt.Sprintf("Reclaim failed to decommit or zero %v: %v", fr, err))
1086 				}
1087 			}
1088 		}
1089 		f.markDecommitted(fr)
1090 		f.markReclaimed(fr)
1091 	}
1092 
1093 	// We only get here if findReclaimable finds f.destroyed set and returns
1094 	// false.
1095 	f.mu.Lock()
1096 	if !f.destroyed {
1097 		f.mu.Unlock()
1098 		panic("findReclaimable broke out of reclaim loop, but destroyed is no longer set")
1099 	}
1100 	f.file.Close()
1101 	// Ensure that any attempts to use f.file.Fd() fail instead of getting a fd
1102 	// that has possibly been reassigned.
1103 	f.file = nil
1104 	f.mappingsMu.Lock()
1105 	defer f.mappingsMu.Unlock()
1106 	mappings := f.mappings.Load().([]uintptr)
1107 	for i, m := range mappings {
1108 		if m != 0 {
1109 			_, _, errno := unix.Syscall(unix.SYS_MUNMAP, m, chunkSize, 0)
1110 			if errno != 0 {
1111 				log.Warningf("Failed to unmap mapping %#x for MemoryFile chunk %d: %v", m, i, errno)
1112 			}
1113 		}
1114 	}
1115 	// Similarly, invalidate f.mappings. (atomic.Value.Store(nil) panics.)
1116 	f.mappings.Store([]uintptr{})
1117 	f.mu.Unlock()
1118 
1119 	// This must be called without holding f.mu to avoid circular lock
1120 	// ordering.
1121 	if f.stopNotifyPressure != nil {
1122 		f.stopNotifyPressure()
1123 	}
1124 }
1125 
1126 // findReclaimable finds memory that has been marked for reclaim.
1127 //
1128 // Note that the returned range will be removed from tracking. It
1129 // must be reclaimed (removed from f.usage) at this point.
1130 func (f *MemoryFile) findReclaimable() (memmap.FileRange, bool) {
1131 	f.mu.Lock()
1132 	defer f.mu.Unlock()
1133 	for {
1134 		for {
1135 			if f.destroyed {
1136 				return memmap.FileRange{}, false
1137 			}
1138 			if f.reclaimable {
1139 				break
1140 			}
1141 			if f.opts.DelayedEviction == DelayedEvictionEnabled && !f.opts.UseHostMemcgPressure {
1142 				// No work to do. Evict any pending evictable allocations to
1143 				// get more reclaimable pages before going to sleep.
1144 				f.startEvictionsLocked()
1145 			}
1146 			f.reclaimCond.Wait()
1147 		}
1148 		// Allocate works from the back of the file inwards, so reclaim
1149 		// preserves this order to minimize the cost of the search.
1150 		if seg := f.reclaim.LastSegment(); seg.Ok() {
1151 			fr := seg.Range()
1152 			f.reclaim.Remove(seg)
1153 			return fr, true
1154 		}
1155 		// Nothing is reclaimable.
1156 		f.reclaimable = false
1157 	}
1158 }
1159 
1160 func (f *MemoryFile) markReclaimed(fr memmap.FileRange) {
1161 	f.mu.Lock()
1162 	defer f.mu.Unlock()
1163 	seg := f.usage.FindSegment(fr.Start)
1164 	// All of fr should be mapped to a single uncommitted reclaimable
1165 	// segment accounted to System.
1166 	if !seg.Ok() {
1167 		panic(fmt.Sprintf("reclaimed pages %v include unreferenced pages:\n%v", fr, &f.usage))
1168 	}
1169 	if !seg.Range().IsSupersetOf(fr) {
1170 		panic(fmt.Sprintf("reclaimed pages %v are not entirely contained in segment %v with state %v:\n%v", fr, seg.Range(), seg.Value(), &f.usage))
1171 	}
1172 	if got, want := seg.Value(), (usageInfo{
1173 		kind:           usage.System,
1174 		knownCommitted: false,
1175 		refs:           0,
1176 	}); got != want {
1177 		panic(fmt.Sprintf("reclaimed pages %v in segment %v has incorrect state %v, wanted %v:\n%v", fr, seg.Range(), got, want, &f.usage))
1178 	}
1179 	// Deallocate reclaimed pages. Even though all of seg is reclaimable,
1180 	// the caller of markReclaimed may not have decommitted it, so we can
1181 	// only mark fr as reclaimed.
1182 	f.usage.Remove(f.usage.Isolate(seg, fr))
1183 }
1184 
1185 // StartEvictions requests that f evict all evictable allocations. It does not
1186 // wait for eviction to complete; for this, see MemoryFile.WaitForEvictions.
1187 func (f *MemoryFile) StartEvictions() {
1188 	f.mu.Lock()
1189 	defer f.mu.Unlock()
1190 	f.startEvictionsLocked()
1191 }
1192 
1193 // Preconditions: f.mu must be locked.
1194 func (f *MemoryFile) startEvictionsLocked() bool {
1195 	startedAny := false
1196 	for user, info := range f.evictable {
1197 		// Don't start multiple goroutines to evict the same user's
1198 		// allocations.
1199 		if !info.evicting {
1200 			f.startEvictionGoroutineLocked(user, info)
1201 			startedAny = true
1202 		}
1203 	}
1204 	return startedAny
1205 }
1206 
1207 // Preconditions:
1208 // * info == f.evictable[user].
1209 // * !info.evicting.
1210 // * f.mu must be locked.
1211 func (f *MemoryFile) startEvictionGoroutineLocked(user EvictableMemoryUser, info *evictableMemoryUserInfo) {
1212 	info.evicting = true
1213 	f.evictionWG.Add(1)
1214 	go func() { // S/R-SAFE: f.evictionWG
1215 		defer f.evictionWG.Done()
1216 		for {
1217 			f.mu.Lock()
1218 			info, ok := f.evictable[user]
1219 			if !ok {
1220 				// This shouldn't happen: only this goroutine is permitted
1221 				// to delete this entry.
1222 f.mu.Unlock() 1223 panic(fmt.Sprintf("evictableMemoryUserInfo for EvictableMemoryUser %v deleted while eviction goroutine running", user)) 1224 } 1225 if info.ranges.IsEmpty() { 1226 delete(f.evictable, user) 1227 f.mu.Unlock() 1228 return 1229 } 1230 // Evict from the end of info.ranges, under the assumption that 1231 // if ranges in user start being used again (and are 1232 // consequently marked unevictable), such uses are more likely 1233 // to start from the beginning of user. 1234 seg := info.ranges.LastSegment() 1235 er := seg.Range() 1236 info.ranges.Remove(seg) 1237 // user.Evict() must be called without holding f.mu to avoid 1238 // circular lock ordering. 1239 f.mu.Unlock() 1240 user.Evict(context.Background(), er) 1241 } 1242 }() 1243 } 1244 1245 // WaitForEvictions blocks until f is no longer evicting any evictable 1246 // allocations. 1247 func (f *MemoryFile) WaitForEvictions() { 1248 f.evictionWG.Wait() 1249 } 1250 1251 type usageSetFunctions struct{} 1252 1253 func (usageSetFunctions) MinKey() uint64 { 1254 return 0 1255 } 1256 1257 func (usageSetFunctions) MaxKey() uint64 { 1258 return math.MaxUint64 1259 } 1260 1261 func (usageSetFunctions) ClearValue(val *usageInfo) { 1262 } 1263 1264 func (usageSetFunctions) Merge(_ memmap.FileRange, val1 usageInfo, _ memmap.FileRange, val2 usageInfo) (usageInfo, bool) { 1265 return val1, val1 == val2 1266 } 1267 1268 func (usageSetFunctions) Split(_ memmap.FileRange, val usageInfo, _ uint64) (usageInfo, usageInfo) { 1269 return val, val 1270 } 1271 1272 // evictableRangeSetValue is the value type of evictableRangeSet. 1273 type evictableRangeSetValue struct{} 1274 1275 type evictableRangeSetFunctions struct{} 1276 1277 func (evictableRangeSetFunctions) MinKey() uint64 { 1278 return 0 1279 } 1280 1281 func (evictableRangeSetFunctions) MaxKey() uint64 { 1282 return math.MaxUint64 1283 } 1284 1285 func (evictableRangeSetFunctions) ClearValue(val *evictableRangeSetValue) { 1286 } 1287 1288 func (evictableRangeSetFunctions) Merge(_ EvictableRange, _ evictableRangeSetValue, _ EvictableRange, _ evictableRangeSetValue) (evictableRangeSetValue, bool) { 1289 return evictableRangeSetValue{}, true 1290 } 1291 1292 func (evictableRangeSetFunctions) Split(_ EvictableRange, _ evictableRangeSetValue, _ uint64) (evictableRangeSetValue, evictableRangeSetValue) { 1293 return evictableRangeSetValue{}, evictableRangeSetValue{} 1294 } 1295 1296 // reclaimSetValue is the value type of reclaimSet. 1297 type reclaimSetValue struct{} 1298 1299 type reclaimSetFunctions struct{} 1300 1301 func (reclaimSetFunctions) MinKey() uint64 { 1302 return 0 1303 } 1304 1305 func (reclaimSetFunctions) MaxKey() uint64 { 1306 return math.MaxUint64 1307 } 1308 1309 func (reclaimSetFunctions) ClearValue(val *reclaimSetValue) { 1310 } 1311 1312 func (reclaimSetFunctions) Merge(_ memmap.FileRange, _ reclaimSetValue, _ memmap.FileRange, _ reclaimSetValue) (reclaimSetValue, bool) { 1313 return reclaimSetValue{}, true 1314 } 1315 1316 func (reclaimSetFunctions) Split(_ memmap.FileRange, _ reclaimSetValue, _ uint64) (reclaimSetValue, reclaimSetValue) { 1317 return reclaimSetValue{}, reclaimSetValue{} 1318 }
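
// The sketch below is an editor's illustration rather than part of the
// original file: it shows the intended allocation lifecycle using only APIs
// defined above (Allocate, MapInternal, DecRef). The function name
// exampleAllocateRoundTrip and the 0xAA fill pattern are hypothetical.
func exampleAllocateRoundTrip(f *MemoryFile, kind usage.MemoryKind) error {
	// Allocate one page; the caller holds the sole reference.
	fr, err := f.Allocate(hostarch.PageSize, kind)
	if err != nil {
		return err
	}
	// Dropping the reference queues the range for the reclaimer goroutine,
	// which decommits it and makes it available to future Allocate calls.
	defer f.DecRef(fr)

	// Map the range into the current process and fill it through the mapping.
	dsts, err := f.MapInternal(fr, hostarch.Write)
	if err != nil {
		return err
	}
	buf := make([]byte, fr.Length())
	for i := range buf {
		buf[i] = 0xAA // arbitrary marker byte
	}
	_, err = safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)))
	return err
}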