github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/memsys/housekeep_mm.go

// Package memsys provides memory management and slab/SGL allocation with io.Reader and io.Writer interfaces
// on top of scatter-gather lists of reusable buffers.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package memsys

import (
	"sort"
	"time"

	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/mono"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/sys"
)

const (
	freeIdleMinDur = 90 * time.Second   // time to reduce an idle slab to a minimum depth (see minDepth)
	freeIdleZero   = freeIdleMinDur * 2 // ... to zero
)

// hk tunables (via config.Memsys section)
var (
	sizeToGC      = int64(cos.GiB + cos.GiB>>1) // run GC when sum(`freed`) > sizeToGC (1.5 GiB by default)
	memCheckAbove = 90 * time.Second            // memory-checking frequency when above the low watermark
)

// API: on-demand memory freeing per the user-provided specification
func (r *MMSA) FreeSpec(spec FreeSpec) {
	var freed int64
	if spec.Totally {
		for _, s := range r.rings {
			freed += s.cleanup()
		}
	} else {
		if spec.IdleDuration == 0 {
			spec.IdleDuration = freeIdleMinDur // using the default
		}
		stats := r.GetStats()
		for _, s := range r.rings {
			if idle := s.idleDur(stats); idle > spec.IdleDuration {
				x := s.cleanup()
				if x > 0 {
					freed += x
					if cmn.Rom.FastV(5, cos.SmoduleMemsys) {
						nlog.Infof("%s: idle for %v - cleanup", s.tag, idle)
					}
				}
			}
		}
	}
	if freed > 0 {
		r.toGC.Add(freed)
		if spec.MinSize == 0 {
			spec.MinSize = sizeToGC // using the default
		}
		r.freeMemToOS(spec.MinSize, spec.ToOS /* force */)
	}
}
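// Usage sketch (illustrative only, not part of the original source): an external caller
// with an initialized *MMSA - here assumed to be named `mm` - could free idle memory via:
//
//	mm.FreeSpec(memsys.FreeSpec{
//		IdleDuration: 2 * time.Minute, // consider rings idle after 2 minutes without hits
//		MinSize:      32 * cos.MiB,    // GC only if at least 32 MiB were freed
//		ToOS:         true,            // force returning freed memory to the OS
//	})
//
// Setting Totally instead cleans up all rings regardless of idleness.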

// copies part of the internal stats into user-visible Stats
func (r *MMSA) GetStats() (stats *Stats) {
	stats = &Stats{}
	r._snap(stats, mono.NanoTime())
	return
}
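// For example (illustrative only, assuming the same `mm` as above), a caller can inspect
// per-ring idleness through the returned Stats:
//
//	stats := mm.GetStats()
//	for i, idle := range stats.Idle {
//		if idle > 0 {
//			fmt.Printf("ring %d: idle for %v (hits: %d)\n", i, idle, stats.Hits[i])
//		}
//	}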

//
// private
//

func (r *MMSA) hkcb() time.Duration {
	// 1. refresh and clone stats
	r.refreshStats()

	// 2. update swapping state and compute mem-pressure ranking
	err := r.mem.Get()
	debug.AssertNoErr(err)
	r.updSwap(&r.mem)
	pressure := r.Pressure(&r.mem)

	// 3. memory is sufficient - free only those rings that have been idle for a while
	if pressure == PressureLow {
		r.optDepth.Store(optDepth)
		if freed := r.freeIdle(); freed > 0 {
			r.toGC.Add(freed)
			r.freeMemToOS(sizeToGC, false)
		}
		return r.hkIval(pressure)
	}

	// 4. calibrate and mem-free accordingly
	var (
		mingc = sizeToGC // minimum accumulated size that triggers GC
		depth int        // target ring depth (determined below)
	)
	switch pressure {
	case OOM, PressureExtreme:
		r.optDepth.Store(minDepth)
		depth = minDepth
		mingc = sizeToGC / 4
	case PressureHigh:
		tmp := max(r.optDepth.Load()/2, optDepth/4)
		r.optDepth.Store(tmp)
		depth = int(tmp)
		mingc = sizeToGC / 2
	default: // PressureModerate
		r.optDepth.Store(optDepth)
		depth = optDepth / 2
	}

	// 5.
	// - sort the rings (idle < less-idle < busy), taking into account idle durations and ring hits (in that order)
	// - _reduce_ them accordingly, most-idle first
	sort.Slice(r.sorted, r.idleLess)
	for _, s := range r.sorted { // idle first
		if idle := r.statsSnapshot.Idle[s.ringIdx()]; idle > freeIdleMinDur/2 {
			depth = minDepth
		}
		freed := s.reduce(depth)
		r.toGC.Add(freed)
	}

	// 6. GC and free mem to OS
	r.freeMemToOS(mingc, pressure >= PressureHigh /*force*/)
	return r.hkIval(pressure)
}
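// To summarize the calibration in step 4 above:
//
//	OOM, PressureExtreme: depth = minDepth,                             mingc = sizeToGC/4
//	PressureHigh:         depth = max(r.optDepth.Load()/2, optDepth/4), mingc = sizeToGC/2
//	PressureModerate:     depth = optDepth/2,                           mingc = sizeToGC
//
// that is, the higher the pressure, the shallower the target ring depth and the lower the
// accumulated-size threshold that triggers freeing memory back to the OS.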

func (r *MMSA) hkIval(pressure int) time.Duration {
	switch pressure {
	case PressureLow:
		return r.TimeIval * 2
	case PressureModerate:
		return r.TimeIval
	default:
		return r.TimeIval / 2
	}
}
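// For example, assuming r.TimeIval of 2 minutes (an illustrative value), housekeeping would
// run every 4 minutes under low pressure, every 2 minutes under moderate pressure, and every
// minute under high/extreme pressure or OOM.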

// refresh and clone internal hits/idle stats
func (r *MMSA) refreshStats() {
	now := mono.NanoTime()
	for i := range r.numSlabs {
		hits, prev := r.slabStats.hits[i].Load(), r.slabStats.prev[i]
		hinc := hits - prev
		if hinc == 0 {
			// no new hits since the previous pass: mark the slab idle while keeping
			// the earliest idle timestamp (CAS succeeds only if not already set)
			r.slabStats.idleTs[i].CAS(0, now)
		} else {
			// the slab was hit - clear its idle timestamp
			r.slabStats.idleTs[i].Store(0)
		}
		r.slabStats.hinc[i], r.slabStats.prev[i] = hinc, hits
	}

	r._snap(r.statsSnapshot, now)
}

// idleLess reports whether ring i sorts before ring j: longer-idle rings come first;
// busy (non-idle) rings are further ordered by fewer recent hits
func (r *MMSA) idleLess(i, j int) bool {
	var (
		ii = r.sorted[i].ringIdx()
		jj = r.sorted[j].ringIdx()
	)
	if r.statsSnapshot.Idle[ii] > 0 {
		if r.statsSnapshot.Idle[jj] > 0 {
			return r.statsSnapshot.Idle[ii] > r.statsSnapshot.Idle[jj]
		}
		return true
	}
	if r.slabStats.idleTs[jj].Load() != 0 {
		return false
	}
	return r.slabStats.hinc[ii] < r.slabStats.hinc[jj]
}
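// Ordering sketch (illustrative): given ring A idle for 3 minutes, ring B idle for 1 minute,
// and a busy ring C (Idle == 0), the resulting order is A, B, C - most idle first; multiple
// busy rings are ordered by fewer recent hits (hinc) first.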

// freeIdle traverses the rings and deallocates idle slabs, i.e., those that have not been
// used for at least the corresponding duration (see the tiers below); returns the total freed size
func (r *MMSA) freeIdle() (total int64) {
	for _, s := range r.rings {
		var (
			freed int64
			idle  = r.statsSnapshot.Idle[s.ringIdx()]
		)
		switch {
		case idle > freeIdleZero:
			freed = s.cleanup()
		case idle > freeIdleMinDur:
			freed = s.reduce(optDepth / 4)
		case idle > freeIdleMinDur/2:
			freed = s.reduce(optDepth / 2)
		default:
			continue
		}
		total += freed
		if freed > 0 && cmn.Rom.FastV(5, cos.SmoduleMemsys) {
			nlog.Infof("%s idle for %v: freed %s", s.tag, idle, cos.ToSizeIEC(freed, 1))
		}
	}
	return
}
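// With the defaults declared above (freeIdleMinDur = 90s, freeIdleZero = 180s), the tiers
// work out as follows:
//
//	idle > 180s: cleanup() - deallocate the ring entirely
//	idle > 90s:  reduce(optDepth/4)
//	idle > 45s:  reduce(optDepth/2)
//	otherwise:   leave the ring as is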

// checks the "minimum" and "load" conditions and, if they allow it, calls the (expensive, serialized) goroutine
func (r *MMSA) freeMemToOS(mingc int64, force bool) {
	avg, err := sys.LoadAverage()
	if err != nil {
		nlog.Errorf("Failed to load averages: %v", err) // (unlikely)
		avg.One = 999
	}
	togc := r.toGC.Load()

	// too little to bother?
	if togc < mingc {
		return
	}
	// too loaded w/ no urgency?
	if avg.One > loadAvg /*idle*/ && !force {
		return
	}

	if started := cos.FreeMemToOS(force); started {
		nlog.Infof("%s: free mem to OS: %s, load %.2f, force %t", r, cos.ToSizeIEC(togc, 1), avg.One, force)
		r.toGC.Store(0)
	}
}
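// In other words, the call is gated twice: the accumulated to-GC total must reach `mingc`,
// and the one-minute load average must be at or below `loadAvg` unless `force` is set.
// Presumably, cos.FreeMemToOS runs GC and releases memory back to the OS asynchronously -
// hence the `started` return value checked above.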

// _snap copies the per-ring hit counters into the user-visible Stats and computes
// idle durations as of the given timestamp
func (r *MMSA) _snap(stats *Stats, now int64) {
	for i := range r.rings {
		stats.Hits[i] = r.slabStats.hits[i].Load()
		stats.Idle[i] = 0
		if since := r.slabStats.idleTs[i].Load(); since != 0 {
			stats.Idle[i] = time.Duration(now - since)
		}
	}
}