github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/memsys/mmsa.go (about) 1 // Package memsys provides memory management and slab/SGL allocation with io.Reader and io.Writer interfaces 2 // on top of scatter-gather lists of reusable buffers. 3 /* 4 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package memsys 7 8 import ( 9 "fmt" 10 "os" 11 "strconv" 12 "time" 13 14 "github.com/NVIDIA/aistore/cmn" 15 "github.com/NVIDIA/aistore/cmn/atomic" 16 "github.com/NVIDIA/aistore/cmn/cos" 17 "github.com/NVIDIA/aistore/cmn/debug" 18 "github.com/NVIDIA/aistore/sys" 19 ) 20 21 // ====================== How to run unit tests =========================== 22 // 23 // 1. Run all tests with default parameters 24 // $ go test -v 25 // 2. ... and debug enabled 26 // $ go test -v -tags=debug 27 // 3. ... and deadbeef (build tag) enabled, to "DEADBEEF" every freed buffer 28 // $ go test -v -tags=debug,deadbeef 29 // 4. Run a given named test with the specified build tags for 100s 30 // $ go test -v -tags=debug,deadbeef -run=Test_Sleep -duration=100s 31 32 // ============== Memory Manager Slab Allocator (MMSA) =========================== 33 // 34 // MMSA is, simultaneously, a Slab and SGL allocator, and a memory manager 35 // responsible to optimize memory usage between different (more vs less) utilized 36 // Slabs. 37 // 38 // Multiple MMSA instances may coexist in the system, each having its own 39 // constraints and managing its own Slabs and SGLs. 40 // 41 // There will be use cases, however, when actually running a MMSA instance 42 // won't be necessary: e.g., when an app utilizes a single (or a few distinct) 43 // Slab size(s) for the duration of its relatively short lifecycle, 44 // while at the same time preferring minimal interference with other running apps. 
//
// In that sense, a typical initialization sequence includes 2 steps, e.g.:
//  1. construct:
//     mm := &memsys.MMSA{TimeIval: ..., MinPctFree: ..., Name: ...}
//  2. initialize:
//     err := mm.Init()
//     if err != nil {
//     ...
//     }
//
// To free up all memory allocated by a given MMSA instance, use its Terminate() method.
//
// In addition, there are several environment variables that can be used
// (to circumvent the need to change the code, for instance):
//
//	"AIS_MINMEM_FREE"
//	"AIS_MINMEM_PCT_TOTAL"
//	"AIS_MINMEM_PCT_FREE"
//
// These names are meant to be self-explanatory.
//
// Once constructed and initialized, memory-manager-and-slab-allocator
// (MMSA) can be exercised via its public API that includes
// GetSlab() and Alloc*() methods.
//
// Once selected, each Slab instance can be used via its own public API that
// includes Alloc() and Free() methods. In addition, each allocated SGL internally
// utilizes one of the existing enumerated slabs to "grow" (that is, allocate more
// buffers from the slab) on demand. For details, look for "grow" in iosgl.go.
// readme points at the memsys README: the path "/blob/main/memsys/README.md"
// appended to cmn.GitHubHome.
const readme = cmn.GitHubHome + "/blob/main/memsys/README.md"

// =================================== tunables ==========================================
// The minimum memory (that must remain available) gets computed as follows:
// 1) environment AIS_MINMEM_FREE takes precedence over everything else;
// 2) if AIS_MINMEM_FREE is not defined, environment variables AIS_MINMEM_PCT_TOTAL and/or
//    AIS_MINMEM_PCT_FREE define percentages to compute the minimum based on total
//    or the currently available memory, respectively;
// 3) with no environment, the minimum is computed based on the following MMSA member variables:
//	* MinFree     uint64 // memory that must be available at all times
//	* MinPctTotal int    // same, via percentage of total
//	* MinPctFree  int    // ditto, as % of free at init time
//	(example:
//		mm := &memsys.MMSA{MinPctTotal: 4, MinFree: cos.GiB * 2}
//	)
// 4) finally, if none of the above is specified, the constant `minMemFree` below is used
// Other important defaults are also commented below.
90 // =================================== MMSA config defaults ========================================== 91 92 const ( 93 PageSize = cos.KiB * 4 94 DefaultBufSize = PageSize * 8 95 DefaultBuf2Size = PageSize * 16 96 DefaultSmallBufSize = cos.KiB 97 ) 98 99 // page slabs: pagesize increments up to MaxPageSlabSize 100 const ( 101 MaxPageSlabSize = 128 * cos.KiB 102 PageSlabIncStep = PageSize 103 NumPageSlabs = MaxPageSlabSize / PageSlabIncStep // = 32 104 ) 105 106 // small slabs: 128 byte increments up to MaxSmallSlabSize 107 const ( 108 MaxSmallSlabSize = PageSize 109 SmallSlabIncStep = 128 110 NumSmallSlabs = MaxSmallSlabSize / SmallSlabIncStep // = 32 111 ) 112 113 const NumStats = NumPageSlabs // NOTE: must be >= NumSmallSlabs 114 115 const ( 116 optDepth = 128 // ring "depth", i.e., num free bufs we trend to (see grow()) 117 minDepth = 4 // depth when idle or under OOM 118 maxDepth = 4096 // exceeding warrants reallocation 119 120 loadAvg = 10 // "idle" load average to deallocate Slabs when below 121 ) 122 123 const countThreshold = 16 // exceeding this scatter-gather count warrants selecting a larger-(buffer)-size Slab 124 125 const swappingMax = 4 // make sure that `swapping` condition, once noted, lingers for a while 126 127 type ( 128 Stats struct { 129 Hits [NumStats]uint64 130 Idle [NumStats]time.Duration 131 } 132 MMSA struct { 133 // public 134 MinFree uint64 // memory that must be available at all times 135 TimeIval time.Duration // interval of time to watch for low memory and make steps 136 MinPctTotal int // same, via percentage of total 137 MinPctFree int // ditto, as % of free at init time 138 Name string 139 // private 140 info string 141 sibling *MMSA 142 lowWM uint64 143 rings []*Slab 144 sorted []*Slab 145 slabStats *slabStats // private counters and idle timestamp 146 statsSnapshot *Stats // pre-allocated limited "snapshot" of slabStats 147 slabIncStep int64 148 maxSlabSize int64 149 defBufSize int64 150 mem sys.MemStat 151 numSlabs int 152 
// atomic state 153 toGC atomic.Int64 // accumulates over time and triggers GC upon reaching spec-ed limit 154 optDepth atomic.Int64 // ring "depth", i.e., num free bufs we trend to (see grow()) 155 swap struct { 156 size atomic.Uint64 // actual swap size 157 crit atomic.Int32 // tracks increasing swap size up to swappingMax const 158 } 159 } 160 FreeSpec struct { 161 IdleDuration time.Duration // reduce only the slabs that are idling for at least as much time 162 MinSize int64 // minimum freed size that'd warrant calling GC (default = sizetoGC) 163 Totally bool // true: free all slabs regardless of their idle-ness and size 164 ToOS bool // GC and then return the memory to the operating system 165 } 166 // 167 // private 168 // 169 slabStats struct { 170 hits [NumStats]atomic.Uint64 171 prev [NumStats]uint64 172 hinc [NumStats]uint64 173 idleTs [NumStats]atomic.Int64 174 } 175 ) 176 177 ////////// 178 // MMSA // 179 ////////// 180 181 func (r *MMSA) String() string { 182 var ( 183 mem sys.MemStat 184 err error 185 ) 186 err = mem.Get() 187 debug.AssertNoErr(err) 188 return r.Str(&mem) 189 } 190 191 func (r *MMSA) Str(mem *sys.MemStat) string { 192 sp := r.pressure2S(r.Pressure(mem)) 193 if r.info == "" { 194 r.info = "(min-free " + cos.ToSizeIEC(int64(r.MinFree), 0) + ", low-wm " + cos.ToSizeIEC(int64(r.lowWM), 0) 195 } 196 return r.Name + "[(" + mem.String() + "), " + sp + ", " + r.info + "]" 197 } 198 199 // allocate SGL 200 // - immediateSize: known size, OR minimum expected size, OR size to preallocate 201 // immediateSize == 0 translates as DefaultBufSize - for page MMSA, 202 // and DefaultSmallBufSize - for small-size MMSA 203 // - sbufSize: slab buffer size (optional) 204 func (r *MMSA) NewSGL(immediateSize int64, sbufSize ...int64) *SGL { 205 var ( 206 slab *Slab 207 n int64 208 err error 209 ) 210 // 1. slab 211 if len(sbufSize) > 0 { 212 slab, err = r.GetSlab(sbufSize[0]) 213 } else if immediateSize <= r.maxSlabSize { 214 // NOTE allocate imm. 
size in one shot when below max 215 if immediateSize == 0 { 216 immediateSize = r.defBufSize 217 } 218 i := cos.DivCeil(immediateSize, r.slabIncStep) 219 slab = r.rings[i-1] 220 } else { 221 slab = r._large2slab(immediateSize) 222 } 223 debug.AssertNoErr(err) 224 225 // 2. sgl 226 z := _allocSGL(r.isPage()) 227 z.slab = slab 228 n = cos.DivCeil(immediateSize, slab.Size()) 229 if cap(z.sgl) < int(n) { 230 z.sgl = make([][]byte, n) 231 } else { 232 z.sgl = z.sgl[:n] 233 } 234 slab.muget.Lock() 235 for i := range int(n) { 236 z.sgl[i] = slab._alloc() 237 } 238 slab.muget.Unlock() 239 return z 240 } 241 242 // gets Slab for a given fixed buffer size that must be within expected range of sizes 243 // - the range supported by _this_ MMSA (compare w/ SelectMemAndSlab()) 244 func (r *MMSA) GetSlab(bufSize int64) (s *Slab, err error) { 245 a, b := bufSize/r.slabIncStep, bufSize%r.slabIncStep 246 if b != 0 { 247 err = fmt.Errorf("memsys: size %d must be a multiple of %d", bufSize, r.slabIncStep) 248 return 249 } 250 if a < 1 || a > int64(r.numSlabs) { 251 err = fmt.Errorf("memsys: size %d outside valid range", bufSize) 252 return 253 } 254 s = r.rings[a-1] 255 return 256 } 257 258 // uses SelectMemAndSlab to select both MMSA (page or small) and its Slab 259 func (r *MMSA) AllocSize(size int64) (buf []byte, slab *Slab) { 260 _, slab = r.SelectMemAndSlab(size) 261 buf = slab.Alloc() 262 return 263 } 264 265 func (r *MMSA) Alloc() (buf []byte, slab *Slab) { 266 size := r.defBufSize 267 _, slab = r.SelectMemAndSlab(size) 268 buf = slab.Alloc() 269 return 270 } 271 272 func (r *MMSA) Free(buf []byte) { 273 size := int64(cap(buf)) 274 if size > r.maxSlabSize && !r.isPage() { 275 r.sibling.Free(buf) 276 } else if size < r.slabIncStep && r.isPage() { 277 r.sibling.Free(buf) 278 } else { 279 debug.Assert(size%r.slabIncStep == 0) 280 debug.Assert(size/r.slabIncStep <= int64(r.numSlabs)) 281 282 slab := r._selectSlab(size) 283 slab.Free(buf) 284 } 285 } 286 287 // Given a known, 
expected or minimum size to allocate, selects MMSA (page or small, if initialized) 288 // and its Slab 289 func (r *MMSA) SelectMemAndSlab(size int64) (mmsa *MMSA, slab *Slab) { 290 if size > r.maxSlabSize && !r.isPage() { 291 return r.sibling, r.sibling._selectSlab(size) 292 } 293 if size < r.slabIncStep && r.isPage() { 294 return r.sibling, r.sibling._selectSlab(size) 295 } 296 mmsa, slab = r, r._selectSlab(size) 297 return 298 } 299 300 func (r *MMSA) _selectSlab(size int64) (slab *Slab) { 301 if size >= r.maxSlabSize { 302 slab = r.rings[len(r.rings)-1] 303 } else if size <= r.slabIncStep { 304 slab = r.rings[0] 305 } else { 306 i := (size + r.slabIncStep - 1) / r.slabIncStep 307 slab = r.rings[i-1] 308 } 309 return 310 } 311 312 func (r *MMSA) Append(buf []byte, bytes string) (nbuf []byte) { 313 var ( 314 ll, l, c = len(buf), len(bytes), cap(buf) 315 a = ll + l - c 316 ) 317 if a > 0 { 318 nbuf, _ = r.AllocSize(int64(c + a)) 319 copy(nbuf, buf) 320 r.Free(buf) 321 nbuf = nbuf[:ll+l] 322 } else { 323 nbuf = buf[:ll+l] 324 } 325 copy(nbuf[ll:], bytes) 326 return 327 } 328 329 // private 330 331 // select slab for SGL given a large immediate size to allocate 332 func (r *MMSA) _large2slab(immediateSize int64) *Slab { 333 size := cos.DivCeil(immediateSize, countThreshold) 334 for _, slab := range r.rings { 335 if slab.Size() >= size { 336 return slab 337 } 338 } 339 return r.rings[len(r.rings)-1] 340 } 341 342 func (r *MMSA) env() (err error) { 343 var minfree int64 344 if a := os.Getenv("AIS_MINMEM_FREE"); a != "" { 345 if minfree, err = cos.ParseSize(a, cos.UnitsIEC); err != nil { 346 return fmt.Errorf("memsys: cannot parse AIS_MINMEM_FREE %q", a) 347 } 348 r.MinFree = uint64(minfree) 349 } 350 if a := os.Getenv("AIS_MINMEM_PCT_TOTAL"); a != "" { 351 if r.MinPctTotal, err = strconv.Atoi(a); err != nil { 352 return fmt.Errorf("memsys: cannot parse AIS_MINMEM_PCT_TOTAL %q", a) 353 } 354 if r.MinPctTotal < 0 || r.MinPctTotal > 100 { 355 return fmt.Errorf("memsys: 
invalid AIS_MINMEM_PCT_TOTAL %q", a) 356 } 357 } 358 if a := os.Getenv("AIS_MINMEM_PCT_FREE"); a != "" { 359 if r.MinPctFree, err = strconv.Atoi(a); err != nil { 360 return fmt.Errorf("memsys: cannot parse AIS_MINMEM_PCT_FREE %q", a) 361 } 362 if r.MinPctFree < 0 || r.MinPctFree > 100 { 363 return fmt.Errorf("memsys: invalid AIS_MINMEM_PCT_FREE %q", a) 364 } 365 } 366 return 367 }