github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dsort/mem_watcher.go

// Package dsort provides APIs for distributed archive file shuffling.
/*
 * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
 */
package dsort

import (
	"sync"
	"time"

	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/ext/dsort/shard"
	"github.com/NVIDIA/aistore/sys"
)

const (
	memoryReservedInterval    = 50 * time.Millisecond
	memoryExcessInterval      = time.Second
	unreserveMemoryBufferSize = 10000
)

type singleMemoryWatcher struct {
	wg     *sync.WaitGroup
	ticker *time.Ticker
	stopCh cos.StopCh
}

// memoryWatcher is responsible for monitoring memory usage and deciding
// whether a specific action needs to be taken (see watchReserved and
// watchExcess below).
type memoryWatcher struct {
	m *Manager

	excess, reserved  *singleMemoryWatcher
	maxMemoryToUse    uint64
	reservedMemory    atomic.Uint64
	memoryUsed        atomic.Uint64 // memory used at a specific point in time; refreshed periodically
	unreserveMemoryCh chan uint64
}

func newSingleMemoryWatcher(interval time.Duration) *singleMemoryWatcher {
	smw := &singleMemoryWatcher{wg: &sync.WaitGroup{}, ticker: time.NewTicker(interval)}
	smw.stopCh.Init()
	return smw
}

func newMemoryWatcher(m *Manager, maxMemoryUsage uint64) *memoryWatcher {
	return &memoryWatcher{
		m: m,

		excess:            newSingleMemoryWatcher(memoryExcessInterval),
		reserved:          newSingleMemoryWatcher(memoryReservedInterval),
		maxMemoryToUse:    maxMemoryUsage,
		unreserveMemoryCh: make(chan uint64, unreserveMemoryBufferSize),
	}
}

func (mw *memoryWatcher) watch() error {
	var mem sys.MemStat
	if err := mem.Get(); err != nil {
		return err
	}
	mw.memoryUsed.Store(mem.ActualUsed)

	mw.reserved.wg.Add(1)
	go mw.watchReserved()
	mw.excess.wg.Add(1)
	go mw.watchExcess(mem)
	return nil
}

func (mw *memoryWatcher) watchReserved() {
	defer mw.reserved.wg.Done()

	// Memory updater. Since the extraction phase is concurrent and we cannot
	// know in advance how much memory a given compressed shard will occupy
	// once extracted, we employ the following mechanism. Just before
	// extraction we estimate how much memory the shard will take (by
	// multiplying its file size by the average compression ratio). Then we
	// track the currently used memory as actual used + reserved. After the
	// extraction finishes we put the memory reserved for the shard into the
	// unreserve channel. Note that we cannot unreserve it right away because
	// the actual used memory has not yet been refreshed (even though it has
	// surely changed). Once the updater fetches the current system memory
	// usage, we can unreserve - the extracted shard is by then accounted for
	// in the newly fetched value. This way it is almost impossible to exceed
	// the user-configured memory limit; unfortunately, it can still happen
	// when we underestimate the memory needed to extract a compressed shard.
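	//
	// From the extractor's point of view the protocol looks as follows
	// (a sketch; the actual call sites live elsewhere in this package, and
	// `compressedSize`/`avgCompressRatio`/`estimated` are illustrative names):
	//
	//	estimated := uint64(float64(compressedSize) * avgCompressRatio)
	//	exceeding := mw.reserveMem(estimated) // reservedMemory += estimated
	//	// ... extract the shard, spilling to disk if `exceeding` ...
	//	mw.unreserveMem(estimated) // queued; subtracted on the next tick below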
	for {
		select {
		case <-mw.reserved.ticker.C:
			var curMem sys.MemStat
			if err := curMem.Get(); err == nil {
				mw.memoryUsed.Store(curMem.ActualUsed)

				unreserve := true
				for unreserve {
					select {
					case size := <-mw.unreserveMemoryCh:
						mw.reservedMemory.Sub(size)
					default:
						unreserve = false
					}
				}
			}
		case <-mw.m.listenAborted():
			return
		case <-mw.reserved.stopCh.Listen():
			return
		}
	}
}

// Runs in a goroutine, watches allocated memory, and frees SGLs if need be.
// ------------------------------------------------------
// We also watch the memory in `watchReserved`, but that alone may be
// insufficient because there are more factors than just `SGL`s: `Records`,
// `Shards`, `RecordContents`, `ExtractionPaths` etc. All these structures
// require memory, sometimes counted in GBs. That is why we also need the
// excess watcher to prevent memory overuse.
// ------------------------------------------------------
// Because Go's runtime does not immediately return freed memory to the
// system, it would be incorrect to treat `maxMemoryToUse - curMem.ActualUsed`
// as excessively allocated.
func (mw *memoryWatcher) watchExcess(memStat sys.MemStat) {
	defer mw.excess.wg.Done()

	buf, slab := g.mm.Alloc()
	defer slab.Free(buf)

	lastMemoryUsage := memStat.ActualUsed
	for {
		select {
		case <-mw.excess.ticker.C:
			var curMem sys.MemStat
			if err := curMem.Get(); err != nil {
				continue
			}

			memExcess := int64(curMem.ActualUsed - lastMemoryUsage)
			lastMemoryUsage = curMem.ActualUsed

			if curMem.ActualUsed < mw.maxMemoryToUse {
				continue
			}

			storeType := shard.DiskStoreType
			if mw.m.shardRW.SupportsOffset() {
				storeType = shard.OffsetStoreType
			}
			mw.m.recm.RecordContents().Range(func(key, value any) bool {
				n := mw.m.recm.FreeMem(key.(string), storeType, value, buf)
				memExcess -= n
				return memExcess > 0 // continue while we still need to free memory
			})

			cos.FreeMemToOS(false /*force*/)
		case <-mw.m.listenAborted():
			return
		case <-mw.excess.stopCh.Listen():
			return
		}
	}
}

func (mw *memoryWatcher) reserveMem(toReserve uint64) (exceeding bool) {
	newReservedMemory := mw.reservedMemory.Add(toReserve)
	// The expected total memory usage after all objects are extracted equals:
	// previously reserved memory + uncompressed size of the shard + currently used memory.
	expectedTotalMemoryUsed := newReservedMemory + mw.memoryUsed.Load()

	exceeding = expectedTotalMemoryUsed >= mw.maxMemoryToUse
	return
}

func (mw *memoryWatcher) unreserveMem(toUnreserve uint64) {
	mw.unreserveMemoryCh <- toUnreserve
}

func (mw *memoryWatcher) stopWatchingExcess() {
	mw.excess.ticker.Stop()
	mw.excess.stopCh.Close()
	mw.excess.wg.Wait()
}

func (mw *memoryWatcher) stopWatchingReserved() {
	mw.reserved.ticker.Stop()
	mw.reserved.stopCh.Close()
	mw.reserved.wg.Wait()
}

func (mw *memoryWatcher) stop() {
	mw.stopWatchingExcess()
	mw.stopWatchingReserved()
	close(mw.unreserveMemoryCh)
}
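// inmemShardAllocator acts as a counting semaphore over bytes: alloc blocks
// the caller (on a sync.Cond) until the requested size fits under the
// maxAllocated budget, and free returns the bytes and wakes a single waiter.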
type inmemShardAllocator struct {
	mtx  *sync.Mutex
	cond *sync.Cond

	maxAllocated uint64
	allocated    uint64
}

func newInmemShardAllocator(maxAllocated uint64) *inmemShardAllocator {
	x := &inmemShardAllocator{
		mtx:          &sync.Mutex{},
		maxAllocated: maxAllocated,
	}
	x.cond = sync.NewCond(x.mtx)
	return x
}

func (sa *inmemShardAllocator) alloc(size uint64) {
	sa.mtx.Lock()

	for sa.freeMem() < size {
		sa.cond.Wait()
	}

	sa.allocated += size
	sa.mtx.Unlock()
}

func (sa *inmemShardAllocator) free(size uint64) {
	sa.mtx.Lock()
	sa.allocated -= size
	sa.cond.Signal()
	sa.mtx.Unlock()
}

func (sa *inmemShardAllocator) freeMem() uint64 {
	return sa.maxAllocated - sa.allocated
}
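
// Usage sketch for the allocator (illustrative only; the actual wiring lives
// elsewhere in this package, and `maxMem`/`shardSize` are assumed names):
//
//	sa := newInmemShardAllocator(maxMem) // maxMem: total in-memory shard budget
//	sa.alloc(shardSize)                  // blocks until shardSize bytes fit under the cap
//	// ... build the shard in memory ...
//	sa.free(shardSize)                   // release the bytes and wake one waiter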