github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dsort/mem_watcher.go

// Package dsort provides APIs for distributed archive file shuffling.
/*
 * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
 */
package dsort

import (
	"sync"
	"time"

	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/ext/dsort/shard"
	"github.com/NVIDIA/aistore/sys"
)

const (
	memoryReservedInterval    = 50 * time.Millisecond
	memoryExcessInterval      = time.Second
	unreserveMemoryBufferSize = 10000
)

type singleMemoryWatcher struct {
	wg     *sync.WaitGroup
	ticker *time.Ticker
	stopCh cos.StopCh
}

// memoryWatcher is responsible for monitoring memory usage and deciding
// whether a specific action needs to be taken.
type memoryWatcher struct {
	m *Manager

	excess, reserved  *singleMemoryWatcher
	maxMemoryToUse    uint64
	reservedMemory    atomic.Uint64
	memoryUsed        atomic.Uint64 // memory used at a specific point in time; refreshed periodically
	unreserveMemoryCh chan uint64
}

func newSingleMemoryWatcher(interval time.Duration) *singleMemoryWatcher {
	smw := &singleMemoryWatcher{wg: &sync.WaitGroup{}, ticker: time.NewTicker(interval)}
	smw.stopCh.Init()
	return smw
}

func newMemoryWatcher(m *Manager, maxMemoryUsage uint64) *memoryWatcher {
	return &memoryWatcher{
		m: m,

		excess:            newSingleMemoryWatcher(memoryExcessInterval),
		reserved:          newSingleMemoryWatcher(memoryReservedInterval),
		maxMemoryToUse:    maxMemoryUsage,
		unreserveMemoryCh: make(chan uint64, unreserveMemoryBufferSize),
	}
}

// watch takes an initial snapshot of system memory usage and starts both the
// reserved-memory and excess-memory watchers.
func (mw *memoryWatcher) watch() error {
	var mem sys.MemStat
	if err := mem.Get(); err != nil {
		return err
	}
	mw.memoryUsed.Store(mem.ActualUsed)

	mw.reserved.wg.Add(1)
	go mw.watchReserved()
	mw.excess.wg.Add(1)
	go mw.watchExcess(mem)
	return nil
}

func (mw *memoryWatcher) watchReserved() {
	defer mw.reserved.wg.Done()

	// Start the memory updater. Since the extraction phase is concurrent and
	// we cannot know in advance how much memory a given compressed shard will
	// take once extracted, we need a mechanism for updating memory usage.
	// Just before extraction we estimate how much memory the shard will
	// occupy (by multiplying the file size by the average compression ratio).
	// Then we update the currently used memory to actual used + reserved.
	// After we finish extracting the shard we put its reserved memory into
	// the unreserve memory channel. Note that we cannot unreserve it right
	// away because the actual used memory has not yet been updated (even
	// though it surely changed). Once the memory updater fetches and updates
	// the currently used memory, we can unreserve (the memory is already
	// accounted for in the newly fetched usage value). This way it is almost
	// impossible to exceed the maximum amount of memory we are allowed to use
	// (set by the user); unfortunately, it can still happen when we
	// underestimate the amount of memory needed to extract a compressed file.
	for {
		select {
		case <-mw.reserved.ticker.C:
			var curMem sys.MemStat
			if err := curMem.Get(); err == nil {
				mw.memoryUsed.Store(curMem.ActualUsed)

				// Drain the channel: unreserve everything that finished
				// extracting since the last refresh.
				unreserve := true
				for unreserve {
					select {
					case size := <-mw.unreserveMemoryCh:
						mw.reservedMemory.Sub(size)
					default:
						unreserve = false
					}
				}
			}
		case <-mw.m.listenAborted():
			return
		case <-mw.reserved.stopCh.Listen():
			return
		}
	}
}
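
// The intended pairing of reserveMem/unreserveMem around a single shard, as
// described above, can be sketched as follows (illustrative only;
// `estimateExtractedSize` and `extractShard` are hypothetical helpers, not
// part of this package):
//
//	estimated := estimateExtractedSize(shardSize, avgCompressRatio)
//	if exceeding := mw.reserveMem(estimated); exceeding {
//		// caller may back off, or extract to disk instead of memory
//	}
//	extractShard(...)
//	mw.unreserveMem(estimated) // drained later by watchReserved's ticker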

// Runs in a goroutine, watches allocated memory, and frees SGLs if need be.
// ------------------------------------------------------
// We also watch the memory in `watchReserved`, but that alone may be
// insufficient because there are more factors than just `SGL`s: `Records`,
// `Shards`, `RecordContents`, `ExtractionPaths`, etc. All these structures
// require memory, sometimes counted in GBs. That is why we also need the
// excess watcher to prevent memory overuse.
// --------------------------------------------------------
// Because Go's runtime does not immediately return freed memory
// to the system, it would be incorrect to treat `maxMemoryToUse - curMem.ActualUsed`
// as excessively allocated.
func (mw *memoryWatcher) watchExcess(memStat sys.MemStat) {
	defer mw.excess.wg.Done()

	buf, slab := g.mm.Alloc()
	defer slab.Free(buf)

	lastMemoryUsage := memStat.ActualUsed
	for {
		select {
		case <-mw.excess.ticker.C:
			var curMem sys.MemStat
			if err := curMem.Get(); err != nil {
				continue
			}

			// The unsigned subtraction may wrap around when usage went down;
			// the int64 conversion turns the wrapped value back into the
			// correct (negative) delta.
			memExcess := int64(curMem.ActualUsed - lastMemoryUsage)
			lastMemoryUsage = curMem.ActualUsed

			if curMem.ActualUsed < mw.maxMemoryToUse {
				continue
			}

			storeType := shard.DiskStoreType
			if mw.m.shardRW.SupportsOffset() {
				storeType = shard.OffsetStoreType
			}
			mw.m.recm.RecordContents().Range(func(key, value any) bool {
				n := mw.m.recm.FreeMem(key.(string), storeType, value, buf)
				memExcess -= n
				return memExcess > 0 // continue if we need to free more
			})

			cos.FreeMemToOS(false /*force*/)
		case <-mw.m.listenAborted():
			return
		case <-mw.excess.stopCh.Listen():
			return
		}
	}
}
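
// The "freed memory is not immediately returned" point above can be observed
// with the standard library alone (illustrative; this file relies on
// cos.FreeMemToOS instead):
//
//	var ms runtime.MemStats
//	runtime.ReadMemStats(&ms)
//	// ms.HeapIdle - ms.HeapReleased is memory the Go runtime still holds
//	// even though the application has already freed it.
//	debug.FreeOSMemory() // runtime/debug: force-return it to the OS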

func (mw *memoryWatcher) reserveMem(toReserve uint64) (exceeding bool) {
	newReservedMemory := mw.reservedMemory.Add(toReserve)
	// The expected total memory after all objects are extracted equals:
	// previously reserved memory + uncompressed size of the shard + currently used memory.
	expectedTotalMemoryUsed := newReservedMemory + mw.memoryUsed.Load()

	exceeding = expectedTotalMemoryUsed >= mw.maxMemoryToUse
	return
}
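
// Worked example (hypothetical numbers): with maxMemoryToUse = 8 GiB,
// memoryUsed = 5 GiB, and reservedMemory = 2 GiB, reserving another 1.5 GiB
// yields an expected total of 2 + 1.5 + 5 = 8.5 GiB >= 8 GiB, so reserveMem
// returns exceeding = true. Note that the reservation still takes effect;
// reacting to the exceeding result is left to the caller.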

func (mw *memoryWatcher) unreserveMem(toUnreserve uint64) {
	mw.unreserveMemoryCh <- toUnreserve
}

func (mw *memoryWatcher) stopWatchingExcess() {
	mw.excess.ticker.Stop()
	mw.excess.stopCh.Close()
	mw.excess.wg.Wait()
}

func (mw *memoryWatcher) stopWatchingReserved() {
	mw.reserved.ticker.Stop()
	mw.reserved.stopCh.Close()
	mw.reserved.wg.Wait()
}

func (mw *memoryWatcher) stop() {
	mw.stopWatchingExcess()
	mw.stopWatchingReserved()
	close(mw.unreserveMemoryCh)
}

type inmemShardAllocator struct {
	mtx  *sync.Mutex
	cond *sync.Cond

	maxAllocated uint64
	allocated    uint64
}

func newInmemShardAllocator(maxAllocated uint64) *inmemShardAllocator {
	x := &inmemShardAllocator{
		mtx:          &sync.Mutex{},
		maxAllocated: maxAllocated,
	}
	x.cond = sync.NewCond(x.mtx)
	return x
}

// alloc blocks until at least `size` bytes fit within the allocator's budget,
// then claims them.
func (sa *inmemShardAllocator) alloc(size uint64) {
	sa.mtx.Lock()

	for sa.freeMem() < size {
		sa.cond.Wait()
	}

	sa.allocated += size
	sa.mtx.Unlock()
}

// free returns `size` bytes to the budget and wakes up a single waiter.
func (sa *inmemShardAllocator) free(size uint64) {
	sa.mtx.Lock()
	sa.allocated -= size
	sa.cond.Signal()
	sa.mtx.Unlock()
}

// freeMem must be called with the mutex held.
func (sa *inmemShardAllocator) freeMem() uint64 {
	return sa.maxAllocated - sa.allocated
}
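
// Typical use of the allocator (illustrative only; `buildShard` and the sizes
// are hypothetical):
//
//	sa := newInmemShardAllocator(4 * cos.GiB)
//	sa.alloc(shardSize)      // blocks while freeMem() < shardSize
//	buildShard(...)          // create the shard entirely in memory
//	sa.free(shardSize)       // wakes one waiting goroutine via cond.Signal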