github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/tgtspace.go (about)

     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     6  
     7  import (
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/NVIDIA/aistore/api/apc"
    12  	"github.com/NVIDIA/aistore/cmn"
    13  	"github.com/NVIDIA/aistore/cmn/atomic"
    14  	"github.com/NVIDIA/aistore/cmn/cos"
    15  	"github.com/NVIDIA/aistore/cmn/debug"
    16  	"github.com/NVIDIA/aistore/cmn/mono"
    17  	"github.com/NVIDIA/aistore/cmn/nlog"
    18  	"github.com/NVIDIA/aistore/core"
    19  	"github.com/NVIDIA/aistore/fs"
    20  	"github.com/NVIDIA/aistore/ios"
    21  	"github.com/NVIDIA/aistore/nl"
    22  	"github.com/NVIDIA/aistore/space"
    23  	"github.com/NVIDIA/aistore/xact"
    24  	"github.com/NVIDIA/aistore/xact/xreg"
    25  )
    26  
    27  const (
    28  	// - note that an API call (e.g. CLI) will go through anyway
    29  	// - compare with cmn/cos/oom.go
    30  	minAutoDetectInterval = 10 * time.Minute
    31  )
    32  
    33  var (
    34  	lastTrigOOS atomic.Int64
    35  )
    36  
    37  // triggers by an out-of-space condition or a suspicion of thereof
    38  func (t *target) OOS(csRefreshed *fs.CapStatus) (cs fs.CapStatus) {
    39  	var errCap error
    40  	if csRefreshed != nil {
    41  		cs = *csRefreshed
    42  		errCap = cs.Err()
    43  	} else {
    44  		var err error
    45  		cs, err, errCap = fs.CapRefresh(nil, nil)
    46  		if err != nil {
    47  			nlog.Errorln(t.String(), "failed to update capacity stats:", err)
    48  			return
    49  		}
    50  	}
    51  	if errCap == nil {
    52  		return // unlikely; nothing to do
    53  	}
    54  	if prev := lastTrigOOS.Load(); mono.Since(prev) < minAutoDetectInterval {
    55  		nlog.Warningf("%s: _not_ running store cleanup: (%v, %v), %s", t, prev, minAutoDetectInterval, cs.String())
    56  		return
    57  	}
    58  
    59  	nlog.Warningln(t.String(), "running store cleanup:", cs.String())
    60  	// run serially, cleanup first and LRU second, iff out-of-space persists
    61  	go func() {
    62  		cs := t.runStoreCleanup("" /*uuid*/, nil /*wg*/)
    63  		lastTrigOOS.Store(mono.NanoTime())
    64  		if cs.Err() != nil {
    65  			nlog.Warningln(t.String(), "still out of space, running LRU eviction now:", cs.String())
    66  			t.runLRU("" /*uuid*/, nil /*wg*/, false)
    67  		}
    68  	}()
    69  	return
    70  }
    71  
    72  func (t *target) runLRU(id string, wg *sync.WaitGroup, force bool, bcks ...cmn.Bck) {
    73  	regToIC := id == ""
    74  	if regToIC {
    75  		id = cos.GenUUID()
    76  	}
    77  	rns := xreg.RenewLRU(id)
    78  	if rns.Err != nil || rns.IsRunning() {
    79  		debug.Assert(rns.Err == nil || cmn.IsErrXactUsePrev(rns.Err))
    80  		if wg != nil {
    81  			wg.Done()
    82  		}
    83  		return
    84  	}
    85  	xlru := rns.Entry.Get()
    86  	if regToIC && xlru.ID() == id {
    87  		// pre-existing UUID: notify IC members
    88  		regMsg := xactRegMsg{UUID: id, Kind: apc.ActLRU, Srcs: []string{t.SID()}}
    89  		msg := t.newAmsgActVal(apc.ActRegGlobalXaction, regMsg)
    90  		t.bcastAsyncIC(msg)
    91  	}
    92  	ini := space.IniLRU{
    93  		Xaction:             xlru.(*space.XactLRU),
    94  		Config:              cmn.GCO.Get(),
    95  		StatsT:              t.statsT,
    96  		Buckets:             bcks,
    97  		GetFSUsedPercentage: ios.GetFSUsedPercentage,
    98  		GetFSStats:          ios.GetFSStats,
    99  		WG:                  wg,
   100  		Force:               force,
   101  	}
   102  	xlru.AddNotif(&xact.NotifXact{
   103  		Base: nl.Base{When: core.UponTerm, Dsts: []string{equalIC}, F: t.notifyTerm},
   104  		Xact: xlru,
   105  	})
   106  	space.RunLRU(&ini)
   107  }
   108  
   109  func (t *target) runStoreCleanup(id string, wg *sync.WaitGroup, bcks ...cmn.Bck) fs.CapStatus {
   110  	regToIC := id == ""
   111  	if regToIC {
   112  		id = cos.GenUUID()
   113  	}
   114  	rns := xreg.RenewStoreCleanup(id)
   115  	if rns.Err != nil || rns.IsRunning() {
   116  		debug.Assert(rns.Err == nil || cmn.IsErrXactUsePrev(rns.Err))
   117  		if wg != nil {
   118  			wg.Done()
   119  		}
   120  		return fs.CapStatus{}
   121  	}
   122  	xcln := rns.Entry.Get()
   123  	if regToIC && xcln.ID() == id {
   124  		// pre-existing UUID: notify IC members
   125  		regMsg := xactRegMsg{UUID: id, Kind: apc.ActStoreCleanup, Srcs: []string{t.SID()}}
   126  		msg := t.newAmsgActVal(apc.ActRegGlobalXaction, regMsg)
   127  		t.bcastAsyncIC(msg)
   128  	}
   129  	ini := space.IniCln{
   130  		Xaction: xcln.(*space.XactCln),
   131  		Config:  cmn.GCO.Get(),
   132  		StatsT:  t.statsT,
   133  		Buckets: bcks,
   134  		WG:      wg,
   135  	}
   136  	xcln.AddNotif(&xact.NotifXact{
   137  		Base: nl.Base{When: core.UponTerm, Dsts: []string{equalIC}, F: t.notifyTerm},
   138  		Xact: xcln,
   139  	})
   140  	return space.RunCleanup(&ini)
   141  }