github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ios/iostat.go (about)

     1  // Package ios is a collection of interfaces to the local storage subsystem;
     2  // the package includes OS-dependent implementations for those interfaces.
     3  /*
     4   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package ios
     7  
     8  import (
     9  	"fmt"
    10  	"path/filepath"
    11  	"strings"
    12  	"sync"
    13  	ratomic "sync/atomic"
    14  	"time"
    15  
    16  	"github.com/NVIDIA/aistore/cmn"
    17  	"github.com/NVIDIA/aistore/cmn/atomic"
    18  	"github.com/NVIDIA/aistore/cmn/cos"
    19  	"github.com/NVIDIA/aistore/cmn/debug"
    20  	"github.com/NVIDIA/aistore/cmn/mono"
    21  	"github.com/NVIDIA/aistore/cmn/nlog"
    22  )
    23  
    24  const statsdir = "/sys/class/block"
    25  
    26  // public
    27  type (
    28  	IOS interface {
    29  		GetAllMpathUtils() *MpathUtil
    30  		GetMpathUtil(mpath string) int64
    31  		AddMpath(mpath, fs string, label Label, config *cmn.Config) (FsDisks, error)
    32  		RemoveMpath(mpath string, testingEnv bool)
    33  		FillDiskStats(m AllDiskStats)
    34  	}
    35  	FsDisks   map[string]int64 // disk name => sector size
    36  	MpathUtil sync.Map
    37  )
    38  
    39  // internal
    40  type (
    41  	cache struct {
    42  		ioms   map[string]int64 // IO millis
    43  		util   map[string]int64 // utilization
    44  		rms    map[string]int64 // read millis
    45  		rbytes map[string]int64 // read bytes
    46  		reads  map[string]int64 // completed read requests
    47  		rbps   map[string]int64 // read B/s
    48  		ravg   map[string]int64 // average read size
    49  		wms    map[string]int64 // write millis
    50  		wbytes map[string]int64 // written bytes
    51  		writes map[string]int64 // completed write requests
    52  		wbps   map[string]int64 // write B/s
    53  		wavg   map[string]int64 // average write size
    54  
    55  		mpathUtil   map[string]int64 // Average utilization of the disks, range [0, 100].
    56  		mpathUtilRO MpathUtil        // Read-only copy of `mpathUtil`.
    57  
    58  		expireTime int64
    59  		timestamp  int64
    60  	}
    61  	ios struct {
    62  		mpath2disks map[string]FsDisks
    63  		disk2mpath  cos.StrKVs
    64  		disk2sysfn  cos.StrKVs
    65  		blockStats  allBlockStats
    66  		lsblk       ratomic.Pointer[LsBlk]
    67  		cache       ratomic.Pointer[cache]
    68  		cacheHst    [16]*cache
    69  		cacheIdx    int
    70  		mu          sync.Mutex
    71  		busy        atomic.Bool
    72  	}
    73  )
    74  
    75  // interface guard
    76  var _ IOS = (*ios)(nil)
    77  
    78  ///////////////
    79  // MpathUtil //
    80  ///////////////
    81  
    82  func (x *MpathUtil) Get(mpath string) int64 {
    83  	if v, ok := (*sync.Map)(x).Load(mpath); ok {
    84  		util := v.(int64)
    85  		return util
    86  	}
    87  	return 100 // assume the worst
    88  }
    89  
    90  func (x *MpathUtil) Set(mpath string, util int64) {
    91  	(*sync.Map)(x).Store(mpath, util)
    92  }
    93  
    94  /////////
    95  // ios //
    96  /////////
    97  
    98  func New(num int) IOS {
    99  	ios := &ios{
   100  		mpath2disks: make(map[string]FsDisks, num),
   101  		disk2mpath:  make(cos.StrKVs, num),
   102  		disk2sysfn:  make(cos.StrKVs, num),
   103  		blockStats:  make(allBlockStats, num),
   104  	}
   105  	for i := range len(ios.cacheHst) {
   106  		ios.cacheHst[i] = newCache(num)
   107  	}
   108  	ios._put(ios.cacheHst[0])
   109  	ios.cacheIdx = 0
   110  	ios.busy.Store(false) // redundant on purpose
   111  
   112  	// once (cleared via Clblk)
   113  	if res := lsblk("new-ios", true); res != nil {
   114  		ios.lsblk.Store(res)
   115  	}
   116  
   117  	return ios
   118  }
   119  
   120  func Clblk(i IOS) {
   121  	ios := i.(*ios)
   122  	ios.lsblk.Store(nil)
   123  }
   124  
   125  func newCache(num int) *cache {
   126  	return &cache{
   127  		ioms:      make(map[string]int64, num),
   128  		util:      make(map[string]int64, num),
   129  		rms:       make(map[string]int64, num),
   130  		rbytes:    make(map[string]int64, num),
   131  		reads:     make(map[string]int64, num),
   132  		rbps:      make(map[string]int64, num),
   133  		ravg:      make(map[string]int64, num),
   134  		wms:       make(map[string]int64, num),
   135  		wbytes:    make(map[string]int64, num),
   136  		writes:    make(map[string]int64, num),
   137  		wbps:      make(map[string]int64, num),
   138  		wavg:      make(map[string]int64, num),
   139  		mpathUtil: make(map[string]int64, num),
   140  	}
   141  }
   142  
   143  func (ios *ios) _get() *cache      { return ios.cache.Load() }
   144  func (ios *ios) _put(cache *cache) { ios.cache.Store(cache) }
   145  
   146  //
   147  // add mountpath
   148  //
   149  
   150  func (ios *ios) AddMpath(mpath, fs string, label Label, config *cmn.Config) (fsdisks FsDisks, err error) {
   151  	var (
   152  		warn       string
   153  		testingEnv = config.TestingEnv()
   154  		fspaths    = config.LocalConfig.FSP.Paths
   155  	)
   156  	if pres := ios.lsblk.Load(); pres != nil {
   157  		res := *pres
   158  		fsdisks, err = fs2disks(&res, fs, label, len(fspaths), testingEnv)
   159  	} else {
   160  		res := lsblk(fs, testingEnv)
   161  		if res != nil {
   162  			fsdisks, err = fs2disks(res, fs, label, len(fspaths), testingEnv)
   163  		}
   164  	}
   165  	if len(fsdisks) == 0 || err != nil {
   166  		return
   167  	}
   168  	ios.mu.Lock()
   169  	warn, err = ios._add(mpath, label, fsdisks, fspaths, testingEnv)
   170  	ios.mu.Unlock()
   171  
   172  	if err != nil {
   173  		nlog.Errorln(err)
   174  	}
   175  	if warn != "" {
   176  		nlog.Infoln(warn)
   177  	}
   178  	return
   179  }
   180  
   181  func (ios *ios) _add(mpath string, label Label, fsdisks FsDisks, fspaths cos.StrKVs, testingEnv bool) (warn string, _ error) {
   182  	if dd, ok := ios.mpath2disks[mpath]; ok {
   183  		return "", fmt.Errorf("duplicate mountpath %s (disks %s, %s)", mpath, dd._str(), fsdisks._str())
   184  	}
   185  
   186  	ios.mpath2disks[mpath] = fsdisks
   187  	for disk := range fsdisks {
   188  		if mp, ok := ios.disk2mpath[disk]; ok && !testingEnv && !cmn.AllowSharedDisksAndNoDisks {
   189  			if label.IsNil() {
   190  				return "", fmt.Errorf("disk %s is shared between mountpaths %s and %s", disk, mpath, mp)
   191  			}
   192  			var otherLabel Label
   193  			if o, ok := fspaths[mp]; ok {
   194  				otherLabel = Label(o)
   195  			}
   196  			warn = fmt.Sprintf("Warning: disk %s is shared between %s%s and %s%s",
   197  				disk, mpath, label.ToLog(), mp, otherLabel.ToLog())
   198  		}
   199  		ios.disk2mpath[disk] = mpath
   200  		ios.blockStats[disk] = &blockStats{}
   201  	}
   202  
   203  	for disk, mountpath := range ios.disk2mpath {
   204  		if _, ok := ios.disk2sysfn[disk]; ok {
   205  			continue
   206  		}
   207  		path := filepath.Join(statsdir, disk, "stat")
   208  		ios.disk2sysfn[disk] = path
   209  
   210  		// multipath NVMe: alternative block-stats location
   211  		cdisk, err := icn(disk, statsdir)
   212  		if err != nil {
   213  			if label.IsNil() {
   214  				return "", err
   215  			}
   216  			if warn != "" {
   217  				warn += "\n"
   218  			}
   219  			warn += fmt.Sprint("Warning:", err)
   220  		}
   221  		if cdisk != "" {
   222  			cpath := filepath.Join(statsdir, cdisk, "stat")
   223  			if icnPath(ios.disk2sysfn[disk], cpath, mountpath) {
   224  				if warn != "" {
   225  					warn += "\n"
   226  				}
   227  				warn += fmt.Sprint("Info: alternative block-stats path:", disk, path, "=>", cdisk, cpath)
   228  				ios.disk2sysfn[disk] = cpath
   229  			}
   230  		}
   231  	}
   232  	if len(ios.disk2sysfn) != len(ios.disk2mpath) {
   233  		for disk := range ios.disk2sysfn {
   234  			if _, ok := ios.disk2mpath[disk]; !ok {
   235  				delete(ios.disk2sysfn, disk)
   236  			}
   237  		}
   238  	}
   239  	return warn, nil
   240  }
   241  
   242  //
   243  // remove mountpath
   244  //
   245  
   246  func (ios *ios) RemoveMpath(mpath string, testingEnv bool) {
   247  	ios.mu.Lock()
   248  	ios._del(mpath, testingEnv)
   249  	ios.mu.Unlock()
   250  }
   251  
   252  func (ios *ios) _del(mpath string, testingEnv bool) {
   253  	oldDisks, ok := ios.mpath2disks[mpath]
   254  	if !ok {
   255  		nlog.Warningf("mountpath %s already removed", mpath)
   256  		return
   257  	}
   258  	for disk := range oldDisks {
   259  		if testingEnv {
   260  			ios._delDiskTesting(mpath, disk)
   261  		} else {
   262  			ios._delDisk(mpath, disk)
   263  		}
   264  	}
   265  	delete(ios.mpath2disks, mpath)
   266  }
   267  
   268  // TestingEnv ("disk sharing"):
   269  // If another mountpath containing the same disk is found, the disk2mpath map
   270  // gets updated. Otherwise, go ahead and remove the "disk".
   271  func (ios *ios) _delDiskTesting(mpath, disk string) {
   272  	if _, ok := ios.disk2mpath[disk]; !ok {
   273  		return
   274  	}
   275  	for path, disks := range ios.mpath2disks {
   276  		if path == mpath {
   277  			continue
   278  		}
   279  		for dsk := range disks {
   280  			if dsk == disk {
   281  				ios.disk2mpath[disk] = path // found - keeping
   282  				return
   283  			}
   284  		}
   285  	}
   286  	delete(ios.mpath2disks, disk)
   287  }
   288  
   289  func (ios *ios) _delDisk(mpath, disk string) {
   290  	mp, ok := ios.disk2mpath[disk]
   291  	if !ok {
   292  		return
   293  	}
   294  	debug.Assertf(mp == mpath, "(mpath %s => disk %s => mpath %s) violation", mp, disk, mpath)
   295  	delete(ios.disk2mpath, disk)
   296  	delete(ios.blockStats, disk)
   297  }
   298  
   299  //
   300  // get utilization and stats; refresh stats periodically
   301  //
   302  
   303  func (ios *ios) GetAllMpathUtils() *MpathUtil {
   304  	cache := ios.refresh()
   305  	return &cache.mpathUtilRO
   306  }
   307  
   308  func (ios *ios) GetMpathUtil(mpath string) int64 {
   309  	return ios.GetAllMpathUtils().Get(mpath)
   310  }
   311  
   312  func (ios *ios) FillDiskStats(m AllDiskStats) {
   313  	cache := ios.refresh()
   314  	for disk := range cache.ioms {
   315  		m[disk] = DiskStats{
   316  			RBps: cache.rbps[disk],
   317  			Ravg: cache.ravg[disk],
   318  			WBps: cache.wbps[disk],
   319  			Wavg: cache.wavg[disk],
   320  			Util: cache.util[disk],
   321  		}
   322  	}
   323  	for disk := range m {
   324  		if _, ok := cache.ioms[disk]; !ok {
   325  			delete(m, disk)
   326  		}
   327  	}
   328  }
   329  
   330  // update iostat cache
   331  func (ios *ios) refresh() *cache {
   332  	var (
   333  		nowTs      = mono.NanoTime()
   334  		statsCache = ios._get()
   335  	)
   336  	if statsCache.expireTime > nowTs {
   337  		return statsCache
   338  	}
   339  	if !ios.busy.CAS(false, true) {
   340  		return statsCache // never want callers to wait
   341  	}
   342  
   343  	ncache := ios.doRefresh(nowTs)
   344  	ios.busy.Store(false)
   345  	return ncache
   346  }
   347  
   348  func (ios *ios) doRefresh(nowTs int64) *cache {
   349  	config := cmn.GCO.Get()
   350  	ios.mu.Lock()
   351  	ncache, maxUtil, missingInfo := ios._ref(config)
   352  	ios.mu.Unlock()
   353  
   354  	var expireTime int64
   355  	if missingInfo {
   356  		expireTime = int64(config.Disk.IostatTimeShort)
   357  	} else { // use the maximum utilization to determine expiration time
   358  		var (
   359  			lowm      = max(config.Disk.DiskUtilLowWM, 1)
   360  			hiwm      = min(config.Disk.DiskUtilHighWM, 100)
   361  			delta     = int64(config.Disk.IostatTimeLong - config.Disk.IostatTimeShort)
   362  			utilRatio = cos.RatioPct(hiwm, lowm, maxUtil)
   363  		)
   364  		utilRatio = (utilRatio + 5) / 10 * 10 // round to nearest tenth
   365  		expireTime = int64(config.Disk.IostatTimeShort) + delta*(100-utilRatio)/100
   366  	}
   367  	ncache.expireTime = nowTs + expireTime
   368  	ios._put(ncache)
   369  
   370  	return ncache
   371  }
   372  
   373  func (ios *ios) _ref(config *cmn.Config) (ncache *cache, maxUtil int64, missingInfo bool) {
   374  	ios.cacheIdx++
   375  	ios.cacheIdx %= len(ios.cacheHst)
   376  	ncache = ios.cacheHst[ios.cacheIdx] // from a pool
   377  
   378  	var (
   379  		statsCache     = ios._get()
   380  		nowTs          = mono.NanoTime()
   381  		elapsed        = nowTs - statsCache.timestamp
   382  		elapsedSeconds = cos.DivRound(elapsed, int64(time.Second))
   383  		elapsedMillis  = cos.DivRound(elapsed, int64(time.Millisecond))
   384  	)
   385  
   386  	ncache.timestamp = nowTs
   387  	for mpath := range ios.mpath2disks {
   388  		ncache.mpathUtil[mpath] = 0
   389  	}
   390  	for disk := range ncache.ioms {
   391  		if _, ok := ios.disk2mpath[disk]; !ok {
   392  			ncache = newCache(len(statsCache.ioms))
   393  			ios.cacheHst[ios.cacheIdx] = ncache
   394  		}
   395  	}
   396  
   397  	readStats(ios.disk2mpath, ios.disk2sysfn, ios.blockStats)
   398  	for disk, mpath := range ios.disk2mpath {
   399  		ncache.rbps[disk] = 0
   400  		ncache.wbps[disk] = 0
   401  		ncache.util[disk] = 0
   402  		ncache.ravg[disk] = 0
   403  		ncache.wavg[disk] = 0
   404  		ds := ios.blockStats[disk]
   405  		ncache.ioms[disk] = ds.IOMs()
   406  		ncache.rms[disk] = ds.ReadMs()
   407  		ncache.rbytes[disk] = ds.ReadBytes()
   408  		ncache.reads[disk] = ds.Reads()
   409  		ncache.wms[disk] = ds.WriteMs()
   410  		ncache.wbytes[disk] = ds.WriteBytes()
   411  		ncache.writes[disk] = ds.Writes()
   412  
   413  		if _, ok := statsCache.ioms[disk]; !ok {
   414  			missingInfo = true
   415  			continue
   416  		}
   417  		// deltas
   418  		var (
   419  			ioMs       = ncache.ioms[disk] - statsCache.ioms[disk]
   420  			reads      = ncache.reads[disk] - statsCache.reads[disk]
   421  			writes     = ncache.writes[disk] - statsCache.writes[disk]
   422  			readBytes  = ncache.rbytes[disk] - statsCache.rbytes[disk]
   423  			writeBytes = ncache.wbytes[disk] - statsCache.wbytes[disk]
   424  		)
   425  		if elapsedMillis > 0 {
   426  			// On macOS computation of `diskUtil` may sometimes exceed 100%
   427  			// which may cause some further inaccuracies.
   428  			if ioMs >= elapsedMillis {
   429  				ncache.util[disk] = 100
   430  			} else {
   431  				ncache.util[disk] = cos.DivRound(ioMs*100, elapsedMillis)
   432  			}
   433  		} else {
   434  			ncache.util[disk] = statsCache.util[disk]
   435  		}
   436  		if !config.TestingEnv() {
   437  			ncache.mpathUtil[mpath] += ncache.util[disk]
   438  		}
   439  		if elapsedSeconds > 0 {
   440  			ncache.rbps[disk] = cos.DivRound(readBytes, elapsedSeconds)
   441  			ncache.wbps[disk] = cos.DivRound(writeBytes, elapsedSeconds)
   442  		} else {
   443  			ncache.rbps[disk] = statsCache.rbps[disk]
   444  			ncache.wbps[disk] = statsCache.wbps[disk]
   445  		}
   446  		if reads > 0 {
   447  			ncache.ravg[disk] = cos.DivRound(readBytes, reads)
   448  		} else if elapsedSeconds == 0 {
   449  			ncache.ravg[disk] = statsCache.ravg[disk]
   450  		} else {
   451  			ncache.ravg[disk] = 0
   452  		}
   453  		if writes > 0 {
   454  			ncache.wavg[disk] = cos.DivRound(writeBytes, writes)
   455  		} else if elapsedSeconds == 0 {
   456  			ncache.wavg[disk] = statsCache.wavg[disk]
   457  		} else {
   458  			ncache.wavg[disk] = 0
   459  		}
   460  	}
   461  
   462  	// average and max
   463  	if config.TestingEnv() {
   464  		for mpath, disks := range ios.mpath2disks {
   465  			debug.Assert(len(disks) <= 1) // testing env: one (shared) disk per mpath
   466  			var u int64
   467  			for d := range disks {
   468  				u = ncache.util[d]
   469  				ncache.mpathUtil[mpath] = u
   470  				break
   471  			}
   472  			ncache.mpathUtilRO.Set(mpath, u)
   473  			maxUtil = max(maxUtil, u)
   474  		}
   475  		return
   476  	}
   477  
   478  	for mpath, disks := range ios.mpath2disks {
   479  		num := int64(len(disks))
   480  		if num == 0 {
   481  			debug.Assert(ncache.mpathUtil[mpath] == 0)
   482  			continue
   483  		}
   484  		u := cos.DivRound(ncache.mpathUtil[mpath], num)
   485  		ncache.mpathUtil[mpath] = u
   486  		ncache.mpathUtilRO.Set(mpath, u)
   487  		maxUtil = max(maxUtil, u)
   488  	}
   489  	return
   490  }
   491  
   492  func (disks FsDisks) _str() string {
   493  	s := fmt.Sprintf("%v", disks) // with sector sizes
   494  	return strings.TrimPrefix(s, "map")
   495  }