github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/xl-storage-disk-id-check.go

     1  // Copyright (c) 2015-2024 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"math/rand"
    26  	"runtime"
    27  	"strconv"
    28  	"strings"
    29  	"sync"
    30  	"sync/atomic"
    31  	"time"
    32  
    33  	"github.com/minio/madmin-go/v3"
    34  	"github.com/minio/minio/internal/cachevalue"
    35  	xioutil "github.com/minio/minio/internal/ioutil"
    36  	"github.com/minio/minio/internal/logger"
    37  )
    38  
    39  //go:generate stringer -type=storageMetric -trimprefix=storageMetric $GOFILE
    40  
    41  type storageMetric uint8
    42  
    43  const (
    44  	storageMetricMakeVolBulk storageMetric = iota
    45  	storageMetricMakeVol
    46  	storageMetricListVols
    47  	storageMetricStatVol
    48  	storageMetricDeleteVol
    49  	storageMetricWalkDir
    50  	storageMetricListDir
    51  	storageMetricReadFile
    52  	storageMetricAppendFile
    53  	storageMetricCreateFile
    54  	storageMetricReadFileStream
    55  	storageMetricRenameFile
    56  	storageMetricRenameData
    57  	storageMetricCheckParts
    58  	storageMetricDelete
    59  	storageMetricDeleteVersions
    60  	storageMetricVerifyFile
    61  	storageMetricWriteAll
    62  	storageMetricDeleteVersion
    63  	storageMetricWriteMetadata
    64  	storageMetricUpdateMetadata
    65  	storageMetricReadVersion
    66  	storageMetricReadXL
    67  	storageMetricReadAll
    68  	storageMetricStatInfoFile
    69  	storageMetricReadMultiple
    70  	storageMetricDeleteAbandonedParts
    71  	storageMetricDiskInfo
    72  
    73  	// .... add more
    74  
    75  	storageMetricLast
    76  )
    77  
    78  // xlStorageDiskIDCheck detects a change in the underlying disk.
    79  type xlStorageDiskIDCheck struct {
    80  	totalWrites           atomic.Uint64
    81  	totalDeletes          atomic.Uint64
    82  	totalErrsAvailability atomic.Uint64 // Captures all data availability errors such as permission denied, faulty disk and timeout errors.
    83  	totalErrsTimeout      atomic.Uint64 // Captures all timeout only errors
    84  
    85  	// apiCalls requires 64-bit alignment for atomic operations; the fixed-size atomic fields above preserve that alignment.
    86  	apiCalls     [storageMetricLast]uint64
    87  	apiLatencies [storageMetricLast]*lockedLastMinuteLatency
    88  	diskID       string
    89  	storage      *xlStorage
    90  	health       *diskHealthTracker
    91  	healthCheck  bool
    92  
    93  	metricsCache *cachevalue.Cache[DiskMetrics]
    94  	diskCtx      context.Context
    95  	diskCancel   context.CancelFunc
    96  }
    97  
    98  func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
    99  	p.metricsCache.InitOnce(5*time.Second,
   100  		cachevalue.Opts{},
   101  		func() (DiskMetrics, error) {
   102  			diskMetric := DiskMetrics{
   103  				LastMinute: make(map[string]AccElem, len(p.apiLatencies)),
   104  				APICalls:   make(map[string]uint64, len(p.apiCalls)),
   105  			}
   106  			for i, v := range p.apiLatencies {
   107  				diskMetric.LastMinute[storageMetric(i).String()] = v.total()
   108  			}
   109  			for i := range p.apiCalls {
   110  				diskMetric.APICalls[storageMetric(i).String()] = atomic.LoadUint64(&p.apiCalls[i])
   111  			}
   112  			return diskMetric, nil
   113  		},
   114  	)
   115  
   116  	diskMetric, _ := p.metricsCache.Get()
   117  	// Do not need this value to be cached.
   118  	diskMetric.TotalErrorsTimeout = p.totalErrsTimeout.Load()
   119  	diskMetric.TotalErrorsAvailability = p.totalErrsAvailability.Load()
   120  
   121  	return diskMetric
   122  }
   123  
   124  // lockedLastMinuteLatency accumulates per-second totals without locking.
   125  type lockedLastMinuteLatency struct {
   126  	cachedSec int64
   127  	cached    atomic.Pointer[AccElem]
   128  	mu        sync.Mutex
   129  	init      sync.Once
   130  	lastMinuteLatency
   131  }
   132  
   133  func (e *lockedLastMinuteLatency) add(value time.Duration) {
   134  	e.addSize(value, 0)
   135  }
   136  
   137  // addSize will add a duration and size.
   138  func (e *lockedLastMinuteLatency) addSize(value time.Duration, sz int64) {
   139  	// Determine the current second; a fresh accumulator is allocated and swapped in whenever it changes.
   140  	t := time.Now().Unix()
   141  	e.init.Do(func() {
   142  		e.cached.Store(&AccElem{})
   143  		atomic.StoreInt64(&e.cachedSec, t)
   144  	})
   145  	acc := e.cached.Load()
   146  	if lastT := atomic.LoadInt64(&e.cachedSec); lastT != t {
   147  		// Check if lastT was changed by someone else.
   148  		if atomic.CompareAndSwapInt64(&e.cachedSec, lastT, t) {
   149  			// Now we swap in a new accumulator.
   150  			newAcc := &AccElem{}
   151  			old := e.cached.Swap(newAcc)
   152  			var a AccElem
   153  			a.Size = atomic.LoadInt64(&old.Size)
   154  			a.Total = atomic.LoadInt64(&old.Total)
   155  			a.N = atomic.LoadInt64(&old.N)
   156  			e.mu.Lock()
   157  			e.lastMinuteLatency.addAll(t-1, a)
   158  			e.mu.Unlock()
   159  			acc = newAcc
   160  		} else {
   161  			// We may be able to grab the new accumulator by yielding.
   162  			runtime.Gosched()
   163  			acc = e.cached.Load()
   164  		}
   165  	}
   166  	atomic.AddInt64(&acc.N, 1)
   167  	atomic.AddInt64(&acc.Total, int64(value))
   168  	atomic.AddInt64(&acc.Size, sz)
   169  }
   170  
   171  // total returns the total call count and latency for the last minute.
   172  func (e *lockedLastMinuteLatency) total() AccElem {
   173  	e.mu.Lock()
   174  	defer e.mu.Unlock()
   175  	return e.lastMinuteLatency.getTotal()
   176  }
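        // Editor's note: a minimal usage sketch (not part of upstream MinIO) of the
        // lock-free latency recorder above. The helper name and the op callback are
        // assumptions for illustration only: time an operation, feed the elapsed
        // duration and byte count into addSize, and read the rolling one-minute
        // aggregate back via total().
        func exampleLatencyRecording(l *lockedLastMinuteLatency, op func() (int64, error)) AccElem {
        	start := time.Now()
        	written, _ := op()                    // run the measured operation
        	l.addSize(time.Since(start), written) // lock-free within the current second
        	return l.total()                      // aggregated AccElem for the last minute
        }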
   177  
   178  func newXLStorageDiskIDCheck(storage *xlStorage, healthCheck bool) *xlStorageDiskIDCheck {
   179  	xl := xlStorageDiskIDCheck{
   180  		storage:      storage,
   181  		health:       newDiskHealthTracker(),
   182  		healthCheck:  healthCheck && globalDriveMonitoring,
   183  		metricsCache: cachevalue.New[DiskMetrics](),
   184  	}
   185  
   186  	xl.totalWrites.Store(xl.storage.getWriteAttribute())
   187  	xl.totalDeletes.Store(xl.storage.getDeleteAttribute())
   188  	xl.diskCtx, xl.diskCancel = context.WithCancel(context.TODO())
   189  	for i := range xl.apiLatencies[:] {
   190  		xl.apiLatencies[i] = &lockedLastMinuteLatency{}
   191  	}
   192  	if xl.healthCheck {
   193  		go xl.monitorDiskWritable(xl.diskCtx)
   194  	}
   195  	return &xl
   196  }
   197  
   198  func (p *xlStorageDiskIDCheck) String() string {
   199  	return p.storage.String()
   200  }
   201  
   202  func (p *xlStorageDiskIDCheck) IsOnline() bool {
   203  	storedDiskID, err := p.storage.GetDiskID()
   204  	if err != nil {
   205  		return false
   206  	}
   207  	return storedDiskID == p.diskID
   208  }
   209  
   210  func (p *xlStorageDiskIDCheck) LastConn() time.Time {
   211  	return p.storage.LastConn()
   212  }
   213  
   214  func (p *xlStorageDiskIDCheck) IsLocal() bool {
   215  	return p.storage.IsLocal()
   216  }
   217  
   218  func (p *xlStorageDiskIDCheck) Endpoint() Endpoint {
   219  	return p.storage.Endpoint()
   220  }
   221  
   222  func (p *xlStorageDiskIDCheck) Hostname() string {
   223  	return p.storage.Hostname()
   224  }
   225  
   226  func (p *xlStorageDiskIDCheck) Healing() *healingTracker {
   227  	return p.storage.Healing()
   228  }
   229  
   230  func (p *xlStorageDiskIDCheck) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode, _ func() bool) (dataUsageCache, error) {
   231  	if contextCanceled(ctx) {
   232  		xioutil.SafeClose(updates)
   233  		return dataUsageCache{}, ctx.Err()
   234  	}
   235  
   236  	if err := p.checkDiskStale(); err != nil {
   237  		xioutil.SafeClose(updates)
   238  		return dataUsageCache{}, err
   239  	}
   240  
   241  	weSleep := func() bool {
   242  		return scannerIdleMode.Load() == 0
   243  	}
   244  
   245  	return p.storage.NSScanner(ctx, cache, updates, scanMode, weSleep)
   246  }
   247  
   248  func (p *xlStorageDiskIDCheck) SetFormatData(b []byte) {
   249  	p.storage.SetFormatData(b)
   250  }
   251  
   252  func (p *xlStorageDiskIDCheck) GetDiskLoc() (poolIdx, setIdx, diskIdx int) {
   253  	return p.storage.GetDiskLoc()
   254  }
   255  
   256  func (p *xlStorageDiskIDCheck) SetDiskLoc(poolIdx, setIdx, diskIdx int) {
   257  	p.storage.SetDiskLoc(poolIdx, setIdx, diskIdx)
   258  }
   259  
   260  func (p *xlStorageDiskIDCheck) Close() error {
   261  	p.diskCancel()
   262  	return p.storage.Close()
   263  }
   264  
   265  func (p *xlStorageDiskIDCheck) GetDiskID() (string, error) {
   266  	return p.storage.GetDiskID()
   267  }
   268  
   269  func (p *xlStorageDiskIDCheck) SetDiskID(id string) {
   270  	p.diskID = id
   271  }
   272  
   273  func (p *xlStorageDiskIDCheck) checkDiskStale() error {
   274  	if p.diskID == "" {
   275  		// For empty disk-id we allow the call as the server might be
   276  		// coming up and trying to read format.json or create format.json
   277  		return nil
   278  	}
   279  	storedDiskID, err := p.storage.GetDiskID()
   280  	if err != nil {
   281  		// return any error generated while reading `format.json`
   282  		return err
   283  	}
   284  	if err == nil && p.diskID == storedDiskID {
   285  		return nil
   286  	}
   287  	// not the same disk we remember, take it offline.
   288  	return errDiskNotFound
   289  }
   290  
   291  func (p *xlStorageDiskIDCheck) DiskInfo(ctx context.Context, opts DiskInfoOptions) (info DiskInfo, err error) {
   292  	if contextCanceled(ctx) {
   293  		return DiskInfo{}, ctx.Err()
   294  	}
   295  
   296  	si := p.updateStorageMetrics(storageMetricDiskInfo)
   297  	defer si(&err)
   298  
   299  	if opts.NoOp {
   300  		if opts.Metrics {
   301  			info.Metrics = p.getMetrics()
   302  		}
   303  		info.Metrics.TotalWrites = p.totalWrites.Load()
   304  		info.Metrics.TotalDeletes = p.totalDeletes.Load()
   305  		info.Metrics.TotalWaiting = uint32(p.health.waiting.Load())
   306  		info.Metrics.TotalErrorsTimeout = p.totalErrsTimeout.Load()
   307  		info.Metrics.TotalErrorsAvailability = p.totalErrsAvailability.Load()
   308  		if p.health.isFaulty() {
   309  			// If the disk is already faulty, return errFaultyDisk so 'mc admin info' output and Prometheus alerts reflect it.
   310  			return info, errFaultyDisk
   311  		}
   312  		return info, nil
   313  	}
   314  
   315  	defer func() {
   316  		if opts.Metrics {
   317  			info.Metrics = p.getMetrics()
   318  		}
   319  		info.Metrics.TotalWrites = p.totalWrites.Load()
   320  		info.Metrics.TotalDeletes = p.totalDeletes.Load()
   321  		info.Metrics.TotalWaiting = uint32(p.health.waiting.Load())
   322  		info.Metrics.TotalErrorsTimeout = p.totalErrsTimeout.Load()
   323  		info.Metrics.TotalErrorsAvailability = p.totalErrsAvailability.Load()
   324  	}()
   325  
   326  	if p.health.isFaulty() {
   327  		// If the disk is already faulty, return errFaultyDisk so 'mc admin info' output and Prometheus alerts reflect it.
   328  		return info, errFaultyDisk
   329  	}
   330  
   331  	info, err = p.storage.DiskInfo(ctx, opts)
   332  	if err != nil {
   333  		return info, err
   334  	}
   335  
   336  	// Check the cached diskID against the backend
   337  	// only if it's non-empty.
   338  	if p.diskID != "" && p.diskID != info.ID {
   339  		return info, errDiskNotFound
   340  	}
   341  	return info, nil
   342  }
   343  
   344  func (p *xlStorageDiskIDCheck) MakeVolBulk(ctx context.Context, volumes ...string) (err error) {
   345  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricMakeVolBulk, volumes...)
   346  	if err != nil {
   347  		return err
   348  	}
   349  	defer done(&err)
   350  
   351  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   352  	return w.Run(func() error { return p.storage.MakeVolBulk(ctx, volumes...) })
   353  }
   354  
   355  func (p *xlStorageDiskIDCheck) MakeVol(ctx context.Context, volume string) (err error) {
   356  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricMakeVol, volume)
   357  	if err != nil {
   358  		return err
   359  	}
   360  	defer done(&err)
   361  
   362  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   363  	return w.Run(func() error { return p.storage.MakeVol(ctx, volume) })
   364  }
   365  
   366  func (p *xlStorageDiskIDCheck) ListVols(ctx context.Context) (vi []VolInfo, err error) {
   367  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricListVols, "/")
   368  	if err != nil {
   369  		return nil, err
   370  	}
   371  	defer done(&err)
   372  
   373  	return p.storage.ListVols(ctx)
   374  }
   375  
   376  func (p *xlStorageDiskIDCheck) StatVol(ctx context.Context, volume string) (vol VolInfo, err error) {
   377  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricStatVol, volume)
   378  	if err != nil {
   379  		return vol, err
   380  	}
   381  	defer done(&err)
   382  
   383  	return xioutil.WithDeadline[VolInfo](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result VolInfo, err error) {
   384  		return p.storage.StatVol(ctx, volume)
   385  	})
   386  }
   387  
   388  func (p *xlStorageDiskIDCheck) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) {
   389  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDeleteVol, volume)
   390  	if err != nil {
   391  		return err
   392  	}
   393  	defer done(&err)
   394  
   395  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   396  	return w.Run(func() error { return p.storage.DeleteVol(ctx, volume, forceDelete) })
   397  }
   398  
   399  func (p *xlStorageDiskIDCheck) ListDir(ctx context.Context, origvolume, volume, dirPath string, count int) (s []string, err error) {
   400  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricListDir, volume, dirPath)
   401  	if err != nil {
   402  		return nil, err
   403  	}
   404  	defer done(&err)
   405  
   406  	return p.storage.ListDir(ctx, origvolume, volume, dirPath, count)
   407  }
   408  
   409  // Legacy API - does not have any deadlines
   410  func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path string, offset int64, buf []byte, verifier *BitrotVerifier) (n int64, err error) {
   411  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadFile, volume, path)
   412  	if err != nil {
   413  		return 0, err
   414  	}
   415  	defer done(&err)
   416  
   417  	return xioutil.WithDeadline[int64](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result int64, err error) {
   418  		return p.storage.ReadFile(ctx, volume, path, offset, buf, verifier)
   419  	})
   420  }
   421  
   422  // Legacy API - does not have any deadlines
   423  func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, path string, buf []byte) (err error) {
   424  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricAppendFile, volume, path)
   425  	if err != nil {
   426  		return err
   427  	}
   428  	defer done(&err)
   429  
   430  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   431  	return w.Run(func() error {
   432  		return p.storage.AppendFile(ctx, volume, path, buf)
   433  	})
   434  }
   435  
   436  func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, origvolume, volume, path string, size int64, reader io.Reader) (err error) {
   437  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricCreateFile, volume, path)
   438  	if err != nil {
   439  		return err
   440  	}
   441  	defer done(&err)
   442  
   443  	return p.storage.CreateFile(ctx, origvolume, volume, path, size, io.NopCloser(reader))
   444  }
   445  
   446  func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
   447  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadFileStream, volume, path)
   448  	if err != nil {
   449  		return nil, err
   450  	}
   451  	defer done(&err)
   452  
   453  	return xioutil.WithDeadline[io.ReadCloser](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result io.ReadCloser, err error) {
   454  		return p.storage.ReadFileStream(ctx, volume, path, offset, length)
   455  	})
   456  }
   457  
   458  func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) (err error) {
   459  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricRenameFile, srcVolume, srcPath, dstVolume, dstPath)
   460  	if err != nil {
   461  		return err
   462  	}
   463  	defer done(&err)
   464  
   465  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   466  	return w.Run(func() error { return p.storage.RenameFile(ctx, srcVolume, srcPath, dstVolume, dstPath) })
   467  }
   468  
   469  func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
   470  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricRenameData, srcPath, fi.DataDir, dstVolume, dstPath)
   471  	if err != nil {
   472  		return 0, err
   473  	}
   474  	defer func() {
   475  		if err == nil && !skipAccessChecks(dstVolume) {
   476  			p.storage.setWriteAttribute(p.totalWrites.Add(1))
   477  		}
   478  		done(&err)
   479  	}()
   480  
   481  	return xioutil.WithDeadline[uint64](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result uint64, err error) {
   482  		return p.storage.RenameData(ctx, srcVolume, srcPath, fi, dstVolume, dstPath, opts)
   483  	})
   484  }
   485  
   486  func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) (err error) {
   487  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricCheckParts, volume, path)
   488  	if err != nil {
   489  		return err
   490  	}
   491  	defer done(&err)
   492  
   493  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   494  	return w.Run(func() error { return p.storage.CheckParts(ctx, volume, path, fi) })
   495  }
   496  
   497  func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path string, deleteOpts DeleteOptions) (err error) {
   498  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDelete, volume, path)
   499  	if err != nil {
   500  		return err
   501  	}
   502  	defer done(&err)
   503  
   504  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   505  	return w.Run(func() error { return p.storage.Delete(ctx, volume, path, deleteOpts) })
   506  }
   507  
   508  // DeleteVersions deletes a slice of versions; they may belong to the same
   509  // object or to multiple objects.
   510  func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string, versions []FileInfoVersions, opts DeleteOptions) (errs []error) {
   511  	// Merely for tracing storage
   512  	path := ""
   513  	if len(versions) > 0 {
   514  		path = versions[0].Name
   515  	}
   516  	errs = make([]error, len(versions))
   517  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDeleteVersions, volume, path)
   518  	if err != nil {
   519  		for i := range errs {
   520  			errs[i] = ctx.Err()
   521  		}
   522  		return errs
   523  	}
   524  	defer func() {
   525  		if !skipAccessChecks(volume) {
   526  			var permanentDeletes uint64
   527  			var deleteMarkers uint64
   528  
   529  			for i, nerr := range errs {
   530  				if nerr != nil {
   531  					continue
   532  				}
   533  				for _, fi := range versions[i].Versions {
   534  					if fi.Deleted {
   535  						// Delete markers are a write operation, not a permanent delete.
   536  						deleteMarkers++
   537  						continue
   538  					}
   539  					permanentDeletes++
   540  				}
   541  			}
   542  			if deleteMarkers > 0 {
   543  				p.storage.setWriteAttribute(p.totalWrites.Add(deleteMarkers))
   544  			}
   545  			if permanentDeletes > 0 {
   546  				p.storage.setDeleteAttribute(p.totalDeletes.Add(permanentDeletes))
   547  			}
   548  		}
   549  		done(&err)
   550  	}()
   551  
   552  	errs = p.storage.DeleteVersions(ctx, volume, versions, opts)
   553  	for i := range errs {
   554  		if errs[i] != nil {
   555  			err = errs[i]
   556  			break
   557  		}
   558  	}
   559  
   560  	return errs
   561  }
   562  
   563  func (p *xlStorageDiskIDCheck) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) (err error) {
   564  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricVerifyFile, volume, path)
   565  	if err != nil {
   566  		return err
   567  	}
   568  	defer done(&err)
   569  
   570  	return p.storage.VerifyFile(ctx, volume, path, fi)
   571  }
   572  
   573  func (p *xlStorageDiskIDCheck) WriteAll(ctx context.Context, volume string, path string, b []byte) (err error) {
   574  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricWriteAll, volume, path)
   575  	if err != nil {
   576  		return err
   577  	}
   578  	defer done(&err)
   579  
   580  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   581  	return w.Run(func() error { return p.storage.WriteAll(ctx, volume, path, b) })
   582  }
   583  
   584  func (p *xlStorageDiskIDCheck) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo, forceDelMarker bool, opts DeleteOptions) (err error) {
   585  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDeleteVersion, volume, path)
   586  	if err != nil {
   587  		return err
   588  	}
   589  	defer func() {
   590  		defer done(&err)
   591  
   592  		if err == nil && !skipAccessChecks(volume) {
   593  			if opts.UndoWrite {
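        				// Adding ^uint64(0) (all bits set) wraps the unsigned counter around, i.e. it atomically decrements totalWrites by one.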
   594  				p.storage.setWriteAttribute(p.totalWrites.Add(^uint64(0)))
   595  				return
   596  			}
   597  
   598  			if fi.Deleted {
   599  				// Delete markers are a write operation, not a permanent delete.
   600  				p.storage.setWriteAttribute(p.totalWrites.Add(1))
   601  				return
   602  			}
   603  
   604  			p.storage.setDeleteAttribute(p.totalDeletes.Add(1))
   605  		}
   606  	}()
   607  
   608  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   609  	return w.Run(func() error { return p.storage.DeleteVersion(ctx, volume, path, fi, forceDelMarker, opts) })
   610  }
   611  
   612  func (p *xlStorageDiskIDCheck) UpdateMetadata(ctx context.Context, volume, path string, fi FileInfo, opts UpdateMetadataOpts) (err error) {
   613  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricUpdateMetadata, volume, path)
   614  	if err != nil {
   615  		return err
   616  	}
   617  	defer done(&err)
   618  
   619  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   620  	return w.Run(func() error { return p.storage.UpdateMetadata(ctx, volume, path, fi, opts) })
   621  }
   622  
   623  func (p *xlStorageDiskIDCheck) WriteMetadata(ctx context.Context, origvolume, volume, path string, fi FileInfo) (err error) {
   624  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricWriteMetadata, volume, path)
   625  	if err != nil {
   626  		return err
   627  	}
   628  	defer done(&err)
   629  
   630  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   631  	return w.Run(func() error { return p.storage.WriteMetadata(ctx, origvolume, volume, path, fi) })
   632  }
   633  
   634  func (p *xlStorageDiskIDCheck) ReadVersion(ctx context.Context, origvolume, volume, path, versionID string, opts ReadOptions) (fi FileInfo, err error) {
   635  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadVersion, volume, path)
   636  	if err != nil {
   637  		return fi, err
   638  	}
   639  	defer done(&err)
   640  
   641  	return xioutil.WithDeadline[FileInfo](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result FileInfo, err error) {
   642  		return p.storage.ReadVersion(ctx, origvolume, volume, path, versionID, opts)
   643  	})
   644  }
   645  
   646  func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
   647  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadAll, volume, path)
   648  	if err != nil {
   649  		return nil, err
   650  	}
   651  	defer done(&err)
   652  
   653  	return xioutil.WithDeadline[[]byte](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result []byte, err error) {
   654  		return p.storage.ReadAll(ctx, volume, path)
   655  	})
   656  }
   657  
   658  func (p *xlStorageDiskIDCheck) ReadXL(ctx context.Context, volume string, path string, readData bool) (rf RawFileInfo, err error) {
   659  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadXL, volume, path)
   660  	if err != nil {
   661  		return RawFileInfo{}, err
   662  	}
   663  	defer done(&err)
   664  
   665  	return xioutil.WithDeadline[RawFileInfo](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result RawFileInfo, err error) {
   666  		return p.storage.ReadXL(ctx, volume, path, readData)
   667  	})
   668  }
   669  
   670  func (p *xlStorageDiskIDCheck) StatInfoFile(ctx context.Context, volume, path string, glob bool) (stat []StatInfo, err error) {
   671  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricStatInfoFile, volume, path)
   672  	if err != nil {
   673  		return nil, err
   674  	}
   675  	defer done(&err)
   676  
   677  	return p.storage.StatInfoFile(ctx, volume, path, glob)
   678  }
   679  
   680  // ReadMultiple will read multiple files and send each file as a response.
   681  // Files are read and returned in the given order.
   682  // The resp channel is closed before the call returns.
   683  // Only a canceled context will return an error.
   684  func (p *xlStorageDiskIDCheck) ReadMultiple(ctx context.Context, req ReadMultipleReq, resp chan<- ReadMultipleResp) (err error) {
   685  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadMultiple, req.Bucket, req.Prefix)
   686  	if err != nil {
   687  		xioutil.SafeClose(resp)
   688  		return err
   689  	}
   690  	defer done(&err)
   691  
   692  	return p.storage.ReadMultiple(ctx, req, resp)
   693  }
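        // Editor's note: an illustrative sketch (not part of upstream MinIO) of how
        // ReadMultiple is meant to be consumed. The helper name is an assumption;
        // the resp channel must be drained concurrently, since ReadMultiple closes
        // it before returning and only a canceled context produces an error.
        func exampleReadMultiple(ctx context.Context, p *xlStorageDiskIDCheck, req ReadMultipleReq) error {
        	resp := make(chan ReadMultipleResp, 10)
        	errCh := make(chan error, 1)
        	go func() {
        		errCh <- p.ReadMultiple(ctx, req, resp)
        	}()
        	for range resp {
        		// Handle each file response here; resp is closed by ReadMultiple.
        	}
        	return <-errCh
        }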
   694  
   695  // CleanAbandonedData will read metadata of the object on disk
   696  // and delete any data directories and inline data that isn't referenced in metadata.
   697  func (p *xlStorageDiskIDCheck) CleanAbandonedData(ctx context.Context, volume string, path string) (err error) {
   698  	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDeleteAbandonedParts, volume, path)
   699  	if err != nil {
   700  		return err
   701  	}
   702  	defer done(&err)
   703  
   704  	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   705  	return w.Run(func() error { return p.storage.CleanAbandonedData(ctx, volume, path) })
   706  }
   707  
   708  func storageTrace(s storageMetric, startTime time.Time, duration time.Duration, path string, err string, custom map[string]string) madmin.TraceInfo {
   709  	return madmin.TraceInfo{
   710  		TraceType: madmin.TraceStorage,
   711  		Time:      startTime,
   712  		NodeName:  globalLocalNodeName,
   713  		FuncName:  "storage." + s.String(),
   714  		Duration:  duration,
   715  		Path:      path,
   716  		Error:     err,
   717  		Custom:    custom,
   718  	}
   719  }
   720  
   721  func scannerTrace(s scannerMetric, startTime time.Time, duration time.Duration, path string, custom map[string]string) madmin.TraceInfo {
   722  	return madmin.TraceInfo{
   723  		TraceType: madmin.TraceScanner,
   724  		Time:      startTime,
   725  		NodeName:  globalLocalNodeName,
   726  		FuncName:  "scanner." + s.String(),
   727  		Duration:  duration,
   728  		Path:      path,
   729  		Custom:    custom,
   730  	}
   731  }
   732  
   733  // updateStorageMetrics returns a closure that records latency and call counts, classifies availability/timeout errors, and publishes a storage trace when subscribed.
   734  func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...string) func(err *error) {
   735  	startTime := time.Now()
   736  	trace := globalTrace.NumSubscribers(madmin.TraceStorage) > 0
   737  	return func(errp *error) {
   738  		duration := time.Since(startTime)
   739  
   740  		var err error
   741  		if errp != nil && *errp != nil {
   742  			err = *errp
   743  		}
   744  
   745  		atomic.AddUint64(&p.apiCalls[s], 1)
   746  		if IsErr(err, []error{
   747  			errVolumeAccessDenied,
   748  			errFileAccessDenied,
   749  			errDiskAccessDenied,
   750  			errFaultyDisk,
   751  			errFaultyRemoteDisk,
   752  			context.DeadlineExceeded,
   753  		}...) {
   754  			p.totalErrsAvailability.Add(1)
   755  			if errors.Is(err, context.DeadlineExceeded) {
   756  				p.totalErrsTimeout.Add(1)
   757  			}
   758  		}
   759  
   760  		p.apiLatencies[s].add(duration)
   761  
   762  		if trace {
   763  			custom := make(map[string]string, 2)
   764  			paths = append([]string{p.String()}, paths...)
   765  			var errStr string
   766  			if err != nil {
   767  				errStr = err.Error()
   768  			}
   769  			custom["total-errs-timeout"] = strconv.FormatUint(p.totalErrsTimeout.Load(), 10)
   770  			custom["total-errs-availability"] = strconv.FormatUint(p.totalErrsAvailability.Load(), 10)
   771  			globalTrace.Publish(storageTrace(s, startTime, duration, strings.Join(paths, " "), errStr, custom))
   772  		}
   773  	}
   774  }
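        // Editor's note: a minimal sketch (not part of upstream MinIO) of the
        // deferred-closure pattern used with updateStorageMetrics throughout this
        // file. The helper name is an assumption: the return error must be named so
        // the deferred call observes the operation's final outcome.
        func exampleUpdateStorageMetrics(ctx context.Context, p *xlStorageDiskIDCheck, volume string) (err error) {
        	done := p.updateStorageMetrics(storageMetricStatVol, volume)
        	defer done(&err)

        	_, err = p.storage.StatVol(ctx, volume)
        	return err
        }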
   775  
   776  const (
   777  	diskHealthOK int32 = iota
   778  	diskHealthFaulty
   779  )
   780  
   781  type diskHealthTracker struct {
   782  	// atomic time of last success
   783  	lastSuccess int64
   784  
   785  	// atomic time of last time a token was grabbed.
   786  	lastStarted int64
   787  
   788  	// Atomic status of disk.
   789  	status atomic.Int32
   790  
   791  	// Atomic count of operations currently waiting on the disk; a persistently high value indicates a hung disk.
   792  	waiting atomic.Int32
   793  }
   794  
   795  // newDiskHealthTracker creates a new disk health tracker.
   796  func newDiskHealthTracker() *diskHealthTracker {
   797  	d := diskHealthTracker{
   798  		lastSuccess: time.Now().UnixNano(),
   799  		lastStarted: time.Now().UnixNano(),
   800  	}
   801  	d.status.Store(diskHealthOK)
   802  	return &d
   803  }
   804  
   805  // logSuccess will update the last successful operation time.
   806  func (d *diskHealthTracker) logSuccess() {
   807  	atomic.StoreInt64(&d.lastSuccess, time.Now().UnixNano())
   808  }
   809  
   810  func (d *diskHealthTracker) isFaulty() bool {
   811  	return d.status.Load() == diskHealthFaulty
   812  }
   813  
   814  type (
   815  	healthDiskCtxKey   struct{}
   816  	healthDiskCtxValue struct {
   817  		lastSuccess *int64
   818  	}
   819  )
   820  
   821  // logSuccess will update the last successful operation time.
   822  func (h *healthDiskCtxValue) logSuccess() {
   823  	atomic.StoreInt64(h.lastSuccess, time.Now().UnixNano())
   824  }
   825  
   826  // noopDoneFunc is a no-op done func.
   827  // Can be reused.
   828  var noopDoneFunc = func(_ *error) {}
   829  
   830  // TrackDiskHealth for this request.
   831  // When a nil error is returned, 'done' MUST be called
   832  // with the final status of the operation, if it corresponds to disk health.
   833  // If the pointer passed to done is non-nil AND the error
   834  // is either nil or io.EOF, the disk is considered good.
   835  // So if unsure of the disk status, pass nil to done.
   836  // Shadowing will work as long as return error is named: https://go.dev/play/p/sauq86SsTN2
   837  func (p *xlStorageDiskIDCheck) TrackDiskHealth(ctx context.Context, s storageMetric, paths ...string) (c context.Context, done func(*error), err error) {
   838  	done = noopDoneFunc
   839  	if contextCanceled(ctx) {
   840  		return ctx, done, ctx.Err()
   841  	}
   842  
   843  	if p.health.status.Load() != diskHealthOK {
   844  		return ctx, done, errFaultyDisk
   845  	}
   846  
   847  	// Verify that the disk is not stale, i.e. detect:
   848  	// - missing format.json (unformatted drive)
   849  	// - a valid format.json that carries a different 'uuid'
   850  	if err = p.checkDiskStale(); err != nil {
   851  		return ctx, done, err
   852  	}
   853  
   854  	// Disallow recursive tracking to avoid deadlocks.
   855  	if ctx.Value(healthDiskCtxKey{}) != nil {
   856  		done = p.updateStorageMetrics(s, paths...)
   857  		return ctx, done, nil
   858  	}
   859  
   860  	if contextCanceled(ctx) {
   861  		return ctx, done, ctx.Err()
   862  	}
   863  
   864  	atomic.StoreInt64(&p.health.lastStarted, time.Now().UnixNano())
   865  	p.health.waiting.Add(1)
   866  
   867  	ctx = context.WithValue(ctx, healthDiskCtxKey{}, &healthDiskCtxValue{lastSuccess: &p.health.lastSuccess})
   868  	si := p.updateStorageMetrics(s, paths...)
   869  	var once sync.Once
   870  	return ctx, func(errp *error) {
   871  		p.health.waiting.Add(-1)
   872  		once.Do(func() {
   873  			if errp != nil {
   874  				err := *errp
   875  				if err == nil || errors.Is(err, io.EOF) {
   876  					p.health.logSuccess()
   877  				}
   878  			}
   879  			si(errp)
   880  		})
   881  	}, nil
   882  }
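        // Editor's note: an illustrative sketch (not part of upstream MinIO) of the
        // TrackDiskHealth contract documented above. The helper name is an
        // assumption: bail out if tracking itself failed, then defer done with the
        // address of the named return error so shadowing keeps the health status
        // and metrics accurate.
        func exampleTrackDiskHealth(ctx context.Context, p *xlStorageDiskIDCheck, volume, path string) (err error) {
        	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadAll, volume, path)
        	if err != nil {
        		return err
        	}
        	defer done(&err)

        	_, err = p.storage.ReadAll(ctx, volume, path)
        	return err
        }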
   883  
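        // toWrite is a 2049-byte probe buffer (index 2048 set to 42, the rest zero)
        // written and read back by the disk health monitors below.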
   884  var toWrite = []byte{2048: 42}
   885  
   886  // monitorDiskStatus should be called once when a drive has been marked offline.
   887  // Once the disk has been deemed ok, it will return to online status.
   888  func (p *xlStorageDiskIDCheck) monitorDiskStatus(spent time.Duration, fn string) {
   889  	t := time.NewTicker(5 * time.Second)
   890  	defer t.Stop()
   891  
   892  	for range t.C {
   893  		if contextCanceled(p.diskCtx) {
   894  			return
   895  		}
   896  
   897  		err := p.storage.WriteAll(context.Background(), minioMetaTmpBucket, fn, toWrite)
   898  		if err != nil {
   899  			continue
   900  		}
   901  
   902  		b, err := p.storage.ReadAll(context.Background(), minioMetaTmpBucket, fn)
   903  		if err != nil || len(b) != len(toWrite) {
   904  			continue
   905  		}
   906  
   907  		err = p.storage.Delete(context.Background(), minioMetaTmpBucket, fn, DeleteOptions{
   908  			Recursive: false,
   909  			Immediate: false,
   910  		})
   911  
   912  		if err == nil {
   913  			logger.Event(context.Background(), "node(%s): Read/Write/Delete successful, bringing drive %s online", globalLocalNodeName, p.storage.String())
   914  			p.health.status.Store(diskHealthOK)
   915  			p.health.waiting.Add(-1)
   916  			return
   917  		}
   918  	}
   919  }
   920  
   921  // monitorDiskWritable periodically verifies that the drive can be written to and read back.
   922  // If a check fails with a faulty-disk error or times out, the drive is taken offline and monitorDiskStatus is started.
   923  func (p *xlStorageDiskIDCheck) monitorDiskWritable(ctx context.Context) {
   924  	var (
   925  		// We check every 15 seconds if the disk is writable and we can read back.
   926  		checkEvery = 15 * time.Second
   927  
   928  		// If the disk has completed an operation successfully within the last 5 seconds, don't check it.
   929  		skipIfSuccessBefore = 5 * time.Second
   930  	)
   931  
   932  	// If the drive max timeout does not exceed the checkEvery window,
   933  	// shrink the check interval to one second below that timeout.
   934  	if globalDriveConfig.GetMaxTimeout() <= checkEvery {
   935  		checkEvery = globalDriveConfig.GetMaxTimeout() - time.Second
   936  		if checkEvery <= 0 {
   937  			checkEvery = globalDriveConfig.GetMaxTimeout()
   938  		}
   939  	}
   940  
   941  	// Likewise, if the drive max timeout does not exceed the skipIfSuccessBefore window,
   942  	// shrink skipIfSuccessBefore to one second below that timeout.
   943  	if globalDriveConfig.GetMaxTimeout() <= skipIfSuccessBefore {
   944  		skipIfSuccessBefore = globalDriveConfig.GetMaxTimeout() - time.Second
   945  		if skipIfSuccessBefore <= 0 {
   946  			skipIfSuccessBefore = globalDriveConfig.GetMaxTimeout()
   947  		}
   948  	}
   949  
   950  	t := time.NewTicker(checkEvery)
   951  	defer t.Stop()
   952  	fn := mustGetUUID()
   953  
   954  	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
   955  
   956  	monitor := func() bool {
   957  		if contextCanceled(ctx) {
   958  			return false
   959  		}
   960  
   961  		if p.health.status.Load() != diskHealthOK {
   962  			return true
   963  		}
   964  
   965  		if time.Since(time.Unix(0, atomic.LoadInt64(&p.health.lastSuccess))) < skipIfSuccessBefore {
   966  			// We recently saw a success - no need to check.
   967  			return true
   968  		}
   969  
   970  		goOffline := func(err error, spent time.Duration) {
   971  			if p.health.status.CompareAndSwap(diskHealthOK, diskHealthFaulty) {
   972  				logger.LogAlwaysIf(ctx, fmt.Errorf("node(%s): taking drive %s offline: %v", globalLocalNodeName, p.storage.String(), err))
   973  				p.health.waiting.Add(1)
   974  				go p.monitorDiskStatus(spent, fn)
   975  			}
   976  		}
   977  
   978  		// Offset checks a bit.
   979  		time.Sleep(time.Duration(rng.Int63n(int64(1 * time.Second))))
   980  
   981  		dctx, dcancel := context.WithCancel(ctx)
   982  		started := time.Now()
   983  		go func() {
   984  			timeout := time.NewTimer(globalDriveConfig.GetMaxTimeout())
   985  			select {
   986  			case <-dctx.Done():
   987  				if !timeout.Stop() {
   988  					<-timeout.C
   989  				}
   990  			case <-timeout.C:
   991  				spent := time.Since(started)
   992  				goOffline(fmt.Errorf("unable to write+read for %v", spent.Round(time.Millisecond)), spent)
   993  			}
   994  		}()
   995  
   996  		func() {
   997  			defer dcancel()
   998  
   999  			err := p.storage.WriteAll(ctx, minioMetaTmpBucket, fn, toWrite)
  1000  			if err != nil {
  1001  				if osErrToFileErr(err) == errFaultyDisk {
  1002  					goOffline(fmt.Errorf("unable to write: %w", err), 0)
  1003  				}
  1004  				return
  1005  			}
  1006  			b, err := p.storage.ReadAll(context.Background(), minioMetaTmpBucket, fn)
  1007  			if err != nil || len(b) != len(toWrite) {
  1008  				if osErrToFileErr(err) == errFaultyDisk {
  1009  					goOffline(fmt.Errorf("unable to read: %w", err), 0)
  1010  				}
  1011  				return
  1012  			}
  1013  		}()
  1014  
  1015  		// Continue to monitor
  1016  		return true
  1017  	}
  1018  
  1019  	for {
  1020  		select {
  1021  		case <-ctx.Done():
  1022  			return
  1023  		case <-t.C:
  1024  			if !monitor() {
  1025  				return
  1026  			}
  1027  		}
  1028  	}
  1029  }
  1030  
  1031  // checkID will check if the disk ID matches the provided ID.
  1032  func (p *xlStorageDiskIDCheck) checkID(wantID string) (err error) {
  1033  	if wantID == "" {
  1034  		return nil
  1035  	}
  1036  	id, err := p.storage.GetDiskID()
  1037  	if err != nil {
  1038  		return err
  1039  	}
  1040  	if id != wantID {
  1041  		return fmt.Errorf("disk ID %s does not match. disk reports %s", wantID, id)
  1042  	}
  1043  	return nil
  1044  }
  1045  
  1046  // diskHealthCheckOK will check if the provided error is nil
  1047  // and update the disk status if good.
  1048  // For convenience a bool is returned: true when the error is nil or io.EOF,
  1049  // false for any other error.
  1050  func diskHealthCheckOK(ctx context.Context, err error) bool {
  1051  	// Check if context has a disk health check.
  1052  	tracker, ok := ctx.Value(healthDiskCtxKey{}).(*healthDiskCtxValue)
  1053  	if !ok {
  1054  		// No tracker, return
  1055  		return err == nil || errors.Is(err, io.EOF)
  1056  	}
  1057  	if err == nil || errors.Is(err, io.EOF) {
  1058  		tracker.logSuccess()
  1059  		return true
  1060  	}
  1061  	return false
  1062  }
  1063  
  1064  // diskHealthWrapper provides either an io.Reader or an io.Writer
  1065  // that updates status of the provided tracker.
  1066  // Use through diskHealthReader or diskHealthWriter.
  1067  type diskHealthWrapper struct {
  1068  	tracker *healthDiskCtxValue
  1069  	r       io.Reader
  1070  	w       io.Writer
  1071  }
  1072  
  1073  func (d *diskHealthWrapper) Read(p []byte) (int, error) {
  1074  	if d.r == nil {
  1075  		return 0, fmt.Errorf("diskHealthWrapper: Read with no reader")
  1076  	}
  1077  	n, err := d.r.Read(p)
  1078  	if err == nil || (err == io.EOF && n > 0) {
  1079  		d.tracker.logSuccess()
  1080  	}
  1081  	return n, err
  1082  }
  1083  
  1084  func (d *diskHealthWrapper) Write(p []byte) (int, error) {
  1085  	if d.w == nil {
  1086  		return 0, fmt.Errorf("diskHealthWrapper: Write with no writer")
  1087  	}
  1088  	n, err := d.w.Write(p)
  1089  	if err == nil && n == len(p) {
  1090  		d.tracker.logSuccess()
  1091  	}
  1092  	return n, err
  1093  }
  1094  
  1095  // diskHealthReader provides a wrapper that will update disk health on
  1096  // ctx, on every successful read.
  1097  // This should only be used directly at the os/syscall level,
  1098  // otherwise buffered operations may return false health checks.
  1099  func diskHealthReader(ctx context.Context, r io.Reader) io.Reader {
  1100  	// Check if context has a disk health check.
  1101  	tracker, ok := ctx.Value(healthDiskCtxKey{}).(*healthDiskCtxValue)
  1102  	if !ok {
  1103  		// No need to wrap
  1104  		return r
  1105  	}
  1106  	return &diskHealthWrapper{r: r, tracker: tracker}
  1107  }
  1108  
  1109  // diskHealthWriter provides a wrapper that will update disk health on
  1110  // ctx, on every successful write.
  1111  // This should only be used directly at the os/syscall level,
  1112  // otherwise buffered operations may return false health checks.
  1113  func diskHealthWriter(ctx context.Context, w io.Writer) io.Writer {
  1114  	// Check if context has a disk health check.
  1115  	tracker, ok := ctx.Value(healthDiskCtxKey{}).(*healthDiskCtxValue)
  1116  	if !ok {
  1117  		// No need to wrap
  1118  		return w
  1119  	}
  1120  	return &diskHealthWrapper{w: w, tracker: tracker}
  1121  }
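        // Editor's note: an illustrative sketch (not part of upstream MinIO) showing
        // how the health-aware wrappers compose. The helper name is an assumption;
        // wrapping only has an effect when ctx carries the healthDiskCtxValue set up
        // by TrackDiskHealth, otherwise the original reader and writer are returned
        // unchanged.
        func exampleDiskHealthCopy(ctx context.Context, dst io.Writer, src io.Reader) (int64, error) {
        	return io.Copy(diskHealthWriter(ctx, dst), diskHealthReader(ctx, src))
        }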